Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pycparser/ply/yacc.py: 18%

1916 statements  

# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2017
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman, "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what some might
# consider to be good Python "coding style." Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import re
import types
import sys
import os.path
import inspect
import base64
import warnings

__version__    = '3.10'
__tabversion__ = '3.10'

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = True             # Debugging mode. If set, yacc generates a
                             # 'parser.out' file in the current directory

debug_file = 'parser.out'    # Default name of the debugging file
tab_module = 'parsetab'     # Default name of the table module
default_lr = 'LALR'          # Default LR table generation method

error_count = 3              # Number of symbols that must be shifted to leave recovery mode

yaccdevel = False            # Set to True if developing yacc. This turns off optimized
                             # implementations of certain functions.

resultlimit = 40             # Size limit of results when running in debug mode.

pickle_protocol = 0          # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module. PLY will use this by default to create things
# such as the parser.out file. If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self
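
# Example (illustrative, not part of PLY itself): instead of the default
# PlyLogger, a standard logging.Logger can be handed to yacc() through its
# errorlog/debuglog arguments to capture diagnostics. A minimal sketch,
# assuming a module that defines the usual tokens and p_* rules:
#
#     import logging
#     logging.basicConfig(level=logging.DEBUG, filename='parselog.txt')
#     log = logging.getLogger()
#     parser = yacc.yacc(debug=True, debuglog=log, errorlog=log)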

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + ' ...'
    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))

# Panic mode error recovery support. This feature is being reworked--much of the
# code here is to offer a deprecation/backwards compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''

def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r
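
# Example (illustrative, not part of this module): a p_error() handler written
# in the style recommended by _warnmsg above, paired with a grammar rule that
# uses the special 'error' token for resynchronization. The SEMI token and the
# statement rule are assumptions for this sketch:
#
#     def p_statement_error(p):
#         'statement : error SEMI'
#         print('Syntax error in statement; resynchronized at ";"')
#
#     def p_error(p):
#         if p:
#             print('Syntax error at token', p.type)
#         else:
#             print('Syntax error at EOF')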

#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself. These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule. Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined). The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError
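
# Example (illustrative, not part of this module): inside a grammar rule, the
# YaccProduction instance p gives indexed access to symbol values along with
# the position helpers described above. A minimal sketch, assuming a grammar
# with a PLUS token and a term nonterminal:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]          # .value of each right-hand symbol
#         line = p.lineno(2)          # line number of the PLUS token
#         span = p.lexspan(3)         # (lexpos, endlexpos) of term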

# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        self.errorok = True

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See: https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}
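
    # Illustrative example (the state number and rule number are hypothetical):
    # if the action table contains
    #
    #     self.action[12] = {'PLUS': -7, 'MINUS': -7, ')': -7}
    #
    # then every lookahead in state 12 leads to a reduction by rule 7, so
    # set_defaulted_states() records {12: -7} and the parser can reduce in
    # state 12 without first asking the lexer for the next token.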

    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
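
    # Example (illustrative, not part of this module): the debug path above is
    # taken whenever a true debug value is passed in. A minimal sketch, assuming
    # a built parser and an input string named data:
    #
    #     log = PlyLogger(sys.stderr)        # or a logging.Logger instance
    #     result = parser.parse(data, debug=log, tracking=True)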

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse(). All changes made to the
    # parsing engine should be made here. Optimized versions of this function
    # are automatically created by the ply/ygen.py script. This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parsedebug-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script. Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-notrack-end

# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production. For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            self.str = '%s -> <empty>' % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal standin for Production objects when
# reading table data from files. It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule. For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse. Here are
# the basic attributes:
#
#       name       - Name of the production. For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next    - Next LR item. For example, if we are 'expr -> expr . PLUS term',
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, '.')
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            s = '%s -> <empty>' % self.name
        return s

    def __repr__(self):
        return 'LRItem(' + str(self) + ')'
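
# Example (illustrative, not part of this module): for a Production p with
# prod == ('expr', 'PLUS', 'term'), LRItem(p, 1) represents the item
# 'expr -> expr . PLUS term': its prod becomes ('expr', '.', 'PLUS', 'term')
# and its lr_index is 1.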

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols. Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
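
# For example, rightmost_terminal(['expr', 'PLUS', 'term'], terminals) scans
# from the right and returns 'PLUS' when PLUS is the only terminal among the
# symbols; this is what gives a rule its default precedence in add_production().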

# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]  # A list of all of the productions. The first
                                   # entry is always reserved for the purpose of
                                   # building an augmented grammar

        self.Prodnames = {}        # A dictionary mapping the names of nonterminals to a list of all
                                   # productions of that nonterminal.

        self.Prodmap = {}          # A dictionary that is only used to detect duplicate
                                   # productions.

        self.Terminals = {}        # A dictionary mapping the names of terminal symbols to a
                                   # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}     # A dictionary mapping names of nonterminals to a list
                                   # of rule numbers where they are used.

        self.First = {}            # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}           # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}       # Precedence rules for each terminal. Contains tuples of the
                                   # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None          # Starting symbol for the grammar


    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

1517 # set_precedence() 

1518 # 

1519 # Sets the precedence for a given terminal. assoc is the associativity such as 

1520 # 'left','right', or 'nonassoc'. level is a numeric level. 

1521 # 

1522 # ----------------------------------------------------------------------------- 

1523 

1524 def set_precedence(self, term, assoc, level): 

1525 assert self.Productions == [None], 'Must call set_precedence() before add_production()' 

1526 if term in self.Precedence: 

1527 raise GrammarError('Precedence already specified for terminal %r' % term) 

1528 if assoc not in ['left', 'right', 'nonassoc']: 

1529 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 

1530 self.Precedence[term] = (assoc, level) 

1531 
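
    # Example (illustrative, not part of this module): user code normally drives
    # this method indirectly through a precedence table in the parser module,
    # such as
    #
    #     precedence = (
    #         ('left', 'PLUS', 'MINUS'),
    #         ('left', 'TIMES', 'DIVIDE'),
    #         ('right', 'UMINUS'),
    #     )
    #
    # which yacc() turns into calls like set_precedence('PLUS', 'left', 1),
    # with the row position supplying the numeric level.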

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols. For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most terminal
    # symbol or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid. Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
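
    # Example (illustrative, not part of this module): %prec lets a rule borrow
    # the precedence of a named terminal instead of its rightmost terminal, as
    # in the classic unary-minus rule (UMINUS being a precedence-only name
    # declared in the precedence table, not a real token):
    #
    #     def p_expr_uminus(p):
    #         'expr : MINUS expr %prec UMINUS'
    #         p[0] = -p[2]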

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar. Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start

1642 # ----------------------------------------------------------------------------- 

1643 # find_unreachable() 

1644 # 

1645 # Find all of the nonterminal symbols that can't be reached from the starting 

1646 # symbol. Returns a list of nonterminals that can't be reached. 

1647 # ----------------------------------------------------------------------------- 

1648 

1649 def find_unreachable(self): 

1650 

1651 # Mark all symbols that are reachable from a symbol s 

1652 def mark_reachable_from(s): 

1653 if s in reachable: 

1654 return 

1655 reachable.add(s) 

1656 for p in self.Prodnames.get(s, []): 

1657 for r in p.prod: 

1658 mark_reachable_from(r) 

1659 

1660 reachable = set() 

1661 mark_reachable_from(self.Productions[0].prod[0]) 

1662 return [s for s in self.Nonterminals if s not in reachable] 

1663 

1664 # ----------------------------------------------------------------------------- 

1665 # infinite_cycles() 

1666 # 

1667 # This function looks at the various parsing rules and tries to detect 

1668 # infinite recursion cycles (grammar rules where there is no possible way 

1669 # to derive a string of only terminals). 

1670 # ----------------------------------------------------------------------------- 

1671 

1672 def infinite_cycles(self): 

1673 terminates = {} 

1674 

1675 # Terminals: 

1676 for t in self.Terminals: 

1677 terminates[t] = True 

1678 

1679 terminates['$end'] = True 

1680 

1681 # Nonterminals: 

1682 

1683 # Initialize to false: 

1684 for n in self.Nonterminals: 

1685 terminates[n] = False 

1686 

1687 # Then propagate termination until no change: 

1688 while True: 

1689 some_change = False 

1690 for (n, pl) in self.Prodnames.items(): 

1691 # Nonterminal n terminates iff any of its productions terminates. 

1692 for p in pl: 

1693 # Production p terminates iff all of its rhs symbols terminate. 

1694 for s in p.prod: 

1695 if not terminates[s]: 

1696 # The symbol s does not terminate, 

1697 # so production p does not terminate. 

1698 p_terminates = False 

1699 break 

1700 else: 

1701 # didn't break from the loop, 

1702 # so every symbol s terminates 

1703 # so production p terminates. 

1704 p_terminates = True 

1705 

1706 if p_terminates: 

1707 # symbol n terminates! 

1708 if not terminates[n]: 

1709 terminates[n] = True 

1710 some_change = True 

1711 # Don't need to consider any more productions for this n. 

1712 break 

1713 

1714 if not some_change: 

1715 break 

1716 

1717 infinite = [] 

1718 for (s, term) in terminates.items(): 

1719 if not term: 

1720 if s not in self.Prodnames and s not in self.Terminals and s != 'error': 

1721 # s is used-but-not-defined, and we've already warned of that, 

1722 # so it would be overkill to say that it's also non-terminating. 

1723 pass 

1724 else: 

1725 infinite.append(s) 

1726 

1727 return infinite 

1728 
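For example, infinite_cycles() flags a rule that can never bottom out in a string of terminals. A hypothetical fragment (rule and token names are illustrative):

def p_a(p):
    'a : A a'

Every production for 'a' contains 'a' itself, so terminates['a'] never becomes True in the propagation loop above, and 'a' is reported as non-terminating.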

1729 # ----------------------------------------------------------------------------- 

1730 # undefined_symbols() 

1731 # 

1732 # Find all symbols that were used in the grammar, but not defined as tokens or 

1733 # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol 

1734 # and prod is the production where the symbol was used. 

1735 # ----------------------------------------------------------------------------- 

1736 def undefined_symbols(self): 

1737 result = [] 

1738 for p in self.Productions: 

1739 if not p: 

1740 continue 

1741 

1742 for s in p.prod: 

1743 if s not in self.Prodnames and s not in self.Terminals and s != 'error': 

1744 result.append((s, p)) 

1745 return result 

1746 

1747 # ----------------------------------------------------------------------------- 

1748 # unused_terminals() 

1749 # 

1750 # Find all terminals that were defined, but not used by the grammar. Returns 

1751 # a list of all symbols. 

1752 # ----------------------------------------------------------------------------- 

1753 def unused_terminals(self): 

1754 unused_tok = [] 

1755 for s, v in self.Terminals.items(): 

1756 if s != 'error' and not v: 

1757 unused_tok.append(s) 

1758 

1759 return unused_tok 

1760 

1761 # ------------------------------------------------------------------------------ 

1762 # unused_rules() 

1763 # 

1764 # Find all grammar rules that were defined, but not used (maybe not reachable) 

1765 # Returns a list of productions. 

1766 # ------------------------------------------------------------------------------ 

1767 

1768 def unused_rules(self): 

1769 unused_prod = [] 

1770 for s, v in self.Nonterminals.items(): 

1771 if not v: 

1772 p = self.Prodnames[s][0] 

1773 unused_prod.append(p) 

1774 return unused_prod 

1775 

1776 # ----------------------------------------------------------------------------- 

1777 # unused_precedence() 

1778 # 

1779 # Returns a list of tuples (term,precedence) corresponding to precedence 

1780 # rules that were never used by the grammar. term is the name of the terminal 

1781 # on which precedence was applied and precedence is a string such as 'left' or 

1782 # 'right' corresponding to the type of precedence. 

1783 # ----------------------------------------------------------------------------- 

1784 

1785 def unused_precedence(self): 

1786 unused = [] 

1787 for termname in self.Precedence: 

1788 if not (termname in self.Terminals or termname in self.UsedPrecedence): 

1789 unused.append((termname, self.Precedence[termname][0])) 

1790 

1791 return unused 

1792 

1793 # ------------------------------------------------------------------------- 

1794 # _first() 

1795 # 

1796 # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 

1797 # 

1798 # During execution of compute_first(), the result may be incomplete. 

1799 # Afterward (e.g., when called from compute_follow()), it will be complete. 

1800 # ------------------------------------------------------------------------- 

1801 def _first(self, beta): 

1802 

1803 # We are computing First(x1,x2,x3,...,xn) 

1804 result = [] 

1805 for x in beta: 

1806 x_produces_empty = False 

1807 

1808 # Add all the non-<empty> symbols of First[x] to the result. 

1809 for f in self.First[x]: 

1810 if f == '<empty>': 

1811 x_produces_empty = True 

1812 else: 

1813 if f not in result: 

1814 result.append(f) 

1815 

1816 if x_produces_empty: 

1817 # We have to consider the next x in beta, 

1818 # i.e. stay in the loop. 

1819 pass 

1820 else: 

1821 # We don't have to consider any further symbols in beta. 

1822 break 

1823 else: 

1824 # There was no 'break' from the loop, 

1825 # so x_produces_empty was true for all x in beta, 

1826 # so beta produces empty as well. 

1827 result.append('<empty>') 

1828 

1829 return result 

1830 
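A standalone sketch of the same FIRST1 scan, with assumed First sets for a nullable nonterminal A and a terminal y: a later symbol contributes only when every earlier symbol can derive <empty>.

First = {'A': ['x', '<empty>'], 'y': ['y']}

def first_of(beta):
    result = []
    for sym in beta:
        # add the non-<empty> symbols of First[sym] not already present
        result += [f for f in First[sym] if f != '<empty>' and f not in result]
        if '<empty>' not in First[sym]:
            break                      # sym is not nullable; stop scanning
    else:
        result.append('<empty>')       # every symbol was nullable
    return result

assert first_of(['A', 'y']) == ['x', 'y']
assert first_of(['A']) == ['x', '<empty>']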

1831 # ------------------------------------------------------------------------- 

1832 # compute_first() 

1833 # 

1834 # Compute the value of FIRST1(X) for all symbols 

1835 # ------------------------------------------------------------------------- 

1836 def compute_first(self): 

1837 if self.First: 

1838 return self.First 

1839 

1840 # Terminals: 

1841 for t in self.Terminals: 

1842 self.First[t] = [t] 

1843 

1844 self.First['$end'] = ['$end'] 

1845 

1846 # Nonterminals: 

1847 

1848 # Initialize to the empty set: 

1849 for n in self.Nonterminals: 

1850 self.First[n] = [] 

1851 

1852 # Then propagate symbols until no change: 

1853 while True: 

1854 some_change = False 

1855 for n in self.Nonterminals: 

1856 for p in self.Prodnames[n]: 

1857 for f in self._first(p.prod): 

1858 if f not in self.First[n]: 

1859 self.First[n].append(f) 

1860 some_change = True 

1861 if not some_change: 

1862 break 

1863 

1864 return self.First 

1865 

1866 # --------------------------------------------------------------------- 

1867 # compute_follow() 

1868 # 

1869 # Computes all of the follow sets for every non-terminal symbol. The 

1870 # follow set is the set of all symbols that might follow a given 

1871 # non-terminal. See the Dragon book, 2nd Ed. p. 189. 

1872 # --------------------------------------------------------------------- 

1873 def compute_follow(self, start=None): 

1874 # If already computed, return the result 

1875 if self.Follow: 

1876 return self.Follow 

1877 

1878 # If first sets not computed yet, do that first. 

1879 if not self.First: 

1880 self.compute_first() 

1881 

1882 # Add '$end' to the follow list of the start symbol 

1883 for k in self.Nonterminals: 

1884 self.Follow[k] = [] 

1885 

1886 if not start: 

1887 start = self.Productions[1].name 

1888 

1889 self.Follow[start] = ['$end'] 

1890 

1891 while True: 

1892 didadd = False 

1893 for p in self.Productions[1:]: 

1894 # Here is the production set 

1895 for i, B in enumerate(p.prod): 

1896 if B in self.Nonterminals: 

1897 # Okay. We got a non-terminal in a production 

1898 fst = self._first(p.prod[i+1:]) 

1899 hasempty = False 

1900 for f in fst: 

1901 if f != '<empty>' and f not in self.Follow[B]: 

1902 self.Follow[B].append(f) 

1903 didadd = True 

1904 if f == '<empty>': 

1905 hasempty = True 

1906 if hasempty or i == (len(p.prod)-1): 

1907 # Add elements of follow(a) to follow(b) 

1908 for f in self.Follow[p.name]: 

1909 if f not in self.Follow[B]: 

1910 self.Follow[B].append(f) 

1911 didadd = True 

1912 if not didadd: 

1913 break 

1914 return self.Follow 

1915 

1916 
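compute_follow() can be exercised directly on a tiny grammar; the import path below is an assumption based on this module's location in the pycparser tree:

from pycparser.ply.yacc import Grammar

g = Grammar(['A', 'B'])                         # terminals
g.add_production('s', ['x', 'B'], None, '<ex>', 1)
g.add_production('x', ['A'], None, '<ex>', 2)
g.set_start('s')

follow = g.compute_follow()
assert follow['s'] == ['$end']   # '$end' seeds the start symbol
assert follow['x'] == ['B']      # B appears immediately after x in s -> x B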

1917 # ----------------------------------------------------------------------------- 

1918 # build_lritems() 

1919 # 

1920 # This function walks the list of productions and builds a complete set of the 

1921 # LR items. The LR items are stored in two ways: First, they are uniquely 

1922 # numbered and placed in the list _lritems. Second, a linked list of LR items 

1923 # is built for each production. For example: 

1924 # 

1925 # E -> E PLUS E 

1926 # 

1927 # Creates the list 

1928 # 

1929 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 

1930 # ----------------------------------------------------------------------------- 

1931 

1932 def build_lritems(self): 

1933 for p in self.Productions: 

1934 lastlri = p 

1935 i = 0 

1936 lr_items = [] 

1937 while True: 

1938 if i > len(p): 

1939 lri = None 

1940 else: 

1941 lri = LRItem(p, i) 

1942 # Precompute the list of productions immediately following 

1943 try: 

1944 lri.lr_after = self.Prodnames[lri.prod[i+1]] 

1945 except (IndexError, KeyError): 

1946 lri.lr_after = [] 

1947 try: 

1948 lri.lr_before = lri.prod[i-1] 

1949 except IndexError: 

1950 lri.lr_before = None 

1951 

1952 lastlri.lr_next = lri 

1953 if not lri: 

1954 break 

1955 lr_items.append(lri) 

1956 lastlri = lri 

1957 i += 1 

1958 p.lr_items = lr_items 

1959 

1960# ----------------------------------------------------------------------------- 

1961# == Class LRTable == 

1962# 

1963 # This class represents a basic table of LR parsing information. 

1964# Methods for generating the tables are not defined here. They are defined 

1965# in the derived class LRGeneratedTable. 

1966# ----------------------------------------------------------------------------- 

1967 

1968class VersionError(YaccError): 

1969 pass 

1970 

1971class LRTable(object): 

1972 def __init__(self): 

1973 self.lr_action = None 

1974 self.lr_goto = None 

1975 self.lr_productions = None 

1976 self.lr_method = None 

1977 

1978 def read_table(self, module): 

1979 if isinstance(module, types.ModuleType): 

1980 parsetab = module 

1981 else: 

1982 exec('import %s' % module) 

1983 parsetab = sys.modules[module] 

1984 

1985 if parsetab._tabversion != __tabversion__: 

1986 raise VersionError('yacc table file version is out of date') 

1987 

1988 self.lr_action = parsetab._lr_action 

1989 self.lr_goto = parsetab._lr_goto 

1990 

1991 self.lr_productions = [] 

1992 for p in parsetab._lr_productions: 

1993 self.lr_productions.append(MiniProduction(*p)) 

1994 

1995 self.lr_method = parsetab._lr_method 

1996 return parsetab._lr_signature 

1997 

1998 def read_pickle(self, filename): 

1999 try: 

2000 import cPickle as pickle 

2001 except ImportError: 

2002 import pickle 

2003 

2004 if not os.path.exists(filename): 

2005 raise ImportError 

2006 

2007 in_f = open(filename, 'rb') 

2008 

2009 tabversion = pickle.load(in_f) 

2010 if tabversion != __tabversion__: 

2011 raise VersionError('yacc table file version is out of date') 

2012 self.lr_method = pickle.load(in_f) 

2013 signature = pickle.load(in_f) 

2014 self.lr_action = pickle.load(in_f) 

2015 self.lr_goto = pickle.load(in_f) 

2016 productions = pickle.load(in_f) 

2017 

2018 self.lr_productions = [] 

2019 for p in productions: 

2020 self.lr_productions.append(MiniProduction(*p)) 

2021 

2022 in_f.close() 

2023 return signature 

2024 

2025 # Bind all production function names to callable objects in pdict 

2026 def bind_callables(self, pdict): 

2027 for p in self.lr_productions: 

2028 p.bind(pdict) 

2029 

2030 

2031# ----------------------------------------------------------------------------- 

2032# === LR Generator === 

2033# 

2034# The following classes and functions are used to generate LR parsing tables on 

2035# a grammar. 

2036# ----------------------------------------------------------------------------- 

2037 

2038# ----------------------------------------------------------------------------- 

2039# digraph() 

2040# traverse() 

2041# 

2042 # The following two functions are used to compute set-valued functions 

2043# of the form: 

2044# 

2045# F(x) = F'(x) U U{F(y) | x R y} 

2046# 

2047# This is used to compute the values of Read() sets as well as FOLLOW sets 

2048# in LALR(1) generation. 

2049# 

2050# Inputs: X - An input set 

2051# R - A relation 

2052# FP - Set-valued function 

2053# ------------------------------------------------------------------------------ 

2054 

2055def digraph(X, R, FP): 

2056 N = {} 

2057 for x in X: 

2058 N[x] = 0 

2059 stack = [] 

2060 F = {} 

2061 for x in X: 

2062 if N[x] == 0: 

2063 traverse(x, N, stack, F, X, R, FP) 

2064 return F 

2065 

2066def traverse(x, N, stack, F, X, R, FP): 

2067 stack.append(x) 

2068 d = len(stack) 

2069 N[x] = d 

2070 F[x] = FP(x) # F(X) <- F'(x) 

2071 

2072 rel = R(x) # Get y's related to x 

2073 for y in rel: 

2074 if N[y] == 0: 

2075 traverse(y, N, stack, F, X, R, FP) 

2076 N[x] = min(N[x], N[y]) 

2077 for a in F.get(y, []): 

2078 if a not in F[x]: 

2079 F[x].append(a) 

2080 if N[x] == d: 

2081 N[stack[-1]] = MAXINT 

2082 F[stack[-1]] = F[x] 

2083 element = stack.pop() 

2084 while element != x: 

2085 N[stack[-1]] = MAXINT 

2086 F[stack[-1]] = F[x] 

2087 element = stack.pop() 

2088 
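A tiny worked run of digraph(), assuming this module is importable as pycparser.ply.yacc. FP gives the base sets F'(x); the relation R chains a -> b -> c, so results accumulate back along the chain:

from pycparser.ply.yacc import digraph

X = ['a', 'b', 'c']
R = lambda x: {'a': ['b'], 'b': ['c'], 'c': []}[x]
FP = lambda x: {'a': [1], 'b': [2], 'c': [3]}[x]

F = digraph(X, R, FP)
assert F['c'] == [3]
assert F['b'] == [2, 3]          # F(b) = F'(b) U F(c)
assert F['a'] == [1, 2, 3]       # F(a) = F'(a) U F(b)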

2089class LALRError(YaccError): 

2090 pass 

2091 

2092# ----------------------------------------------------------------------------- 

2093# == LRGeneratedTable == 

2094# 

2095# This class implements the LR table generation algorithm. There are no 

2096 # public methods except for write_table() and pickle_table(). 

2097# ----------------------------------------------------------------------------- 

2098 

2099class LRGeneratedTable(LRTable): 

2100 def __init__(self, grammar, method='LALR', log=None): 

2101 if method not in ['SLR', 'LALR']: 

2102 raise LALRError('Unsupported method %s' % method) 

2103 

2104 self.grammar = grammar 

2105 self.lr_method = method 

2106 

2107 # Set up the logger 

2108 if not log: 

2109 log = NullLogger() 

2110 self.log = log 

2111 

2112 # Internal attributes 

2113 self.lr_action = {} # Action table 

2114 self.lr_goto = {} # Goto table 

2115 self.lr_productions = grammar.Productions # Copy of grammar Production array 

2116 self.lr_goto_cache = {} # Cache of computed gotos 

2117 self.lr0_cidhash = {} # Cache of closures 

2118 

2119 self._add_count = 0 # Internal counter used to detect cycles 

2120 

2121 # Diagnostic information filled in by the table generator 

2122 self.sr_conflict = 0 

2123 self.rr_conflict = 0 

2124 self.conflicts = [] # List of conflicts 

2125 

2126 self.sr_conflicts = [] 

2127 self.rr_conflicts = [] 

2128 

2129 # Build the tables 

2130 self.grammar.build_lritems() 

2131 self.grammar.compute_first() 

2132 self.grammar.compute_follow() 

2133 self.lr_parse_table() 

2134 

2135 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 

2136 

2137 def lr0_closure(self, I): 

2138 self._add_count += 1 

2139 

2140 # Add everything in I to J 

2141 J = I[:] 

2142 didadd = True 

2143 while didadd: 

2144 didadd = False 

2145 for j in J: 

2146 for x in j.lr_after: 

2147 if getattr(x, 'lr0_added', 0) == self._add_count: 

2148 continue 

2149 # Add B --> .G to J 

2150 J.append(x.lr_next) 

2151 x.lr0_added = self._add_count 

2152 didadd = True 

2153 

2154 return J 

2155 

2156 # Compute the LR(0) goto function goto(I,X) where I is a set 

2157 # of LR(0) items and X is a grammar symbol. This function is written 

2158 # in a way that guarantees uniqueness of the generated goto sets 

2159 # (i.e. the same goto set will never be returned as two different Python 

2160 # objects). With uniqueness, we can later do fast set comparisons using 

2161 # id(obj) instead of element-wise comparison. 

2162 

2163 def lr0_goto(self, I, x): 

2164 # First we look for a previously cached entry 

2165 g = self.lr_goto_cache.get((id(I), x)) 

2166 if g: 

2167 return g 

2168 

2169 # Now we generate the goto set in a way that guarantees uniqueness 

2170 # of the result 

2171 

2172 s = self.lr_goto_cache.get(x) 

2173 if not s: 

2174 s = {} 

2175 self.lr_goto_cache[x] = s 

2176 

2177 gs = [] 

2178 for p in I: 

2179 n = p.lr_next 

2180 if n and n.lr_before == x: 

2181 s1 = s.get(id(n)) 

2182 if not s1: 

2183 s1 = {} 

2184 s[id(n)] = s1 

2185 gs.append(n) 

2186 s = s1 

2187 g = s.get('$end') 

2188 if not g: 

2189 if gs: 

2190 g = self.lr0_closure(gs) 

2191 s['$end'] = g 

2192 else: 

2193 s['$end'] = gs 

2194 self.lr_goto_cache[(id(I), x)] = g 

2195 return g 

2196 

2197 # Compute the LR(0) sets of item function 

2198 def lr0_items(self): 

2199 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] 

2200 i = 0 

2201 for I in C: 

2202 self.lr0_cidhash[id(I)] = i 

2203 i += 1 

2204 

2205 # Loop over the items in C and each grammar symbol 

2206 i = 0 

2207 while i < len(C): 

2208 I = C[i] 

2209 i += 1 

2210 

2211 # Collect all of the symbols that could possibly be in the goto(I,X) sets 

2212 asyms = {} 

2213 for ii in I: 

2214 for s in ii.usyms: 

2215 asyms[s] = None 

2216 

2217 for x in asyms: 

2218 g = self.lr0_goto(I, x) 

2219 if not g or id(g) in self.lr0_cidhash: 

2220 continue 

2221 self.lr0_cidhash[id(g)] = len(C) 

2222 C.append(g) 

2223 

2224 return C 

2225 

2226 # ----------------------------------------------------------------------------- 

2227 # ==== LALR(1) Parsing ==== 

2228 # 

2229 # LALR(1) parsing is almost exactly the same as SLR except that instead of 

2230 # relying upon Follow() sets when performing reductions, a more selective 

2231 # lookahead set that incorporates the state of the LR(0) machine is utilized. 

2232 # Thus, we mainly just have to focus on calculating the lookahead sets. 

2233 # 

2234 # The method used here is due to DeRemer and Pennello (1982). 

2235 # 

2236 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1) 

2237 # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 

2238 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 

2239 # 

2240 # Further details can also be found in: 

2241 # 

2242 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 

2243 # McGraw-Hill Book Company, (1985). 

2244 # 

2245 # ----------------------------------------------------------------------------- 

2246 

2247 # ----------------------------------------------------------------------------- 

2248 # compute_nullable_nonterminals() 

2249 # 

2250 # Creates a dictionary containing all of the non-terminals that might produce 

2251 # an empty production. 

2252 # ----------------------------------------------------------------------------- 

2253 

2254 def compute_nullable_nonterminals(self): 

2255 nullable = set() 

2256 num_nullable = 0 

2257 while True: 

2258 for p in self.grammar.Productions[1:]: 

2259 if p.len == 0: 

2260 nullable.add(p.name) 

2261 continue 

2262 for t in p.prod: 

2263 if t not in nullable: 

2264 break 

2265 else: 

2266 nullable.add(p.name) 

2267 if len(nullable) == num_nullable: 

2268 break 

2269 num_nullable = len(nullable) 

2270 return nullable 

2271 
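A standalone sketch of the same nullable fixpoint, with a hypothetical grammar given as (name, rhs) pairs: 'b' is directly empty, and 'a' becomes nullable because every symbol on its right-hand side is.

prods = [('a', ['b', 'b']), ('b', []), ('c', ['X'])]   # 'X' is a terminal
nullable = set()
changed = True
while changed:
    changed = False
    for name, rhs in prods:
        if name not in nullable and all(s in nullable for s in rhs):
            nullable.add(name)
            changed = True

assert nullable == {'a', 'b'}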

2272 # ----------------------------------------------------------------------------- 

2273 # find_nonterminal_transitions(C) 

2274 # 

2275 # Given a set of LR(0) items, this function finds all of the non-terminal 

2276 # transitions. These are transitions in which a dot appears immediately before 

2277 # a non-terminal. Returns a list of tuples of the form (state,N) where state 

2278 # is the state number and N is the nonterminal symbol. 

2279 # 

2280 # The input C is the set of LR(0) items. 

2281 # ----------------------------------------------------------------------------- 

2282 

2283 def find_nonterminal_transitions(self, C): 

2284 trans = [] 

2285 for stateno, state in enumerate(C): 

2286 for p in state: 

2287 if p.lr_index < p.len - 1: 

2288 t = (stateno, p.prod[p.lr_index+1]) 

2289 if t[1] in self.grammar.Nonterminals: 

2290 if t not in trans: 

2291 trans.append(t) 

2292 return trans 

2293 

2294 # ----------------------------------------------------------------------------- 

2295 # dr_relation() 

2296 # 

2297 # Computes the DR(p,A) relationships for non-terminal transitions. The input 

2298 # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 

2299 # 

2300 # Returns a list of terminals. 

2301 # ----------------------------------------------------------------------------- 

2302 

2303 def dr_relation(self, C, trans, nullable): 

2304 dr_set = {} 

2305 state, N = trans 

2306 terms = [] 

2307 

2308 g = self.lr0_goto(C[state], N) 

2309 for p in g: 

2310 if p.lr_index < p.len - 1: 

2311 a = p.prod[p.lr_index+1] 

2312 if a in self.grammar.Terminals: 

2313 if a not in terms: 

2314 terms.append(a) 

2315 

2316 # This extra bit is to handle the start state 

2317 if state == 0 and N == self.grammar.Productions[0].prod[0]: 

2318 terms.append('$end') 

2319 

2320 return terms 

2321 

2322 # ----------------------------------------------------------------------------- 

2323 # reads_relation() 

2324 # 

2325 # Computes the READS() relation (p,A) READS (t,C). 

2326 # ----------------------------------------------------------------------------- 

2327 

2328 def reads_relation(self, C, trans, empty): 

2329 # Look for empty transitions 

2330 rel = [] 

2331 state, N = trans 

2332 

2333 g = self.lr0_goto(C[state], N) 

2334 j = self.lr0_cidhash.get(id(g), -1) 

2335 for p in g: 

2336 if p.lr_index < p.len - 1: 

2337 a = p.prod[p.lr_index + 1] 

2338 if a in empty: 

2339 rel.append((j, a)) 

2340 

2341 return rel 

2342 

2343 # ----------------------------------------------------------------------------- 

2344 # compute_lookback_includes() 

2345 # 

2346 # Determines the lookback and includes relations 

2347 # 

2348 # LOOKBACK: 

2349 # 

2350 # This relation is determined by running the LR(0) state machine forward. 

2351 # For example, starting with a production "N : . A B C", we run it forward 

2352 # to obtain "N : A B C ." We then build a relationship between this final 

2353 # state and the starting state. These relationships are stored in a dictionary 

2354 # lookdict. 

2355 # 

2356 # INCLUDES: 

2357 # 

2358 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 

2359 # 

2360 # This relation is used to determine non-terminal transitions that occur 

2361 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 

2362 # if the following holds: 

2363 # 

2364 # B -> LAT, where T -> epsilon and p' -L-> p 

2365 # 

2366 # L is essentially a prefix (which may be empty), T is a suffix that must be 

2367 # able to derive an empty string. State p' must lead to state p with the string L. 

2368 # 

2369 # ----------------------------------------------------------------------------- 

2370 

2371 def compute_lookback_includes(self, C, trans, nullable): 

2372 lookdict = {} # Dictionary of lookback relations 

2373 includedict = {} # Dictionary of include relations 

2374 

2375 # Make a dictionary of non-terminal transitions 

2376 dtrans = {} 

2377 for t in trans: 

2378 dtrans[t] = 1 

2379 

2380 # Loop over all transitions and compute lookbacks and includes 

2381 for state, N in trans: 

2382 lookb = [] 

2383 includes = [] 

2384 for p in C[state]: 

2385 if p.name != N: 

2386 continue 

2387 

2388 # Okay, we have a name match. We now follow the production all the way 

2389 # through the state machine until we get the . on the right hand side 

2390 

2391 lr_index = p.lr_index 

2392 j = state 

2393 while lr_index < p.len - 1: 

2394 lr_index = lr_index + 1 

2395 t = p.prod[lr_index] 

2396 

2397 # Check to see if this symbol and state are a non-terminal transition 

2398 if (j, t) in dtrans: 

2399 # Yes. Okay, there is some chance that this is an includes relation 

2400 # the only way to know for certain is whether the rest of the 

2401 # production derives empty 

2402 

2403 li = lr_index + 1 

2404 while li < p.len: 

2405 if p.prod[li] in self.grammar.Terminals: 

2406 break # No, forget it 

2407 if p.prod[li] not in nullable: 

2408 break 

2409 li = li + 1 

2410 else: 

2411 # Appears to be a relation between (j,t) and (state,N) 

2412 includes.append((j, t)) 

2413 

2414 g = self.lr0_goto(C[j], t) # Go to next set 

2415 j = self.lr0_cidhash.get(id(g), -1) # Go to next state 

2416 

2417 # When we get here, j is the final state, now we have to locate the production 

2418 for r in C[j]: 

2419 if r.name != p.name: 

2420 continue 

2421 if r.len != p.len: 

2422 continue 

2423 i = 0 

2424 # This loop is comparing a production ". A B C" with "A B C ." 

2425 while i < r.lr_index: 

2426 if r.prod[i] != p.prod[i+1]: 

2427 break 

2428 i = i + 1 

2429 else: 

2430 lookb.append((j, r)) 

2431 for i in includes: 

2432 if i not in includedict: 

2433 includedict[i] = [] 

2434 includedict[i].append((state, N)) 

2435 lookdict[(state, N)] = lookb 

2436 

2437 return lookdict, includedict 

2438 

2439 # ----------------------------------------------------------------------------- 

2440 # compute_read_sets() 

2441 # 

2442 # Given a set of LR(0) items, this function computes the read sets. 

2443 # 

2444 # Inputs: C = Set of LR(0) items 

2445 # ntrans = Set of nonterminal transitions 

2446 # nullable = Set of empty transitions 

2447 # 

2448 # Returns a set containing the read sets 

2449 # ----------------------------------------------------------------------------- 

2450 

2451 def compute_read_sets(self, C, ntrans, nullable): 

2452 FP = lambda x: self.dr_relation(C, x, nullable) 

2453 R = lambda x: self.reads_relation(C, x, nullable) 

2454 F = digraph(ntrans, R, FP) 

2455 return F 

2456 

2457 # ----------------------------------------------------------------------------- 

2458 # compute_follow_sets() 

2459 # 

2460 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 

2461 # and an include set, this function computes the follow sets 

2462 # 

2463 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 

2464 # 

2465 # Inputs: 

2466 # ntrans = Set of nonterminal transitions 

2467 # readsets = Readset (previously computed) 

2468 # inclsets = Include sets (previously computed) 

2469 # 

2470 # Returns a set containing the follow sets 

2471 # ----------------------------------------------------------------------------- 

2472 

2473 def compute_follow_sets(self, ntrans, readsets, inclsets): 

2474 FP = lambda x: readsets[x] 

2475 R = lambda x: inclsets.get(x, []) 

2476 F = digraph(ntrans, R, FP) 

2477 return F 

2478 

2479 # ----------------------------------------------------------------------------- 

2480 # add_lookaheads() 

2481 # 

2482 # Attaches the lookahead symbols to grammar rules. 

2483 # 

2484 # Inputs: lookbacks - Set of lookback relations 

2485 # followset - Computed follow set 

2486 # 

2487 # This function directly attaches the lookaheads to productions contained 

2488 # in the lookbacks set 

2489 # ----------------------------------------------------------------------------- 

2490 

2491 def add_lookaheads(self, lookbacks, followset): 

2492 for trans, lb in lookbacks.items(): 

2493 # Loop over productions in lookback 

2494 for state, p in lb: 

2495 if state not in p.lookaheads: 

2496 p.lookaheads[state] = [] 

2497 f = followset.get(trans, []) 

2498 for a in f: 

2499 if a not in p.lookaheads[state]: 

2500 p.lookaheads[state].append(a) 

2501 

2502 # ----------------------------------------------------------------------------- 

2503 # add_lalr_lookaheads() 

2504 # 

2505 # This function does all of the work of adding lookahead information for use 

2506 # with LALR parsing 

2507 # ----------------------------------------------------------------------------- 

2508 

2509 def add_lalr_lookaheads(self, C): 

2510 # Determine all of the nullable nonterminals 

2511 nullable = self.compute_nullable_nonterminals() 

2512 

2513 # Find all non-terminal transitions 

2514 trans = self.find_nonterminal_transitions(C) 

2515 

2516 # Compute read sets 

2517 readsets = self.compute_read_sets(C, trans, nullable) 

2518 

2519 # Compute lookback/includes relations 

2520 lookd, included = self.compute_lookback_includes(C, trans, nullable) 

2521 

2522 # Compute LALR FOLLOW sets 

2523 followsets = self.compute_follow_sets(trans, readsets, included) 

2524 

2525 # Add all of the lookaheads 

2526 self.add_lookaheads(lookd, followsets) 

2527 

2528 # ----------------------------------------------------------------------------- 

2529 # lr_parse_table() 

2530 # 

2531 # This function constructs the parse tables for SLR or LALR 

2532 # ----------------------------------------------------------------------------- 

2533 def lr_parse_table(self): 

2534 Productions = self.grammar.Productions 

2535 Precedence = self.grammar.Precedence 

2536 goto = self.lr_goto # Goto array 

2537 action = self.lr_action # Action array 

2538 log = self.log # Logger for output 

2539 

2540 actionp = {} # Action production array (temporary) 

2541 

2542 log.info('Parsing method: %s', self.lr_method) 

2543 

2544 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 

2545 # This determines the number of states 

2546 

2547 C = self.lr0_items() 

2548 

2549 if self.lr_method == 'LALR': 

2550 self.add_lalr_lookaheads(C) 

2551 

2552 # Build the parser table, state by state 

2553 st = 0 

2554 for I in C: 

2555 # Loop over each production in I 

2556 actlist = [] # List of actions 

2557 st_action = {} 

2558 st_actionp = {} 

2559 st_goto = {} 

2560 log.info('') 

2561 log.info('state %d', st) 

2562 log.info('') 

2563 for p in I: 

2564 log.info(' (%d) %s', p.number, p) 

2565 log.info('') 

2566 

2567 for p in I: 

2568 if p.len == p.lr_index + 1: 

2569 if p.name == "S'": 

2570 # Start symbol. Accept! 

2571 st_action['$end'] = 0 

2572 st_actionp['$end'] = p 

2573 else: 

2574 # We are at the end of a production. Reduce! 

2575 if self.lr_method == 'LALR': 

2576 laheads = p.lookaheads[st] 

2577 else: 

2578 laheads = self.grammar.Follow[p.name] 

2579 for a in laheads: 

2580 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) 

2581 r = st_action.get(a) 

2582 if r is not None: 

2583 # Whoa. Have a shift/reduce or reduce/reduce conflict 

2584 if r > 0: 

2585 # Need to decide on shift or reduce here 

2586 # By default we favor shifting. Need to add 

2587 # some precedence rules here. 

2588 

2589 # Shift precedence comes from the token 

2590 sprec, slevel = Precedence.get(a, ('right', 0)) 

2591 

2592 # Reduce precedence comes from rule being reduced (p) 

2593 rprec, rlevel = Productions[p.number].prec 

2594 

2595 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 

2596 # We really need to reduce here. 

2597 st_action[a] = -p.number 

2598 st_actionp[a] = p 

2599 if not slevel and not rlevel: 

2600 log.info(' ! shift/reduce conflict for %s resolved as reduce', a) 

2601 self.sr_conflicts.append((st, a, 'reduce')) 

2602 Productions[p.number].reduced += 1 

2603 elif (slevel == rlevel) and (rprec == 'nonassoc'): 

2604 st_action[a] = None 

2605 else: 

2606 # Hmmm. Guess we'll keep the shift 

2607 if not rlevel: 

2608 log.info(' ! shift/reduce conflict for %s resolved as shift', a) 

2609 self.sr_conflicts.append((st, a, 'shift')) 

2610 elif r < 0: 

2611 # Reduce/reduce conflict. In this case, we favor the rule 

2612 # that was defined first in the grammar file 

2613 oldp = Productions[-r] 

2614 pp = Productions[p.number] 

2615 if oldp.line > pp.line: 

2616 st_action[a] = -p.number 

2617 st_actionp[a] = p 

2618 chosenp, rejectp = pp, oldp 

2619 Productions[p.number].reduced += 1 

2620 Productions[oldp.number].reduced -= 1 

2621 else: 

2622 chosenp, rejectp = oldp, pp 

2623 self.rr_conflicts.append((st, chosenp, rejectp)) 

2624 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', 

2625 a, st_actionp[a].number, st_actionp[a]) 

2626 else: 

2627 raise LALRError('Unknown conflict in state %d' % st) 

2628 else: 

2629 st_action[a] = -p.number 

2630 st_actionp[a] = p 

2631 Productions[p.number].reduced += 1 

2632 else: 

2633 i = p.lr_index 

2634 a = p.prod[i+1] # Get symbol right after the "." 

2635 if a in self.grammar.Terminals: 

2636 g = self.lr0_goto(I, a) 

2637 j = self.lr0_cidhash.get(id(g), -1) 

2638 if j >= 0: 

2639 # We are in a shift state 

2640 actlist.append((a, p, 'shift and go to state %d' % j)) 

2641 r = st_action.get(a) 

2642 if r is not None: 

2643 # Whoa. Have a shift/reduce or shift/shift conflict 

2644 if r > 0: 

2645 if r != j: 

2646 raise LALRError('Shift/shift conflict in state %d' % st) 

2647 elif r < 0: 

2648 # Do a precedence check. 

2649 # - if precedence of reduce rule is higher, we reduce. 

2650 # - if precedence of reduce is same and left assoc, we reduce. 

2651 # - otherwise we shift 

2652 

2653 # Shift precedence comes from the token 

2654 sprec, slevel = Precedence.get(a, ('right', 0)) 

2655 

2656 # Reduce precedence comes from the rule that could have been reduced 

2657 rprec, rlevel = Productions[st_actionp[a].number].prec 

2658 

2659 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): 

2660 # We decide to shift here... highest precedence to shift 

2661 Productions[st_actionp[a].number].reduced -= 1 

2662 st_action[a] = j 

2663 st_actionp[a] = p 

2664 if not rlevel: 

2665 log.info(' ! shift/reduce conflict for %s resolved as shift', a) 

2666 self.sr_conflicts.append((st, a, 'shift')) 

2667 elif (slevel == rlevel) and (rprec == 'nonassoc'): 

2668 st_action[a] = None 

2669 else: 

2670 # Hmmm. Guess we'll keep the reduce 

2671 if not slevel and not rlevel: 

2672 log.info(' ! shift/reduce conflict for %s resolved as reduce', a) 

2673 self.sr_conflicts.append((st, a, 'reduce')) 

2674 

2675 else: 

2676 raise LALRError('Unknown conflict in state %d' % st) 

2677 else: 

2678 st_action[a] = j 

2679 st_actionp[a] = p 

2680 

2681 # Print the actions associated with each terminal 

2682 _actprint = {} 

2683 for a, p, m in actlist: 

2684 if a in st_action: 

2685 if p is st_actionp[a]: 

2686 log.info(' %-15s %s', a, m) 

2687 _actprint[(a, m)] = 1 

2688 log.info('') 

2689 # Print the actions that were not used. (debugging) 

2690 not_used = 0 

2691 for a, p, m in actlist: 

2692 if a in st_action: 

2693 if p is not st_actionp[a]: 

2694 if not (a, m) in _actprint: 

2695 log.debug(' ! %-15s [ %s ]', a, m) 

2696 not_used = 1 

2697 _actprint[(a, m)] = 1 

2698 if not_used: 

2699 log.debug('') 

2700 

2701 # Construct the goto table for this state 

2702 

2703 nkeys = {} 

2704 for ii in I: 

2705 for s in ii.usyms: 

2706 if s in self.grammar.Nonterminals: 

2707 nkeys[s] = None 

2708 for n in nkeys: 

2709 g = self.lr0_goto(I, n) 

2710 j = self.lr0_cidhash.get(id(g), -1) 

2711 if j >= 0: 

2712 st_goto[n] = j 

2713 log.info(' %-30s shift and go to state %d', n, j) 

2714 

2715 action[st] = st_action 

2716 actionp[st] = st_actionp 

2717 goto[st] = st_goto 

2718 st += 1 

2719 

2720 # ----------------------------------------------------------------------------- 

2721 # write_table() 

2722 # 

2723 # This function writes the LR parsing tables to a file 

2724 # ----------------------------------------------------------------------------- 

2725 

2726 def write_table(self, tabmodule, outputdir='', signature=''): 

2727 if isinstance(tabmodule, types.ModuleType): 

2728 raise IOError("Won't overwrite existing tabmodule") 

2729 

2730 basemodulename = tabmodule.split('.')[-1] 

2731 filename = os.path.join(outputdir, basemodulename) + '.py' 

2732 try: 

2733 f = open(filename, 'w') 

2734 

2735 f.write(''' 

2736# %s 

2737# This file is automatically generated. Do not edit. 

2738_tabversion = %r 

2739 

2740_lr_method = %r 

2741 

2742_lr_signature = %r 

2743 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) 

2744 

2745 # Change smaller to 0 to go back to original tables 

2746 smaller = 1 

2747 

2748 # Factor out names to try and make smaller 

2749 if smaller: 

2750 items = {} 

2751 

2752 for s, nd in self.lr_action.items(): 

2753 for name, v in nd.items(): 

2754 i = items.get(name) 

2755 if not i: 

2756 i = ([], []) 

2757 items[name] = i 

2758 i[0].append(s) 

2759 i[1].append(v) 

2760 

2761 f.write('\n_lr_action_items = {') 

2762 for k, v in items.items(): 

2763 f.write('%r:([' % k) 

2764 for i in v[0]: 

2765 f.write('%r,' % i) 

2766 f.write('],[') 

2767 for i in v[1]: 

2768 f.write('%r,' % i) 

2769 

2770 f.write(']),') 

2771 f.write('}\n') 

2772 

2773 f.write(''' 

2774_lr_action = {} 

2775for _k, _v in _lr_action_items.items(): 

2776 for _x,_y in zip(_v[0],_v[1]): 

2777 if not _x in _lr_action: _lr_action[_x] = {} 

2778 _lr_action[_x][_k] = _y 

2779del _lr_action_items 

2780''') 

2781 

2782 else: 

2783 f.write('\n_lr_action = { ') 

2784 for k, v in self.lr_action.items(): 

2785 f.write('(%r,%r):%r,' % (k[0], k[1], v)) 

2786 f.write('}\n') 

2787 

2788 if smaller: 

2789 # Factor out names to try and make smaller 

2790 items = {} 

2791 

2792 for s, nd in self.lr_goto.items(): 

2793 for name, v in nd.items(): 

2794 i = items.get(name) 

2795 if not i: 

2796 i = ([], []) 

2797 items[name] = i 

2798 i[0].append(s) 

2799 i[1].append(v) 

2800 

2801 f.write('\n_lr_goto_items = {') 

2802 for k, v in items.items(): 

2803 f.write('%r:([' % k) 

2804 for i in v[0]: 

2805 f.write('%r,' % i) 

2806 f.write('],[') 

2807 for i in v[1]: 

2808 f.write('%r,' % i) 

2809 

2810 f.write(']),') 

2811 f.write('}\n') 

2812 

2813 f.write(''' 

2814_lr_goto = {} 

2815for _k, _v in _lr_goto_items.items(): 

2816 for _x, _y in zip(_v[0], _v[1]): 

2817 if not _x in _lr_goto: _lr_goto[_x] = {} 

2818 _lr_goto[_x][_k] = _y 

2819del _lr_goto_items 

2820''') 

2821 else: 

2822 f.write('\n_lr_goto = { ') 

2823 for k, v in self.lr_goto.items(): 

2824 f.write('(%r,%r):%r,' % (k[0], k[1], v)) 

2825 f.write('}\n') 

2826 

2827 # Write production table 

2828 f.write('_lr_productions = [\n') 

2829 for p in self.lr_productions: 

2830 if p.func: 

2831 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, 

2832 p.func, os.path.basename(p.file), p.line)) 

2833 else: 

2834 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) 

2835 f.write(']\n') 

2836 f.close() 

2837 

2838 except IOError as e: 

2839 raise 

2840 

2841 

2842 # ----------------------------------------------------------------------------- 

2843 # pickle_table() 

2844 # 

2845 # This function pickles the LR parsing tables to a supplied file object 

2846 # ----------------------------------------------------------------------------- 

2847 

2848 def pickle_table(self, filename, signature=''): 

2849 try: 

2850 import cPickle as pickle 

2851 except ImportError: 

2852 import pickle 

2853 with open(filename, 'wb') as outf: 

2854 pickle.dump(__tabversion__, outf, pickle_protocol) 

2855 pickle.dump(self.lr_method, outf, pickle_protocol) 

2856 pickle.dump(signature, outf, pickle_protocol) 

2857 pickle.dump(self.lr_action, outf, pickle_protocol) 

2858 pickle.dump(self.lr_goto, outf, pickle_protocol) 

2859 

2860 outp = [] 

2861 for p in self.lr_productions: 

2862 if p.func: 

2863 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) 

2864 else: 

2865 outp.append((str(p), p.name, p.len, None, None, None)) 

2866 pickle.dump(outp, outf, pickle_protocol) 

2867 
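A sketch of the matching read path, using a hypothetical 'parser.p' file written earlier by pickle_table(); read_pickle() returns the stored signature and raises VersionError when __tabversion__ no longer matches:

from pycparser.ply.yacc import LRTable

lr = LRTable()
signature = lr.read_pickle('parser.p')   # hypothetical file from pickle_table()
lr.bind_callables(globals())             # rebind p_ functions by name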

2868# ----------------------------------------------------------------------------- 

2869# === INTROSPECTION === 

2870# 

2871# The following functions and classes are used to implement the PLY 

2872# introspection features followed by the yacc() function itself. 

2873# ----------------------------------------------------------------------------- 

2874 

2875# ----------------------------------------------------------------------------- 

2876# get_caller_module_dict() 

2877# 

2878# This function returns a dictionary containing all of the symbols defined within 

2879# a caller further down the call stack. This is used to get the environment 

2880# associated with the yacc() call if none was provided. 

2881# ----------------------------------------------------------------------------- 

2882 

2883def get_caller_module_dict(levels): 

2884 f = sys._getframe(levels) 

2885 ldict = f.f_globals.copy() 

2886 if f.f_globals != f.f_locals: 

2887 ldict.update(f.f_locals) 

2888 return ldict 

2889 
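A small demonstration of the frame walk (names are illustrative): one level up from inside get_caller_module_dict() is the calling frame, whose locals are merged over its globals.

from pycparser.ply.yacc import get_caller_module_dict

def build_here():
    local_flag = True                   # ends up in the returned dict
    return get_caller_module_dict(1)    # frame 1 = the calling frame

env = build_here()
assert env['local_flag'] is True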

2890# ----------------------------------------------------------------------------- 

2891# parse_grammar() 

2892# 

2893# This takes a raw grammar rule string and parses it into production data 

2894# ----------------------------------------------------------------------------- 

2895def parse_grammar(doc, file, line): 

2896 grammar = [] 

2897 # Split the doc string into lines 

2898 pstrings = doc.splitlines() 

2899 lastp = None 

2900 dline = line 

2901 for ps in pstrings: 

2902 dline += 1 

2903 p = ps.split() 

2904 if not p: 

2905 continue 

2906 try: 

2907 if p[0] == '|': 

2908 # This is a continuation of a previous rule 

2909 if not lastp: 

2910 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) 

2911 prodname = lastp 

2912 syms = p[1:] 

2913 else: 

2914 prodname = p[0] 

2915 lastp = prodname 

2916 syms = p[2:] 

2917 assign = p[1] 

2918 if assign != ':' and assign != '::=': 

2919 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) 

2920 

2921 grammar.append((file, dline, prodname, syms)) 

2922 except SyntaxError: 

2923 raise 

2924 except Exception: 

2925 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) 

2926 

2927 return grammar 

2928 
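A worked call with a hypothetical two-line docstring: a leading '|' continues the most recent rule, and reported line numbers are offsets from the function's own line.

from pycparser.ply.yacc import parse_grammar

doc = """expression : expression PLUS term
             | term"""
g = parse_grammar(doc, 'calc.py', 10)
assert g == [('calc.py', 11, 'expression', ['expression', 'PLUS', 'term']),
             ('calc.py', 12, 'expression', ['term'])]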

2929# ----------------------------------------------------------------------------- 

2930# ParserReflect() 

2931# 

2932# This class represents information extracted for building a parser including 

2933# start symbol, error function, tokens, precedence list, action functions, 

2934# etc. 

2935# ----------------------------------------------------------------------------- 

2936class ParserReflect(object): 

2937 def __init__(self, pdict, log=None): 

2938 self.pdict = pdict 

2939 self.start = None 

2940 self.error_func = None 

2941 self.tokens = None 

2942 self.modules = set() 

2943 self.grammar = [] 

2944 self.error = False 

2945 

2946 if log is None: 

2947 self.log = PlyLogger(sys.stderr) 

2948 else: 

2949 self.log = log 

2950 

2951 # Get all of the basic information 

2952 def get_all(self): 

2953 self.get_start() 

2954 self.get_error_func() 

2955 self.get_tokens() 

2956 self.get_precedence() 

2957 self.get_pfunctions() 

2958 

2959 # Validate all of the information 

2960 def validate_all(self): 

2961 self.validate_start() 

2962 self.validate_error_func() 

2963 self.validate_tokens() 

2964 self.validate_precedence() 

2965 self.validate_pfunctions() 

2966 self.validate_modules() 

2967 return self.error 

2968 

2969 # Compute a signature over the grammar 

2970 def signature(self): 

2971 parts = [] 

2972 try: 

2973 if self.start: 

2974 parts.append(self.start) 

2975 if self.prec: 

2976 parts.append(''.join([''.join(p) for p in self.prec])) 

2977 if self.tokens: 

2978 parts.append(' '.join(self.tokens)) 

2979 for f in self.pfuncs: 

2980 if f[3]: 

2981 parts.append(f[3]) 

2982 except (TypeError, ValueError): 

2983 pass 

2984 return ''.join(parts) 

2985 

2986 # ----------------------------------------------------------------------------- 

2987 # validate_modules() 

2988 # 

2989 # This method checks to see if there are duplicated p_rulename() functions 

2990 # in the parser module file. Without this function, it is really easy for 

2991 # users to make mistakes by cutting and pasting code fragments (and it's a real 

2992 # bugger to try and figure out why the resulting parser doesn't work). Therefore, 

2993 # we just do a little regular expression pattern matching of def statements 

2994 # to try and detect duplicates. 

2995 # ----------------------------------------------------------------------------- 

2996 

2997 def validate_modules(self): 

2998 # Match def p_funcname( 

2999 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 

3000 

3001 for module in self.modules: 

3002 try: 

3003 lines, linen = inspect.getsourcelines(module) 

3004 except IOError: 

3005 continue 

3006 

3007 counthash = {} 

3008 for linen, line in enumerate(lines): 

3009 linen += 1 

3010 m = fre.match(line) 

3011 if m: 

3012 name = m.group(1) 

3013 prev = counthash.get(name) 

3014 if not prev: 

3015 counthash[name] = linen 

3016 else: 

3017 filename = inspect.getsourcefile(module) 

3018 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', 

3019 filename, linen, name, prev) 

3020 

3021 # Get the start symbol 

3022 def get_start(self): 

3023 self.start = self.pdict.get('start') 

3024 

3025 # Validate the start symbol 

3026 def validate_start(self): 

3027 if self.start is not None: 

3028 if not isinstance(self.start, string_types): 

3029 self.log.error("'start' must be a string") 

3030 

3031 # Look for error handler 

3032 def get_error_func(self): 

3033 self.error_func = self.pdict.get('p_error') 

3034 

3035 # Validate the error function 

3036 def validate_error_func(self): 

3037 if self.error_func: 

3038 if isinstance(self.error_func, types.FunctionType): 

3039 ismethod = 0 

3040 elif isinstance(self.error_func, types.MethodType): 

3041 ismethod = 1 

3042 else: 

3043 self.log.error("'p_error' defined, but is not a function or method") 

3044 self.error = True 

3045 return 

3046 

3047 eline = self.error_func.__code__.co_firstlineno 

3048 efile = self.error_func.__code__.co_filename 

3049 module = inspect.getmodule(self.error_func) 

3050 self.modules.add(module) 

3051 

3052 argcount = self.error_func.__code__.co_argcount - ismethod 

3053 if argcount != 1: 

3054 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) 

3055 self.error = True 

3056 

3057 # Get the tokens map 

3058 def get_tokens(self): 

3059 tokens = self.pdict.get('tokens') 

3060 if not tokens: 

3061 self.log.error('No token list is defined') 

3062 self.error = True 

3063 return 

3064 

3065 if not isinstance(tokens, (list, tuple)): 

3066 self.log.error('tokens must be a list or tuple') 

3067 self.error = True 

3068 return 

3069 

3070 if not tokens: 

3071 self.log.error('tokens is empty') 

3072 self.error = True 

3073 return 

3074 

3075 self.tokens = tokens 

3076 

3077 # Validate the tokens 

3078 def validate_tokens(self): 

3079 # Validate the tokens. 

3080 if 'error' in self.tokens: 

3081 self.log.error("Illegal token name 'error'. Is a reserved word") 

3082 self.error = True 

3083 return 

3084 

3085 terminals = set() 

3086 for n in self.tokens: 

3087 if n in terminals: 

3088 self.log.warning('Token %r multiply defined', n) 

3089 terminals.add(n) 

3090 

3091 # Get the precedence map (if any) 

3092 def get_precedence(self): 

3093 self.prec = self.pdict.get('precedence') 

3094 

3095 # Validate and parse the precedence map 

3096 def validate_precedence(self): 

3097 preclist = [] 

3098 if self.prec: 

3099 if not isinstance(self.prec, (list, tuple)): 

3100 self.log.error('precedence must be a list or tuple') 

3101 self.error = True 

3102 return 

3103 for level, p in enumerate(self.prec): 

3104 if not isinstance(p, (list, tuple)): 

3105 self.log.error('Bad precedence table') 

3106 self.error = True 

3107 return 

3108 

3109 if len(p) < 2: 

3110 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p) 

3111 self.error = True 

3112 return 

3113 assoc = p[0] 

3114 if not isinstance(assoc, string_types): 

3115 self.log.error('precedence associativity must be a string') 

3116 self.error = True 

3117 return 

3118 for term in p[1:]: 

3119 if not isinstance(term, string_types): 

3120 self.log.error('precedence items must be strings') 

3121 self.error = True 

3122 return 

3123 preclist.append((term, assoc, level+1)) 

3124 self.preclist = preclist 

3125 
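The table being validated here has the standard PLY shape: a sequence of (assoc, term, ..., term) tuples, with later entries binding tighter. Token names below are illustrative:

precedence = (
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE'),
    ('right', 'UMINUS'),            # level 3, the highest here
)

validate_precedence() flattens this into preclist entries such as ('PLUS', 'left', 1).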

3126 # Get all p_functions from the grammar 

3127 def get_pfunctions(self): 

3128 p_functions = [] 

3129 for name, item in self.pdict.items(): 

3130 if not name.startswith('p_') or name == 'p_error': 

3131 continue 

3132 if isinstance(item, (types.FunctionType, types.MethodType)): 

3133 line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) 

3134 module = inspect.getmodule(item) 

3135 p_functions.append((line, module, name, item.__doc__)) 

3136 

3137 # Sort all of the actions by line number; make sure to stringify 

3138 # modules to make them sortable, since `line` may not uniquely sort all 

3139 # p functions 

3140 p_functions.sort(key=lambda p_function: ( 

3141 p_function[0], 

3142 str(p_function[1]), 

3143 p_function[2], 

3144 p_function[3])) 

3145 self.pfuncs = p_functions 

3146 

3147 # Validate all of the p_functions 

3148 def validate_pfunctions(self): 

3149 grammar = [] 

3150 # Check for non-empty symbols 

3151 if len(self.pfuncs) == 0: 

3152 self.log.error('no rules of the form p_rulename are defined') 

3153 self.error = True 

3154 return 

3155 

3156 for line, module, name, doc in self.pfuncs: 

3157 file = inspect.getsourcefile(module) 

3158 func = self.pdict[name] 

3159 if isinstance(func, types.MethodType): 

3160 reqargs = 2 

3161 else: 

3162 reqargs = 1 

3163 if func.__code__.co_argcount > reqargs: 

3164 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) 

3165 self.error = True 

3166 elif func.__code__.co_argcount < reqargs: 

3167 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) 

3168 self.error = True 

3169 elif not func.__doc__: 

3170 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', 

3171 file, line, func.__name__) 

3172 else: 

3173 try: 

3174 parsed_g = parse_grammar(doc, file, line) 

3175 for g in parsed_g: 

3176 grammar.append((name, g)) 

3177 except SyntaxError as e: 

3178 self.log.error(str(e)) 

3179 self.error = True 

3180 

3181 # Looks like a valid grammar rule 

3182 # Mark the file in which defined. 

3183 self.modules.add(module) 

3184 

3185 # Secondary validation step that looks for p_ definitions that are not functions 

3186 # or functions that look like they might be grammar rules. 

3187 

3188 for n, v in self.pdict.items(): 

3189 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): 

3190 continue 

3191 if n.startswith('t_'): 

3192 continue 

3193 if n.startswith('p_') and n != 'p_error': 

3194 self.log.warning('%r not defined as a function', n) 

3195 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or 

3196 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): 

3197 if v.__doc__: 

3198 try: 

3199 doc = v.__doc__.split(' ') 

3200 if doc[1] == ':': 

3201 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', 

3202 v.__code__.co_filename, v.__code__.co_firstlineno, n) 

3203 except IndexError: 

3204 pass 

3205 

3206 self.grammar = grammar 

3207 
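For orientation, this is the kind of module namespace ParserReflect harvests: 'tokens', p_ rules, and p_error are found purely by name. A minimal sketch (token names are illustrative; a matching lexer is assumed at parse time):

import pycparser.ply.yacc as yacc

tokens = ('NUMBER', 'PLUS')

def p_expression_plus(p):
    'expression : expression PLUS term'
    p[0] = p[1] + p[3]

def p_expression_term(p):
    'expression : term'
    p[0] = p[1]

def p_term_number(p):
    'term : NUMBER'
    p[0] = p[1]

def p_error(p):
    print('Syntax error at', p)

# Builds tables by reflecting this module; write_tables=False skips parsetab.py
parser = yacc.yacc(write_tables=False)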

3208# ----------------------------------------------------------------------------- 

3209# yacc(module) 

3210# 

3211# Build a parser 

3212# ----------------------------------------------------------------------------- 

3213 

3214def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, 

3215 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, 

3216 outputdir=None, debuglog=None, errorlog=None, picklefile=None): 

3217 

3218 if tabmodule is None: 

3219 tabmodule = tab_module 

3220 

3221 # Reference to the parsing method of the last built parser 

3222 global parse 

3223 

3224 # If pickling is enabled, table files are not created 

3225 if picklefile: 

3226 write_tables = 0 

3227 

3228 if errorlog is None: 

3229 errorlog = PlyLogger(sys.stderr) 

3230 

3231 # Get the module dictionary used for the parser 

3232 if module: 

3233 _items = [(k, getattr(module, k)) for k in dir(module)] 

3234 pdict = dict(_items) 

3235 # If no __file__ attribute is available, try to obtain it from the __module__ instead 

3236 if '__file__' not in pdict: 

3237 pdict['__file__'] = sys.modules[pdict['__module__']].__file__ 

3238 else: 

3239 pdict = get_caller_module_dict(2) 

3240 

3241 if outputdir is None: 

3242 # If no output directory is set, the location of the output files 

3243 # is determined according to the following rules: 

3244 # - If tabmodule specifies a package, files go into that package directory 

3245 # - Otherwise, files go in the same directory as the specifying module 

3246 if isinstance(tabmodule, types.ModuleType): 

3247 srcfile = tabmodule.__file__ 

3248 else: 

3249 if '.' not in tabmodule: 

3250 srcfile = pdict['__file__'] 

3251 else: 

3252 parts = tabmodule.split('.') 

3253 pkgname = '.'.join(parts[:-1]) 

3254 exec('import %s' % pkgname) 

3255 srcfile = getattr(sys.modules[pkgname], '__file__', '') 

3256 outputdir = os.path.dirname(srcfile) 

3257 

3258 # Determine if the module is part of a package or not. 

3259 # If so, fix the tabmodule setting so that tables load correctly 

3260 pkg = pdict.get('__package__') 

3261 if pkg and isinstance(tabmodule, str): 

3262 if '.' not in tabmodule: 

3263 tabmodule = pkg + '.' + tabmodule 

3264 

3265 

3266 

3267 # Set start symbol if it's specified directly using an argument 

3268 if start is not None: 

3269 pdict['start'] = start 

3270 

3271 # Collect parser information from the dictionary 

3272 pinfo = ParserReflect(pdict, log=errorlog) 

3273 pinfo.get_all() 

3274 

3275 if pinfo.error: 

3276 raise YaccError('Unable to build parser') 

3277 

3278 # Check signature against table files (if any) 

3279 signature = pinfo.signature() 

3280 

3281 # Read the tables 

3282 try: 

3283 lr = LRTable() 

3284 if picklefile: 

3285 read_signature = lr.read_pickle(picklefile) 

3286 else: 

3287 read_signature = lr.read_table(tabmodule) 

3288 if optimize or (read_signature == signature): 

3289 try: 

3290 lr.bind_callables(pinfo.pdict) 

3291 parser = LRParser(lr, pinfo.error_func) 

3292 parse = parser.parse 

3293 return parser 

3294 except Exception as e: 

3295 errorlog.warning('There was a problem loading the table file: %r', e) 

3296 except VersionError as e: 

3297 errorlog.warning(str(e)) 

3298 except ImportError: 

3299 pass 

3300 

    if debuglog is None:
        if debug:
            try:
                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
            except IOError as e:
                errorlog.warning("Couldn't open %r. %s", debugfile, e)
                debuglog = NullLogger()
        else:
            debuglog = NullLogger()

    debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)

    errors = False

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError('Unable to build parser')

    if not pinfo.error_func:
        errorlog.warning('no p_error() function is defined')

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)
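
    # Illustrative only: the warning above is silenced by defining p_error()
    # in the grammar module; it is called with the offending token, or None at
    # end-of-input:
    #
    #     def p_error(p):
    #         if p:
    #             print('Syntax error at %r' % p.value)
    #         else:
    #             print('Syntax error at EOF')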

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError as e:
            errorlog.warning('%s', e)
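
    # Illustrative only: pinfo.preclist is flattened from a module-level
    # 'precedence' tuple; rows further down bind tighter:
    #
    #     precedence = (
    #         ('left',  'PLUS', 'MINUS'),    # hypothetical token names
    #         ('left',  'TIMES', 'DIVIDE'),
    #         ('right', 'UMINUS'),
    #     )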

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError as e:
            errorlog.error('%s', e)
            errors = True
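
    # Illustrative only: each pinfo.grammar entry is mined from a p_* function
    # whose docstring carries the BNF:
    #
    #     def p_expression_binop(p):
    #         '''expression : expression PLUS expression
    #                       | expression MINUS expression'''
    #         p[0] = (p[2], p[1], p[3])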

    # Set the grammar start symbol
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError as e:
        errorlog.error(str(e))
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
        errors = True

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info('')
        debuglog.info('Unused terminals:')
        debuglog.info('')
        for term in unused_terminals:
            errorlog.warning('Token %r defined, but not used', term)
            debuglog.info('    %s', term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info('')
        debuglog.info('Grammar')
        debuglog.info('')
        for n, p in enumerate(grammar.Productions):
            debuglog.info('Rule %-5d %s', n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning('There is 1 unused token')
    if len(unused_terminals) > 1:
        errorlog.warning('There are %d unused tokens', len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning('There is 1 unused rule')
    if len(unused_rules) > 1:
        errorlog.warning('There are %d unused rules', len(unused_rules))
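
    # Illustrative only: the warnings above fire when the tokens tuple
    # advertises a terminal no rule consumes, or a p_* rule defines a
    # nonterminal nothing references:
    #
    #     tokens = ('NUMBER', 'COMMA')   # hypothetical; COMMA appears in no rule
    #     # -> "Token 'COMMA' defined, but not used" and "There is 1 unused token"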

    if debug:
        debuglog.info('')
        debuglog.info('Terminals, with rules where they appear')
        debuglog.info('')
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info('')
        debuglog.info('Nonterminals, with rules where they appear')
        debuglog.info('')
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info('')

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning('Symbol %r is unreachable', u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error('Infinite recursion detected for symbol %r', inf)
            errors = True
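
    # Illustrative only: the infinite-cycle check rejects nonterminals that can
    # never derive a finite string of terminals, e.g. a rule with no base case:
    #
    #     def p_items(p):
    #         'items : items COMMA items'   # hypothetical; 'items' never terminates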

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug('Generating %s tables', method)

    lr = LRGeneratedTable(grammar, method, debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning('1 shift/reduce conflict')
        elif num_sr > 1:
            errorlog.warning('%d shift/reduce conflicts', num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning('1 reduce/reduce conflict')
        elif num_rr > 1:
            errorlog.warning('%d reduce/reduce conflicts', num_rr)
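
    # Illustrative only: the classic way to provoke the shift/reduce warning
    # above is an ambiguous binary-operator rule with no precedence declared:
    #
    #     def p_expr(p):
    #         '''expr : expr PLUS expr
    #                 | NUMBER'''
    #
    # PLY resolves such conflicts as a shift by default; adding
    # ('left', 'PLUS') to the precedence table makes the warning go away.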

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning('')
        debuglog.warning('Conflicts:')
        debuglog.warning('')

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)

        already_reported = set()
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
            already_reported.add((state, id(rule), id(rejected)))

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning('Rule (%s) is never reduced', rejected)
                errorlog.warning('Rule (%s) is never reduced', rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        try:
            lr.write_table(tabmodule, outputdir, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s", tabmodule, e)

    # Write a pickled version of the tables
    if picklefile:
        try:
            lr.pickle_table(picklefile, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s", picklefile, e)
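
    # Illustrative only: picklefile= replaces the generated parsetab.py module
    # with a single pickled table file, handy when writing importable source is
    # undesirable:
    #
    #     parser = yacc.yacc(picklefile='parser.pickle')
    #     # later builds reuse parser.pickle while the grammar signature matches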

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    parse = parser.parse
    return parser
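
# Illustrative only: the returned LRParser is driven through its parse()
# method; the module-level 'parse' alias set above keeps the historical
# yacc.parse() entry point working:
#
#     parser = yacc.yacc()
#     result = parser.parse('3 + 4 * 5')            # uses the most recently built lexer
#     result = parser.parse(data, lexer=mylexer)    # or pass a lexer explicitly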