# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2022
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Latest version: https://github.com/dabeaz/ply
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style." Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import re
import types
import sys
import inspect

#-----------------------------------------------------------------------------
# === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = False            # Debugging mode. If set, yacc generates a
                             # 'parser.out' file in the current directory

debug_file = 'parser.out'    # Default name of the debugging file
error_count = 3              # Number of symbols that must be shifted to leave recovery mode
resultlimit = 40             # Size limit of results when running in debug mode

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module. PLY will use this by default to create things
# such as the parser.out file. If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

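# Illustrative usage (editor's sketch; the values are hypothetical, not part
# of PLY): PlyLogger adapts any writable file object to the logging-style
# interface used throughout this module, e.g.
#
#     log = PlyLogger(sys.stderr)
#     log.warning('Token %r defined but not used', 'NUMBER')
#     # writes: WARNING: Token 'NUMBER' defined but not used
#
# NullLogger absorbs every call: NullLogger().debug('ignored') does nothing,
# because attribute access and calls both just return the logger itself.
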
# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + ' ...'
    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))

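# Illustrative examples (editor's sketch with hypothetical values):
#
#     format_result(3.14)                    # -> "<float @ 0x7f...> (3.14)"
#     format_stack_entry('abc')              # -> "'abc'" (short reprs pass through)
#     format_stack_entry(list(range(100)))   # -> "<list @ 0x7f...>"
#
# Both truncate or summarize large values so debug logs stay readable.
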
#-----------------------------------------------------------------------------
# === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself. These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#       .type       = Grammar symbol type
#       .value      = Symbol value
#       .lineno     = Starting line number
#       .endlineno  = Ending line number (optional, set automatically)
#       .lexpos     = Starting lex position
#       .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule. Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined). The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def set_lexpos(self, n, lexpos):
        self.slice[n].lexpos = lexpos

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError

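# Illustrative sketch (hypothetical grammar rule, not part of this module):
# inside a user's rule function, the YaccProduction argument behaves like a
# sequence of the matched symbols:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]             # sets .value of the result symbol
#         first_line = p.lineno(1)       # line number of the left operand
#         start, end = p.lexspan(3)      # character span of the right operand
#
# Negative indexes reach below the current rule into the parser's symbol stack.
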
# -----------------------------------------------------------------------------
# == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        self.errorok = True

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}

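    # Illustrative note (editor's sketch, hypothetical usage): a grammar whose
    # rules switch lexer states during reduction may need every reduction to
    # wait for a real lookahead token. Disabling defaulted states forces that:
    #
    #     parser = yacc.yacc()
    #     parser.disable_defaulted_states()   # always consult the lookahead
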
    # parse().
    #
    # This is the core parsing engine. To operate, it requires a lexer object.
    # Two options are provided. The debug flag turns on debugging so that you can
    # see the various rule reductions and parsing steps. tracking turns on position
    # tracking. In this mode, symbols will record the starting/ending line number and
    # character index.

    def parse(self, input=None, lexer=None, debug=False, tracking=False):
        # If debugging has been specified as a flag, turn it into a logging object
        if isinstance(debug, int) and debug:
            debug = PlyLogger(sys.stderr)

        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        if debug:
            debug.info('PLY: PARSE DEBUG START')

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        # Set the token function
        get_token = self.token = lexer.token

        # Set up the state and symbol stacks
        statestack = self.statestack = []   # Stack of parsing states
        symstack = self.symstack = []       # Stack of grammar symbols
        pslice.stack = symstack             # Put in the production
        errtoken = None                     # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if debug:
                debug.debug('State : %s', state)

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                if debug:
                    debug.debug('Defaulted state %s: Reduce using %d', state, -t)

            if debug:
                debug.debug('Stack : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    if debug:
                        debug.debug('Action : Shift and goto state %s', t)

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if debug:
                        if plen:
                            debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                       '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                       goto[statestack[-1-plen]][pname])
                        else:
                            debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                       goto[statestack[-1]][pname])

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            if debug:
                                debug.info('Result : %s', format_result(pslice[0]))
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue

                    else:

                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            if debug:
                                debug.info('Result : %s', format_result(pslice[0]))
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)

                    if debug:
                        debug.info('Done : Returning %s', format_result(result))
                        debug.info('PLY: PARSE DEBUG END')

                    return result

            if t is None:

                if debug:
                    debug.error('Error : %s',
                                ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = self.errorfunc(errtoken)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    statestack.pop()
                    state = statestack[-1]

                continue

            # If we're here, something really bad happened
            raise RuntimeError('yacc: internal parser error!!!\n')

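# Illustrative usage (editor's sketch; the expression grammar is hypothetical):
#
#     import ply.yacc as yacc
#     parser = yacc.yacc()        # builds an LRParser from the calling module
#     result = parser.parse('3 + 4 * 5', debug=True, tracking=True)
#
# With debug=True, a PlyLogger(sys.stderr) traces every shift and reduction;
# with tracking=True, each reduced symbol records its line/position span.
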
# -----------------------------------------------------------------------------
# === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production. For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            self.str = '%s -> <empty>' % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = self.Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

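# Illustrative examples (editor's sketch with hypothetical values):
#
#     p = Production(1, 'expr', ['expr', 'PLUS', 'term'])
#     str(p)       # -> 'expr -> expr PLUS term'
#     len(p)       # -> 3
#     p.usyms      # -> ['expr', 'PLUS', 'term'] (duplicates removed)
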
# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule. For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse. Here are
# the basic attributes:
#
#       name       - Name of the production. For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next    - Next LR item. For example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, '.')
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            s = '%s -> <empty>' % self.name
        return s

    def __repr__(self):
        return 'LRItem(' + str(self) + ')'

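# Illustrative examples (editor's sketch with hypothetical values): the dot
# marks how much of the rule has been recognized so far:
#
#     p = Production(1, 'expr', ['expr', 'PLUS', 'term'])
#     str(LRItem(p, 0))    # -> 'expr -> . expr PLUS term'
#     str(LRItem(p, 2))    # -> 'expr -> expr PLUS . term'
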
# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols. Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None

# -----------------------------------------------------------------------------
# === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]   # A list of all of the productions. The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames = {}         # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap = {}           # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals = {}         # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First = {}             # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}            # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}        # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'. level is a numeric level.
    #
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)

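    # Illustrative usage (editor's sketch; the grammar is hypothetical). Users
    # normally express this through a `precedence` table, which yacc() turns
    # into calls equivalent to:
    #
    #     g = Grammar(['PLUS', 'TIMES', 'NUMBER'])
    #     g.set_precedence('PLUS',  'left', 1)
    #     g.set_precedence('TIMES', 'left', 2)   # binds tighter than PLUS
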
    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols. For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid. Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]

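    # Illustrative usage (editor's sketch; the rule is hypothetical). yacc()
    # derives these calls from rule docstrings; done by hand, the rule
    # 'expr : expr PLUS term' defined at calc.py:42 would be added as:
    #
    #     g.add_production('expr', ['expr', 'PLUS', 'term'],
    #                      func=None, file='calc.py', line=42)
    #
    # Lacking %prec, it receives the precedence of PLUS, its rightmost terminal.
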
    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar. Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol. Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite

    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p:
                continue

            for s in p.prod:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    result.append((s, p))
        return result

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar. Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s, v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        unused_prod = []
        for s, v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term, precedence) corresponding to precedence
    # rules that were never used by the grammar. term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname, self.Precedence[termname][0]))

        return unused

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first(), the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self, beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = []
        for x in beta:
            x_produces_empty = False

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = True
                else:
                    if f not in result:
                        result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result

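    # Illustrative example (editor's sketch with hypothetical FIRST sets):
    # given First['A'] = ['a', '<empty>'] and First['b'] = ['b'],
    # _first(('A', 'b')) yields ['a', 'b']: since A can derive the empty
    # string, FIRST of the sequence also includes FIRST of the next symbol.
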
    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while True:
            some_change = False
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append(f)
                            some_change = True
            if not some_change:
                break

        return self.First

    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol. The
    # follow set is the set of all symbols that might follow a given
    # non-terminal. See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self, start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = []

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = ['$end']

        while True:
            didadd = False
            for p in self.Productions[1:]:
                # Here is the production set
                for i, B in enumerate(p.prod):
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = False
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = True
                            if f == '<empty>':
                                hasempty = True
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = True
            if not didadd:
                break
        return self.Follow

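    # Illustrative example (editor's sketch with a hypothetical grammar): for
    #
    #     expr : expr PLUS term
    #     expr : term
    #
    # 'term' ends both productions, so Follow['term'] absorbs everything in
    # Follow['expr'] -- here 'PLUS' and '$end'.
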
    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items. The LR items are stored in two ways: First, they are uniquely
    # numbered and placed in the list _lritems. Second, a linked list of LR items
    # is built for each production. For example:
    #
    #   E -> E PLUS E
    #
    # Creates the list
    #
    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while True:
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p, i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError, KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri:
                    break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items

# -----------------------------------------------------------------------------
# === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------

def digraph(X, R, FP):
    N = {}
    for x in X:
        N[x] = 0
    stack = []
    F = {}
    for x in X:
        if N[x] == 0:
            traverse(x, N, stack, F, X, R, FP)
    return F

def traverse(x, N, stack, F, X, R, FP):
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)    # F(X) <- F'(x)

    rel = R(x)      # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()

class LALRError(YaccError):
    pass

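# Illustrative example (editor's sketch with hypothetical inputs): digraph()
# solves F(x) = FP(x) U { F(y) : x R y } over all x in X. For instance:
#
#     X = ['a', 'b']
#     R = lambda x: ['b'] if x == 'a' else []   # a R b
#     FP = lambda x: [x.upper()]
#     digraph(X, R, FP)    # -> {'a': ['A', 'B'], 'b': ['B']}
#
# The stack/MAXINT bookkeeping in traverse() collapses strongly connected
# components so that members of a cycle share one merged result set.
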
# -----------------------------------------------------------------------------
# == LRTable ==
#
# This class implements the LR table generation algorithm. There are no
# public methods.
# -----------------------------------------------------------------------------

class LRTable:
    def __init__(self, grammar, log=None):
        self.grammar = grammar

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}         # Action table
        self.lr_goto = {}           # Goto table
        self.lr_productions = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}     # Cache of computed gotos
        self.lr0_cidhash = {}       # Cache of closures

        self._add_count = 0         # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []         # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        for p in self.lr_productions:
            p.bind(pdict)

1344 def bind_callables(self, pdict): 

1345 for p in self.lr_productions: 

1346 p.bind(pdict) 

1347 

1348 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 

1349 

1350 def lr0_closure(self, I): 

1351 self._add_count += 1 

1352 

1353 # Add everything in I to J 

1354 J = I[:] 

1355 didadd = True 

1356 while didadd: 

1357 didadd = False 

1358 for j in J: 

1359 for x in j.lr_after: 

1360 if getattr(x, 'lr0_added', 0) == self._add_count: 

1361 continue 

1362 # Add B --> .G to J 

1363 J.append(x.lr_next) 

1364 x.lr0_added = self._add_count 

1365 didadd = True 

1366 

1367 return J 

1368 

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol. This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects). With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self, I, x):
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g

    # Compute the LR(0) sets of item function
    def lr0_items(self):
        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbol
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = {}
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I, x)
                if not g or id(g) in self.lr0_cidhash:
                    continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C

    # -----------------------------------------------------------------------------
    # ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    # Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
    #  McGraw-Hill Book Company, (1985).
    #
    # -----------------------------------------------------------------------------

    # -----------------------------------------------------------------------------
    # compute_nullable_nonterminals()
    #
    # Creates a set containing all of the non-terminals that might produce
    # an empty production.
    # -----------------------------------------------------------------------------

    def compute_nullable_nonterminals(self):
        nullable = set()
        num_nullable = 0
        while True:
            for p in self.grammar.Productions[1:]:
                if p.len == 0:
                    nullable.add(p.name)
                    continue
                for t in p.prod:
                    if t not in nullable:
                        break
                else:
                    nullable.add(p.name)
            if len(nullable) == num_nullable:
                break
            num_nullable = len(nullable)
        return nullable

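    # Illustrative example (editor's sketch with a hypothetical grammar): for
    #
    #     opt_semi :              (empty right-hand side)
    #     opt_semi : SEMI
    #     stmt     : expr opt_semi
    #
    # the first pass adds 'opt_semi' (zero-length production); 'stmt' stays
    # out of the set because 'expr' never derives the empty string here.
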
    # -----------------------------------------------------------------------------
    # find_nonterminal_trans(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions. These are transitions in which a dot appears immediately before
    # a non-terminal. Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------

    def find_nonterminal_transitions(self, C):
        trans = []
        for stateno, state in enumerate(C):
            for p in state:
                if p.lr_index < p.len - 1:
                    t = (stateno, p.prod[p.lr_index+1])
                    if t[1] in self.grammar.Nonterminals:
                        if t not in trans:
                            trans.append(t)
        return trans

    # -----------------------------------------------------------------------------
    # dr_relation()
    #
    # Computes the DR(p,A) relationships for non-terminal transitions. The input
    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
    #
    # Returns a list of terminals.
    # -----------------------------------------------------------------------------

    def dr_relation(self, C, trans, nullable):
        state, N = trans
        terms = []

        g = self.lr0_goto(C[state], N)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index+1]
                if a in self.grammar.Terminals:
                    if a not in terms:
                        terms.append(a)

        # This extra bit is to handle the start state
        if state == 0 and N == self.grammar.Productions[0].prod[0]:
            terms.append('$end')

        return terms

    # -----------------------------------------------------------------------------
    # reads_relation()
    #
    # Computes the READS() relation (p,A) READS (t,C).
    # -----------------------------------------------------------------------------

    def reads_relation(self, C, trans, empty):
        # Look for empty transitions
        rel = []
        state, N = trans

        g = self.lr0_goto(C[state], N)
        j = self.lr0_cidhash.get(id(g), -1)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index + 1]
                if a in empty:
                    rel.append((j, a))

        return rel

1555 # ----------------------------------------------------------------------------- 

1556 # compute_lookback_includes() 

1557 # 

1558 # Determines the lookback and includes relations 

1559 # 

1560 # LOOKBACK: 

1561 # 

1562 # This relation is determined by running the LR(0) state machine forward. 

1563 # For example, starting with a production "N : . A B C", we run it forward 

1564 # to obtain "N : A B C ." We then build a relationship between this final 

1565 # state and the starting state. These relationships are stored in a dictionary 

1566 # lookdict. 

1567 # 

1568 # INCLUDES: 

1569 # 

1570 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 

1571 # 

1572 # This relation is used to determine non-terminal transitions that occur 

1573 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 

1574 # if the following holds: 

1575 # 

1576 # B -> LAT, where T -> epsilon and p' -L-> p 

1577 # 

1578 # L is essentially a prefix (which may be empty), T is a suffix that must be 

1579 # able to derive an empty string. State p' must lead to state p with the string L. 

1580 # 

1581 # ----------------------------------------------------------------------------- 

1582 

1583 def compute_lookback_includes(self, C, trans, nullable): 

1584 lookdict = {} # Dictionary of lookback relations 

1585 includedict = {} # Dictionary of include relations 

1586 

1587 # Make a dictionary of non-terminal transitions 

1588 dtrans = {} 

1589 for t in trans: 

1590 dtrans[t] = 1 

1591 

1592 # Loop over all transitions and compute lookbacks and includes 

1593 for state, N in trans: 

1594 lookb = [] 

1595 includes = [] 

1596 for p in C[state]: 

1597 if p.name != N: 

1598 continue 

1599 

1600 # Okay, we have a name match. We now follow the production all the way 

1601 # through the state machine until the dot reaches the right-hand end 

1602 

1603 lr_index = p.lr_index 

1604 j = state 

1605 while lr_index < p.len - 1: 

1606 lr_index = lr_index + 1 

1607 t = p.prod[lr_index] 

1608 

1609 # Check to see if this symbol and state are a non-terminal transition 

1610 if (j, t) in dtrans: 

1611 # Yes. Okay, there is some chance that this is an includes relation 

1612 # the only way to know for certain is whether the rest of the 

1613 # production derives empty 

1614 

1615 li = lr_index + 1 

1616 while li < p.len: 

1617 if p.prod[li] in self.grammar.Terminals: 

1618 break # No, forget it 

1619 if p.prod[li] not in nullable: 

1620 break 

1621 li = li + 1 

1622 else: 

1623 # Appears to be a relation between (j,t) and (state,N) 

1624 includes.append((j, t)) 

1625 

1626 g = self.lr0_goto(C[j], t) # Go to next set 

1627 j = self.lr0_cidhash.get(id(g), -1) # Go to next state 

1628 

1629 # When we get here, j is the final state; now we have to locate the production 

1630 for r in C[j]: 

1631 if r.name != p.name: 

1632 continue 

1633 if r.len != p.len: 

1634 continue 

1635 i = 0 

1636 # This loop is comparing a production ". A B C" with "A B C ." 

1637 while i < r.lr_index: 

1638 if r.prod[i] != p.prod[i+1]: 

1639 break 

1640 i = i + 1 

1641 else: 

1642 lookb.append((j, r)) 

1643 for i in includes: 

1644 if i not in includedict: 

1645 includedict[i] = [] 

1646 includedict[i].append((state, N)) 

1647 lookdict[(state, N)] = lookb 

1648 

1649 return lookdict, includedict 
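
# Worked example (hypothetical grammar, for illustration only): with 
# 
#     S -> X x      X -> Y      Y -> y 
# 
# consider the item "X -> . Y" in some state p. Here B = X, the prefix L 
# is empty (so p' = p) and the suffix T after Y is empty (and therefore 
# trivially nullable), so (p, Y) INCLUDES (p, X): every lookahead that 
# follows X also follows Y. LOOKBACK, meanwhile, ties the completed item 
# "X -> Y ." in the state reached by shifting Y back to the (p, X) 
# transition where the production started. 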

1650 

1651 # ----------------------------------------------------------------------------- 

1652 # compute_read_sets() 

1653 # 

1654 # Given a set of LR(0) items, this function computes the read sets. 

1655 # 

1656 # Inputs: C = Set of LR(0) items 

1657 # ntrans = Set of nonterminal transitions 

1658 # nullable = Set of empty transitions 

1659 # 

1660 # Returns a set containing the read sets 

1661 # ----------------------------------------------------------------------------- 

1662 

1663 def compute_read_sets(self, C, ntrans, nullable): 

1664 FP = lambda x: self.dr_relation(C, x, nullable) 

1665 R = lambda x: self.reads_relation(C, x, nullable) 

1666 F = digraph(ntrans, R, FP) 

1667 return F 
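
# In the DeRemer and Pennello formulation, digraph() computes the smallest 
# sets satisfying 
# 
#     Read(p,A) = DR(p,A) U ( union of Read(t,C) for all (p,A) READS (t,C) ) 
# 
# i.e. the base sets FP propagated along the READS edges given by R. 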

1668 

1669 # ----------------------------------------------------------------------------- 

1670 # compute_follow_sets() 

1671 # 

1672 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 

1673 # and an include set, this function computes the follow sets 

1674 # 

1675 # Follow(p,A) = Read(p,A) U ( union of Follow(p',B) for all (p,A) INCLUDES (p',B) ) 

1676 # 

1677 # Inputs: 

1678 # ntrans = Set of nonterminal transitions 

1679 # readsets = Readset (previously computed) 

1680 # inclsets = Include sets (previously computed) 

1681 # 

1682 # Returns a set containing the follow sets 

1683 # ----------------------------------------------------------------------------- 

1684 

1685 def compute_follow_sets(self, ntrans, readsets, inclsets): 

1686 FP = lambda x: readsets[x] 

1687 R = lambda x: inclsets.get(x, []) 

1688 F = digraph(ntrans, R, FP) 

1689 return F 

1690 

1691 # ----------------------------------------------------------------------------- 

1692 # add_lookaheads() 

1693 # 

1694 # Attaches the lookahead symbols to grammar rules. 

1695 # 

1696 # Inputs: lookbacks - Set of lookback relations 

1697 # followset - Computed follow set 

1698 # 

1699 # This function directly attaches the lookaheads to productions contained 

1700 # in the lookbacks set 

1701 # ----------------------------------------------------------------------------- 

1702 

1703 def add_lookaheads(self, lookbacks, followset): 

1704 for trans, lb in lookbacks.items(): 

1705 # Loop over productions in lookback 

1706 for state, p in lb: 

1707 if state not in p.lookaheads: 

1708 p.lookaheads[state] = [] 

1709 f = followset.get(trans, []) 

1710 for a in f: 

1711 if a not in p.lookaheads[state]: 

1712 p.lookaheads[state].append(a) 
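
# After this pass, each Production object p carries a mapping of the form 
# 
#     p.lookaheads = { state_number : [terminal, terminal, ...] } 
# 
# which lr_parse_table() later consults (as p.lookaheads[st]) to decide 
# which terminals should trigger a reduction by p in state st. 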

1713 

1714 # ----------------------------------------------------------------------------- 

1715 # add_lalr_lookaheads() 

1716 # 

1717 # This function does all of the work of adding lookahead information for use 

1718 # with LALR parsing 

1719 # ----------------------------------------------------------------------------- 

1720 

1721 def add_lalr_lookaheads(self, C): 

1722 # Determine all of the nullable nonterminals 

1723 nullable = self.compute_nullable_nonterminals() 

1724 

1725 # Find all non-terminal transitions 

1726 trans = self.find_nonterminal_transitions(C) 

1727 

1728 # Compute read sets 

1729 readsets = self.compute_read_sets(C, trans, nullable) 

1730 

1731 # Compute lookback/includes relations 

1732 lookd, included = self.compute_lookback_includes(C, trans, nullable) 

1733 

1734 # Compute LALR FOLLOW sets 

1735 followsets = self.compute_follow_sets(trans, readsets, included) 

1736 

1737 # Add all of the lookaheads 

1738 self.add_lookaheads(lookd, followsets) 

1739 

1740 # ----------------------------------------------------------------------------- 

1741 # lr_parse_table() 

1742 # 

1743 # This function constructs the parse tables for SLR or LALR 

1744 # ----------------------------------------------------------------------------- 

1745 def lr_parse_table(self): 

1746 Productions = self.grammar.Productions 

1747 Precedence = self.grammar.Precedence 

1748 goto = self.lr_goto # Goto array 

1749 action = self.lr_action # Action array 

1750 log = self.log # Logger for output 

1751 

1752 actionp = {} # Action production array (temporary) 

1753 

1754 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 

1755 # This determines the number of states 

1756 

1757 C = self.lr0_items() 

1758 self.add_lalr_lookaheads(C) 

1759 

1760 # Build the parser table, state by state 

1761 st = 0 

1762 for I in C: 

1763 # Loop over each production in I 

1764 actlist = [] # List of actions 

1765 st_action = {} 

1766 st_actionp = {} 

1767 st_goto = {} 

1768 log.info('') 

1769 log.info('state %d', st) 

1770 log.info('') 

1771 for p in I: 

1772 log.info(' (%d) %s', p.number, p) 

1773 log.info('') 

1774 

1775 for p in I: 

1776 if p.len == p.lr_index + 1: 

1777 if p.name == "S'": 

1778 # Start symbol. Accept! 

1779 st_action['$end'] = 0 

1780 st_actionp['$end'] = p 

1781 else: 

1782 # We are at the end of a production. Reduce! 

1783 laheads = p.lookaheads[st] 

1784 for a in laheads: 

1785 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) 

1786 r = st_action.get(a) 

1787 if r is not None: 

1788 # Whoa. Have a shift/reduce or reduce/reduce conflict 

1789 if r > 0: 

1790 # Need to decide on shift or reduce here 

1791 # By default we favor shifting; the precedence 

1792 # rules below decide the conflict. 

1793 

1794 # Shift precedence comes from the token 

1795 sprec, slevel = Precedence.get(a, ('right', 0)) 

1796 

1797 # Reduce precedence comes from rule being reduced (p) 

1798 rprec, rlevel = Productions[p.number].prec 

1799 

1800 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 

1801 # We really need to reduce here. 

1802 st_action[a] = -p.number 

1803 st_actionp[a] = p 

1804 if not slevel and not rlevel: 

1805 log.info(' ! shift/reduce conflict for %s resolved as reduce', a) 

1806 self.sr_conflicts.append((st, a, 'reduce')) 

1807 Productions[p.number].reduced += 1 

1808 elif (slevel == rlevel) and (rprec == 'nonassoc'): 

1809 st_action[a] = None 

1810 else: 

1811 # Hmmm. Guess we'll keep the shift 

1812 if not rlevel: 

1813 log.info(' ! shift/reduce conflict for %s resolved as shift', a) 

1814 self.sr_conflicts.append((st, a, 'shift')) 

1815 elif r < 0: 

1816 # Reduce/reduce conflict. In this case, we favor the rule 

1817 # that was defined first in the grammar file 

1818 oldp = Productions[-r] 

1819 pp = Productions[p.number] 

1820 if oldp.line > pp.line: 

1821 st_action[a] = -p.number 

1822 st_actionp[a] = p 

1823 chosenp, rejectp = pp, oldp 

1824 Productions[p.number].reduced += 1 

1825 Productions[oldp.number].reduced -= 1 

1826 else: 

1827 chosenp, rejectp = oldp, pp 

1828 self.rr_conflicts.append((st, chosenp, rejectp)) 

1829 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', 

1830 a, st_actionp[a].number, st_actionp[a]) 

1831 else: 

1832 raise LALRError('Unknown conflict in state %d' % st) 

1833 else: 

1834 st_action[a] = -p.number 

1835 st_actionp[a] = p 

1836 Productions[p.number].reduced += 1 

1837 else: 

1838 i = p.lr_index 

1839 a = p.prod[i+1] # Get symbol right after the "." 

1840 if a in self.grammar.Terminals: 

1841 g = self.lr0_goto(I, a) 

1842 j = self.lr0_cidhash.get(id(g), -1) 

1843 if j >= 0: 

1844 # We are in a shift state 

1845 actlist.append((a, p, 'shift and go to state %d' % j)) 

1846 r = st_action.get(a) 

1847 if r is not None: 

1848 # Whoa. Have a shift/reduce or shift/shift conflict 

1849 if r > 0: 

1850 if r != j: 

1851 raise LALRError('Shift/shift conflict in state %d' % st) 

1852 elif r < 0: 

1853 # Do a precedence check. 

1854 # - if precedence of reduce rule is higher, we reduce. 

1855 # - if precedence of reduce is same and left assoc, we reduce. 

1856 # - otherwise we shift 

1857 

1858 # Shift precedence comes from the token 

1859 sprec, slevel = Precedence.get(a, ('right', 0)) 

1860 

1861 # Reduce precedence comes from the rule that could have been reduced 

1862 rprec, rlevel = Productions[st_actionp[a].number].prec 

1863 

1864 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): 

1865 # We decide to shift here... highest precedence to shift 

1866 Productions[st_actionp[a].number].reduced -= 1 

1867 st_action[a] = j 

1868 st_actionp[a] = p 

1869 if not rlevel: 

1870 log.info(' ! shift/reduce conflict for %s resolved as shift', a) 

1871 self.sr_conflicts.append((st, a, 'shift')) 

1872 elif (slevel == rlevel) and (rprec == 'nonassoc'): 

1873 st_action[a] = None 

1874 else: 

1875 # Hmmm. Guess we'll keep the reduce 

1876 if not slevel and not rlevel: 

1877 log.info(' ! shift/reduce conflict for %s resolved as reduce', a) 

1878 self.sr_conflicts.append((st, a, 'reduce')) 

1879 

1880 else: 

1881 raise LALRError('Unknown conflict in state %d' % st) 

1882 else: 

1883 st_action[a] = j 

1884 st_actionp[a] = p 

1885 

1886 # Print the actions associated with each terminal 

1887 _actprint = {} 

1888 for a, p, m in actlist: 

1889 if a in st_action: 

1890 if p is st_actionp[a]: 

1891 log.info(' %-15s %s', a, m) 

1892 _actprint[(a, m)] = 1 

1893 log.info('') 

1894 # Print the actions that were not used. (debugging) 

1895 not_used = 0 

1896 for a, p, m in actlist: 

1897 if a in st_action: 

1898 if p is not st_actionp[a]: 

1899 if (a, m) not in _actprint: 

1900 log.debug(' ! %-15s [ %s ]', a, m) 

1901 not_used = 1 

1902 _actprint[(a, m)] = 1 

1903 if not_used: 

1904 log.debug('') 

1905 

1906 # Construct the goto table for this state 

1907 

1908 nkeys = {} 

1909 for ii in I: 

1910 for s in ii.usyms: 

1911 if s in self.grammar.Nonterminals: 

1912 nkeys[s] = None 

1913 for n in nkeys: 

1914 g = self.lr0_goto(I, n) 

1915 j = self.lr0_cidhash.get(id(g), -1) 

1916 if j >= 0: 

1917 st_goto[n] = j 

1918 log.info(' %-30s shift and go to state %d', n, j) 

1919 

1920 action[st] = st_action 

1921 actionp[st] = st_actionp 

1922 goto[st] = st_goto 

1923 st += 1 
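
# ----------------------------------------------------------------------------- 
# Example of the precedence resolution above (the grammar and token names 
# are hypothetical, for illustration only). Given a user declaration 
# 
#     precedence = ( 
#         ('left', 'PLUS'), 
#         ('left', 'TIMES'), 
#     ) 
# 
# PLUS gets level 1 and TIMES level 2. In a state holding the completed rule 
# "expr -> expr PLUS expr" with lookahead TIMES, the shift has slevel = 2 
# against rlevel = 1 for the rule, so the shift wins and "a + b * c" groups 
# as "a + (b * c)". With lookahead PLUS the levels tie and the 'left' 
# associativity forces the reduce, so "a + b + c" groups as "(a + b) + c". 
# ----------------------------------------------------------------------------- 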

1924 

1925# ----------------------------------------------------------------------------- 

1926# === INTROSPECTION === 

1927# 

1928# The following functions and classes are used to implement the PLY 

1929 # introspection features, followed by the yacc() function itself. 

1930# ----------------------------------------------------------------------------- 

1931 

1932# ----------------------------------------------------------------------------- 

1933# get_caller_module_dict() 

1934# 

1935# This function returns a dictionary containing all of the symbols defined within 

1936# a caller further down the call stack. This is used to get the environment 

1937# associated with the yacc() call if none was provided. 

1938# ----------------------------------------------------------------------------- 

1939 

1940def get_caller_module_dict(levels): 

1941 f = sys._getframe(levels) 

1942 ldict = f.f_globals.copy() 

1943 if f.f_globals != f.f_locals: 

1944 ldict.update(f.f_locals) 

1945 return ldict 

1946 

1947# ----------------------------------------------------------------------------- 

1948# parse_grammar() 

1949# 

1950# This takes a raw grammar rule string and parses it into production data 

1951# ----------------------------------------------------------------------------- 

1952def parse_grammar(doc, file, line): 

1953 grammar = [] 

1954 # Split the doc string into lines 

1955 pstrings = doc.splitlines() 

1956 lastp = None 

1957 dline = line 

1958 for ps in pstrings: 

1959 dline += 1 

1960 p = ps.split() 

1961 if not p: 

1962 continue 

1963 try: 

1964 if p[0] == '|': 

1965 # This is a continuation of a previous rule 

1966 if not lastp: 

1967 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) 

1968 prodname = lastp 

1969 syms = p[1:] 

1970 else: 

1971 prodname = p[0] 

1972 lastp = prodname 

1973 syms = p[2:] 

1974 assign = p[1] 

1975 if assign != ':' and assign != '::=': 

1976 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) 

1977 

1978 grammar.append((file, dline, prodname, syms)) 

1979 except SyntaxError: 

1980 raise 

1981 except Exception: 

1982 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) 

1983 

1984 return grammar 
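
# Example (hypothetical input, for illustration only): a docstring such as 
# 
#     expression : expression PLUS term 
#                | term 
# 
# taken from a rule function starting at line 10 of calc.py produces 
# 
#     [('calc.py', 11, 'expression', ['expression', 'PLUS', 'term']), 
#      ('calc.py', 12, 'expression', ['term'])] 
# 
# where the '|' form reuses the previous production name through lastp. 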

1985 

1986# ----------------------------------------------------------------------------- 

1987# ParserReflect() 

1988# 

1989# This class represents information extracted for building a parser including 

1990# start symbol, error function, tokens, precedence list, action functions, 

1991# etc. 

1992# ----------------------------------------------------------------------------- 

1993class ParserReflect(object): 

1994 def __init__(self, pdict, log=None): 

1995 self.pdict = pdict 

1996 self.start = None 

1997 self.error_func = None 

1998 self.tokens = None 

1999 self.modules = set() 

2000 self.grammar = [] 

2001 self.error = False 

2002 

2003 if log is None: 

2004 self.log = PlyLogger(sys.stderr) 

2005 else: 

2006 self.log = log 

2007 

2008 # Get all of the basic information 

2009 def get_all(self): 

2010 self.get_start() 

2011 self.get_error_func() 

2012 self.get_tokens() 

2013 self.get_precedence() 

2014 self.get_pfunctions() 

2015 

2016 # Validate all of the information 

2017 def validate_all(self): 

2018 self.validate_start() 

2019 self.validate_error_func() 

2020 self.validate_tokens() 

2021 self.validate_precedence() 

2022 self.validate_pfunctions() 

2023 self.validate_modules() 

2024 return self.error 

2025 

2026 # Compute a signature over the grammar 

2027 def signature(self): 

2028 parts = [] 

2029 try: 

2030 if self.start: 

2031 parts.append(self.start) 

2032 if self.prec: 

2033 parts.append(''.join([''.join(p) for p in self.prec])) 

2034 if self.tokens: 

2035 parts.append(' '.join(self.tokens)) 

2036 for f in self.pfuncs: 

2037 if f[3]: 

2038 parts.append(f[3]) 

2039 except (TypeError, ValueError): 

2040 pass 

2041 return ''.join(parts) 

2042 

2043 # ----------------------------------------------------------------------------- 

2044 # validate_modules() 

2045 # 

2046 # This method checks to see if there are duplicated p_rulename() functions 

2047 # in the parser module file. Without this function, it is really easy for 

2048 # users to make mistakes by cutting and pasting code fragments (and it's a real 

2049 # bugger to try and figure out why the resulting parser doesn't work). Therefore, 

2050 # we just do a little regular expression pattern matching of def statements 

2051 # to try and detect duplicates. 

2052 # ----------------------------------------------------------------------------- 

2053 

2054 def validate_modules(self): 

2055 # Match def p_funcname( 

2056 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 

2057 

2058 for module in self.modules: 

2059 try: 

2060 lines, linen = inspect.getsourcelines(module) 

2061 except IOError: 

2062 continue 

2063 

2064 counthash = {} 

2065 for linen, line in enumerate(lines): 

2066 linen += 1 

2067 m = fre.match(line) 

2068 if m: 

2069 name = m.group(1) 

2070 prev = counthash.get(name) 

2071 if not prev: 

2072 counthash[name] = linen 

2073 else: 

2074 filename = inspect.getsourcefile(module) 

2075 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', 

2076 filename, linen, name, prev) 

2077 

2078 # Get the start symbol 

2079 def get_start(self): 

2080 self.start = self.pdict.get('start') 

2081 

2082 # Validate the start symbol 

2083 def validate_start(self): 

2084 if self.start is not None: 

2085 if not isinstance(self.start, str): 

2086 self.log.error("'start' must be a string") 

2087 

2088 # Look for error handler 

2089 def get_error_func(self): 

2090 self.error_func = self.pdict.get('p_error') 

2091 

2092 # Validate the error function 

2093 def validate_error_func(self): 

2094 if self.error_func: 

2095 if isinstance(self.error_func, types.FunctionType): 

2096 ismethod = 0 

2097 elif isinstance(self.error_func, types.MethodType): 

2098 ismethod = 1 

2099 else: 

2100 self.log.error("'p_error' defined, but is not a function or method") 

2101 self.error = True 

2102 return 

2103 

2104 eline = self.error_func.__code__.co_firstlineno 

2105 efile = self.error_func.__code__.co_filename 

2106 module = inspect.getmodule(self.error_func) 

2107 self.modules.add(module) 

2108 

2109 argcount = self.error_func.__code__.co_argcount - ismethod 

2110 if argcount != 1: 

2111 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) 

2112 self.error = True 

2113 

2114 # Get the tokens map 

2115 def get_tokens(self): 

2116 tokens = self.pdict.get('tokens') 

2117 if not tokens: 

2118 self.log.error('No token list is defined') 

2119 self.error = True 

2120 return 

2121 

2122 if not isinstance(tokens, (list, tuple)): 

2123 self.log.error('tokens must be a list or tuple') 

2124 self.error = True 

2125 return 

2126 

2127 if not tokens: 

2128 self.log.error('tokens is empty') 

2129 self.error = True 

2130 return 

2131 

2132 self.tokens = sorted(tokens) 

2133 

2134 # Validate the tokens 

2135 def validate_tokens(self): 

2136 # Validate the tokens. 

2137 if 'error' in self.tokens: 

2138 self.log.error("Illegal token name 'error'. Is a reserved word") 

2139 self.error = True 

2140 return 

2141 

2142 terminals = set() 

2143 for n in self.tokens: 

2144 if n in terminals: 

2145 self.log.warning('Token %r multiply defined', n) 

2146 terminals.add(n) 

2147 

2148 # Get the precedence map (if any) 

2149 def get_precedence(self): 

2150 self.prec = self.pdict.get('precedence') 

2151 

2152 # Validate and parse the precedence map 

2153 def validate_precedence(self): 

2154 preclist = [] 

2155 if self.prec: 

2156 if not isinstance(self.prec, (list, tuple)): 

2157 self.log.error('precedence must be a list or tuple') 

2158 self.error = True 

2159 return 

2160 for level, p in enumerate(self.prec): 

2161 if not isinstance(p, (list, tuple)): 

2162 self.log.error('Bad precedence table') 

2163 self.error = True 

2164 return 

2165 

2166 if len(p) < 2: 

2167 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p) 

2168 self.error = True 

2169 return 

2170 assoc = p[0] 

2171 if not isinstance(assoc, str): 

2172 self.log.error('precedence associativity must be a string') 

2173 self.error = True 

2174 return 

2175 for term in p[1:]: 

2176 if not isinstance(term, str): 

2177 self.log.error('precedence items must be strings') 

2178 self.error = True 

2179 return 

2180 preclist.append((term, assoc, level+1)) 

2181 self.preclist = preclist 

2182 

2183 # Get all p_functions from the grammar 

2184 def get_pfunctions(self): 

2185 p_functions = [] 

2186 for name, item in self.pdict.items(): 

2187 if not name.startswith('p_') or name == 'p_error': 

2188 continue 

2189 if isinstance(item, (types.FunctionType, types.MethodType)): 

2190 line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) 

2191 module = inspect.getmodule(item) 

2192 p_functions.append((line, module, name, item.__doc__)) 

2193 

2194 # Sort all of the actions by line number; make sure to stringify 

2195 # modules to make them sortable, since `line` may not uniquely sort all 

2196 # p functions 

2197 p_functions.sort(key=lambda p_function: ( 

2198 p_function[0], 

2199 str(p_function[1]), 

2200 p_function[2], 

2201 p_function[3])) 

2202 self.pfuncs = p_functions 

2203 

2204 # Validate all of the p_functions 

2205 def validate_pfunctions(self): 

2206 grammar = [] 

2207 # Check for non-empty symbols 

2208 if len(self.pfuncs) == 0: 

2209 self.log.error('no rules of the form p_rulename are defined') 

2210 self.error = True 

2211 return 

2212 

2213 for line, module, name, doc in self.pfuncs: 

2214 file = inspect.getsourcefile(module) 

2215 func = self.pdict[name] 

2216 if isinstance(func, types.MethodType): 

2217 reqargs = 2 

2218 else: 

2219 reqargs = 1 

2220 if func.__code__.co_argcount > reqargs: 

2221 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) 

2222 self.error = True 

2223 elif func.__code__.co_argcount < reqargs: 

2224 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) 

2225 self.error = True 

2226 elif not func.__doc__: 

2227 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', 

2228 file, line, func.__name__) 

2229 else: 

2230 try: 

2231 parsed_g = parse_grammar(doc, file, line) 

2232 for g in parsed_g: 

2233 grammar.append((name, g)) 

2234 except SyntaxError as e: 

2235 self.log.error(str(e)) 

2236 self.error = True 

2237 

2238 # Looks like a valid grammar rule 

2239 # Mark the file in which defined. 

2240 self.modules.add(module) 

2241 

2242 # Secondary validation step that looks for p_ definitions that are not functions 

2243 # or functions that look like they might be grammar rules. 

2244 

2245 for n, v in self.pdict.items(): 

2246 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): 

2247 continue 

2248 if n.startswith('t_'): 

2249 continue 

2250 if n.startswith('p_') and n != 'p_error': 

2251 self.log.warning('%r not defined as a function', n) 

2252 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or 

2253 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): 

2254 if v.__doc__: 

2255 try: 

2256 doc = v.__doc__.split(' ') 

2257 if doc[1] == ':': 

2258 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', 

2259 v.__code__.co_filename, v.__code__.co_firstlineno, n) 

2260 except IndexError: 

2261 pass 

2262 

2263 self.grammar = grammar 

2264 

2265# ----------------------------------------------------------------------------- 

2266# yacc(module) 

2267# 

2268# Build a parser 

2269# ----------------------------------------------------------------------------- 

2270 

2271def yacc(*, debug=yaccdebug, module=None, start=None, 

2272 check_recursion=True, optimize=False, debugfile=debug_file, 

2273 debuglog=None, errorlog=None): 

2274 

2275 # Reference to the parsing method of the last built parser 

2276 global parse 

2277 

2278 if errorlog is None: 

2279 errorlog = PlyLogger(sys.stderr) 

2280 

2281 # Get the module dictionary used for the parser 

2282 if module: 

2283 _items = [(k, getattr(module, k)) for k in dir(module)] 

2284 pdict = dict(_items) 

2285 # If no __file__ or __package__ attributes are available, try to obtain them 

2286 # from the __module__ instead 

2287 if '__file__' not in pdict: 

2288 pdict['__file__'] = sys.modules[pdict['__module__']].__file__ 

2289 if '__package__' not in pdict and '__module__' in pdict: 

2290 if hasattr(sys.modules[pdict['__module__']], '__package__'): 

2291 pdict['__package__'] = sys.modules[pdict['__module__']].__package__ 

2292 else: 

2293 pdict = get_caller_module_dict(2) 

2294 

2295 # Set start symbol if it's specified directly using an argument 

2296 if start is not None: 

2297 pdict['start'] = start 

2298 

2299 # Collect parser information from the dictionary 

2300 pinfo = ParserReflect(pdict, log=errorlog) 

2301 pinfo.get_all() 

2302 

2303 if pinfo.error: 

2304 raise YaccError('Unable to build parser') 

2305 

2306 if debuglog is None: 

2307 if debug: 

2308 try: 

2309 debuglog = PlyLogger(open(debugfile, 'w')) 

2310 except IOError as e: 

2311 errorlog.warning("Couldn't open %r. %s" % (debugfile, e)) 

2312 debuglog = NullLogger() 

2313 else: 

2314 debuglog = NullLogger() 

2315 

2316 debuglog.info('Created by PLY (http://www.dabeaz.com/ply)') 

2317 

2318 errors = False 

2319 

2320 # Validate the parser information 

2321 if pinfo.validate_all(): 

2322 raise YaccError('Unable to build parser') 

2323 

2324 if not pinfo.error_func: 

2325 errorlog.warning('no p_error() function is defined') 

2326 

2327 # Create a grammar object 

2328 grammar = Grammar(pinfo.tokens) 

2329 

2330 # Set precedence level for terminals 

2331 for term, assoc, level in pinfo.preclist: 

2332 try: 

2333 grammar.set_precedence(term, assoc, level) 

2334 except GrammarError as e: 

2335 errorlog.warning('%s', e) 

2336 

2337 # Add productions to the grammar 

2338 for funcname, gram in pinfo.grammar: 

2339 file, line, prodname, syms = gram 

2340 try: 

2341 grammar.add_production(prodname, syms, funcname, file, line) 

2342 except GrammarError as e: 

2343 errorlog.error('%s', e) 

2344 errors = True 

2345 

2346 # Set the grammar start symbol 

2347 try: 

2348 if start is None: 

2349 grammar.set_start(pinfo.start) 

2350 else: 

2351 grammar.set_start(start) 

2352 except GrammarError as e: 

2353 errorlog.error(str(e)) 

2354 errors = True 

2355 

2356 if errors: 

2357 raise YaccError('Unable to build parser') 

2358 

2359 # Verify the grammar structure 

2360 undefined_symbols = grammar.undefined_symbols() 

2361 for sym, prod in undefined_symbols: 

2362 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) 

2363 errors = True 

2364 

2365 unused_terminals = grammar.unused_terminals() 

2366 if unused_terminals: 

2367 debuglog.info('') 

2368 debuglog.info('Unused terminals:') 

2369 debuglog.info('') 

2370 for term in unused_terminals: 

2371 errorlog.warning('Token %r defined, but not used', term) 

2372 debuglog.info(' %s', term) 

2373 

2374 # Print out all productions to the debug log 

2375 if debug: 

2376 debuglog.info('') 

2377 debuglog.info('Grammar') 

2378 debuglog.info('') 

2379 for n, p in enumerate(grammar.Productions): 

2380 debuglog.info('Rule %-5d %s', n, p) 

2381 

2382 # Find unused non-terminals 

2383 unused_rules = grammar.unused_rules() 

2384 for prod in unused_rules: 

2385 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) 

2386 

2387 if len(unused_terminals) == 1: 

2388 errorlog.warning('There is 1 unused token') 

2389 if len(unused_terminals) > 1: 

2390 errorlog.warning('There are %d unused tokens', len(unused_terminals)) 

2391 

2392 if len(unused_rules) == 1: 

2393 errorlog.warning('There is 1 unused rule') 

2394 if len(unused_rules) > 1: 

2395 errorlog.warning('There are %d unused rules', len(unused_rules)) 

2396 

2397 if debug: 

2398 debuglog.info('') 

2399 debuglog.info('Terminals, with rules where they appear') 

2400 debuglog.info('') 

2401 terms = list(grammar.Terminals) 

2402 terms.sort() 

2403 for term in terms: 

2404 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) 

2405 

2406 debuglog.info('') 

2407 debuglog.info('Nonterminals, with rules where they appear') 

2408 debuglog.info('') 

2409 nonterms = list(grammar.Nonterminals) 

2410 nonterms.sort() 

2411 for nonterm in nonterms: 

2412 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) 

2413 debuglog.info('') 

2414 

2415 if check_recursion: 

2416 unreachable = grammar.find_unreachable() 

2417 for u in unreachable: 

2418 errorlog.warning('Symbol %r is unreachable', u) 

2419 

2420 infinite = grammar.infinite_cycles() 

2421 for inf in infinite: 

2422 errorlog.error('Infinite recursion detected for symbol %r', inf) 

2423 errors = True 

2424 

2425 unused_prec = grammar.unused_precedence() 

2426 for term, assoc in unused_prec: 

2427 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) 

2428 errors = True 

2429 

2430 if errors: 

2431 raise YaccError('Unable to build parser') 

2432 

2433 # Run the LRTable on the grammar 

2434 lr = LRTable(grammar, debuglog) 

2435 

2436 if debug: 

2437 num_sr = len(lr.sr_conflicts) 

2438 

2439 # Report shift/reduce and reduce/reduce conflicts 

2440 if num_sr == 1: 

2441 errorlog.warning('1 shift/reduce conflict') 

2442 elif num_sr > 1: 

2443 errorlog.warning('%d shift/reduce conflicts', num_sr) 

2444 

2445 num_rr = len(lr.rr_conflicts) 

2446 if num_rr == 1: 

2447 errorlog.warning('1 reduce/reduce conflict') 

2448 elif num_rr > 1: 

2449 errorlog.warning('%d reduce/reduce conflicts', num_rr) 

2450 

2451 # Write out conflicts to the output file 

2452 if debug and (lr.sr_conflicts or lr.rr_conflicts): 

2453 debuglog.warning('') 

2454 debuglog.warning('Conflicts:') 

2455 debuglog.warning('') 

2456 

2457 for state, tok, resolution in lr.sr_conflicts: 

2458 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) 

2459 

2460 already_reported = set() 

2461 for state, rule, rejected in lr.rr_conflicts: 

2462 if (state, id(rule), id(rejected)) in already_reported: 

2463 continue 

2464 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) 

2465 debuglog.warning('rejected rule (%s) in state %d', rejected, state) 

2466 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) 

2467 errorlog.warning('rejected rule (%s) in state %d', rejected, state) 

2468 already_reported.add((state, id(rule), id(rejected))) 

2469 

2470 warned_never = [] 

2471 for state, rule, rejected in lr.rr_conflicts: 

2472 if not rejected.reduced and (rejected not in warned_never): 

2473 debuglog.warning('Rule (%s) is never reduced', rejected) 

2474 errorlog.warning('Rule (%s) is never reduced', rejected) 

2475 warned_never.append(rejected) 

2476 

2477 # Build the parser 

2478 lr.bind_callables(pinfo.pdict) 

2479 parser = LRParser(lr, pinfo.error_func) 

2480 

2481 parse = parser.parse 

2482 return parser
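
# ----------------------------------------------------------------------------- 
# Usage sketch (a hypothetical user module, for illustration only). yacc() 
# reflects over the p_* functions and related names in the caller's 
# namespace, so a minimal grammar module looks like: 
# 
#     from ply import lex, yacc 
# 
#     tokens = ('NUMBER', 'PLUS') 
#     t_PLUS = r'\+' 
#     t_ignore = ' ' 
# 
#     def t_NUMBER(t): 
#         r'\d+' 
#         t.value = int(t.value) 
#         return t 
# 
#     def t_error(t): 
#         t.lexer.skip(1) 
# 
#     precedence = (('left', 'PLUS'),) 
# 
#     def p_expr_plus(p): 
#         'expr : expr PLUS expr' 
#         p[0] = p[1] + p[3] 
# 
#     def p_expr_num(p): 
#         'expr : NUMBER' 
#         p[0] = p[1] 
# 
#     def p_error(p): 
#         print('Syntax error') 
# 
#     lexer = lex.lex() 
#     parser = yacc.yacc() 
#     print(parser.parse('1 + 2 + 3'))    # prints 6 
# ----------------------------------------------------------------------------- 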