# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2017
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
# Spark and the GNU bison utility.
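#
# For example, a grammar rule is written as a function whose docstring
# holds the BNF (an illustrative sketch, not a rule from this module):
#
#      def p_statement_assign(p):
#          'statement : NAME EQUALS expr'
#          p[0] = ('assign', p[1], p[3])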
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). That implementation has since been
# replaced by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style."  Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import re
import types
import sys
import os.path
import inspect
import base64
import warnings

__version__    = '3.10'
__tabversion__ = '3.10'

#-----------------------------------------------------------------------------
#                  === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = True             # Debugging mode. If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = False            # Set to True if developing yacc. This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.
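#
# For example (a usage sketch; debuglog and errorlog are the keyword
# arguments accepted by yacc() for this purpose):
#
#      log = PlyLogger(sys.stderr)
#      parser = yacc.yacc(debuglog=log, errorlog=log)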

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + ' ...'
    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))

# Panic mode error recovery support.  This feature is being reworked--much of the
# code here is to offer a deprecation/backwards-compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''

def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r

#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm.
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline, endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos, endlexpos)
# representing the range of positional information for a symbol.
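#
# For example, inside a rule for 'expr : expr PLUS term' (an illustrative
# sketch, not a rule from this module):
#
#      def p_expr_plus(p):
#          'expr : expr PLUS term'
#          p[0] = p[1] + p[3]       # .value of the 'expr' and 'term' symbols
#          line = p.lineno(1)       # starting line number of the left 'expr'
#          span = p.lexspan(3)      # (lexpos, endlexpos) of 'term'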

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError
279# -----------------------------------------------------------------------------
280# == LRParser ==
281#
282# The LR Parsing engine.
283# -----------------------------------------------------------------------------
284
285class LRParser:
286 def __init__(self, lrtab, errorf):
287 self.productions = lrtab.lr_productions
288 self.action = lrtab.lr_action
289 self.goto = lrtab.lr_goto
290 self.errorfunc = errorf
291 self.set_defaulted_states()
292 self.errorok = True
293
294 def errok(self):
295 self.errorok = True
296
297 def restart(self):
298 del self.statestack[:]
299 del self.symstack[:]
300 sym = YaccSymbol()
301 sym.type = '$end'
302 self.symstack.append(sym)
303 self.statestack.append(0)
304
    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See: https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
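    #
    # If an application needs the lexer and parser to stay in strict
    # lock-step (one token fetched per parser action), this behavior can
    # be turned off on the parser instance (a usage sketch):
    #
    #      parser = yacc.yacc()
    #      parser.disable_defaulted_states()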
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}

    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)


    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging-enabled version of parse().  All changes made to the
    # parsing engine should be made here.  Optimized versions of this function
    # are automatically created by the ply/ygen.py script.  This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser's token() method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parsedebug-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of the parse() method.  DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script.  Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery


        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser's token() method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer


            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]


            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t


                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None


                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:


                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY.  This code is automatically generated
    # by the ply/ygen.py script.  Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery


        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser's token() method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer


            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]


            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t


                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None


                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym


                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:


                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:


                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-notrack-end

# -----------------------------------------------------------------------------
#                      === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions:
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr', 'PLUS', 'term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       line     - Line number where production function is defined
#
# The following attributes are computed automatically:
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            self.str = '%s -> <empty>' % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal stand-in for Production objects when
# reading table data from files.  It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]


# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here are
# its basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr', '.', 'PLUS', 'term']
#       number     - Production number.
#
#       lr_next    - Next LR item.  For example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
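#
# For example, the production 'expr : expr PLUS term' gives rise to the
# following items (an illustration of what lr_item(n) builds for n = 0..3):
#
#       expr -> . expr PLUS term
#       expr -> expr . PLUS term
#       expr -> expr PLUS . term
#       expr -> expr PLUS term .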
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, '.')
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            s = '%s -> <empty>' % self.name
        return s

    def __repr__(self):
        return 'LRItem(' + str(self) + ')'

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.  Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None

# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]   # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames = {}         # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap = {}           # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals = {}         # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First = {}             # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}            # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}        # Precedence rules for each terminal.  Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar


    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal.  assoc is the associativity such as
    # 'left', 'right', or 'nonassoc'.  level is a numeric level.
    #
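    # For example (an illustrative sketch; yacc() normally makes these calls
    # for you, based on the user's 'precedence' specification):
    #
    #      g = Grammar(['PLUS', 'TIMES', 'NUMBER'])
    #      g.set_precedence('PLUS',  'left', 1)
    #      g.set_precedence('TIMES', 'left', 2)
    #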
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.  For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr', 'PLUS', 'term'].
    #
    # Precedence is determined by the precedence of the right-most terminal symbol
    # in the production or by the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
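    #
    # For example, a %prec override appears at the end of a rule like this
    # (an illustrative sketch, not a rule from this module):
    #
    #      def p_expr_uminus(p):
    #          'expr : MINUS expr %prec UMINUS'
    #          p[0] = -p[2]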
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
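    #
    # For example, a rule such as 'a : a B', with no alternative for 'a'
    # that terminates, can never derive a string of only terminals.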
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
1728
1729 # -----------------------------------------------------------------------------
1730 # undefined_symbols()
1731 #
1732 # Find all symbols that were used the grammar, but not defined as tokens or
1733 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol
1734 # and prod is the production where the symbol was used.
1735 # -----------------------------------------------------------------------------
1736 def undefined_symbols(self):
1737 result = []
1738 for p in self.Productions:
1739 if not p:
1740 continue
1741
1742 for s in p.prod:
1743 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1744 result.append((s, p))
1745 return result
1746
1747 # -----------------------------------------------------------------------------
1748 # unused_terminals()
1749 #
# Find all terminals that were defined, but not used by the grammar.  Returns
# a list of the unused terminals.
1752 # -----------------------------------------------------------------------------
1753 def unused_terminals(self):
1754 unused_tok = []
1755 for s, v in self.Terminals.items():
1756 if s != 'error' and not v:
1757 unused_tok.append(s)
1758
1759 return unused_tok
1760
1761 # ------------------------------------------------------------------------------
1762 # unused_rules()
1763 #
# Find all grammar rules that were defined, but never used (for example,
# because they are unreachable).  Returns a list of productions.
1766 # ------------------------------------------------------------------------------
1767
1768 def unused_rules(self):
1769 unused_prod = []
1770 for s, v in self.Nonterminals.items():
1771 if not v:
1772 p = self.Prodnames[s][0]
1773 unused_prod.append(p)
1774 return unused_prod
1775
1776 # -----------------------------------------------------------------------------
1777 # unused_precedence()
1778 #
1779 # Returns a list of tuples (term,precedence) corresponding to precedence
1780 # rules that were never used by the grammar. term is the name of the terminal
1781 # on which precedence was applied and precedence is a string such as 'left' or
1782 # 'right' corresponding to the type of precedence.
1783 # -----------------------------------------------------------------------------
1784
1785 def unused_precedence(self):
1786 unused = []
1787 for termname in self.Precedence:
1788 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1789 unused.append((termname, self.Precedence[termname][0]))
1790
1791 return unused
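
# For example, if a grammar's precedence list included the hypothetical
# entry ('left', 'FOO'), but FOO is neither a declared token nor a name
# referenced through %prec, this method would return [('FOO', 'left')].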
1792
1793 # -------------------------------------------------------------------------
1794 # _first()
1795 #
1796 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1797 #
# During execution of compute_first(), the result may be incomplete.
1799 # Afterward (e.g., when called from compute_follow()), it will be complete.
1800 # -------------------------------------------------------------------------
1801 def _first(self, beta):
1802
1803 # We are computing First(x1,x2,x3,...,xn)
1804 result = []
1805 for x in beta:
1806 x_produces_empty = False
1807
1808 # Add all the non-<empty> symbols of First[x] to the result.
1809 for f in self.First[x]:
1810 if f == '<empty>':
1811 x_produces_empty = True
1812 else:
1813 if f not in result:
1814 result.append(f)
1815
1816 if x_produces_empty:
1817 # We have to consider the next x in beta,
1818 # i.e. stay in the loop.
1819 pass
1820 else:
1821 # We don't have to consider any further symbols in beta.
1822 break
1823 else:
1824 # There was no 'break' from the loop,
1825 # so x_produces_empty was true for all x in beta,
1826 # so beta produces empty as well.
1827 result.append('<empty>')
1828
1829 return result
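
# A worked example (hypothetical grammar symbols): suppose First[A] is
# ['<empty>', 'LPAREN'] and First[B] is ['NUMBER'].  Then:
#
#     _first((A, B, 'SEMI')) == ['LPAREN', 'NUMBER']
#
# A can derive empty, so B is consulted next; B cannot derive empty, so
# SEMI is never reached and '<empty>' is not added to the result.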
1830
1831 # -------------------------------------------------------------------------
1832 # compute_first()
1833 #
1834 # Compute the value of FIRST1(X) for all symbols
1835 # -------------------------------------------------------------------------
1836 def compute_first(self):
1837 if self.First:
1838 return self.First
1839
1840 # Terminals:
1841 for t in self.Terminals:
1842 self.First[t] = [t]
1843
1844 self.First['$end'] = ['$end']
1845
1846 # Nonterminals:
1847
1848 # Initialize to the empty set:
1849 for n in self.Nonterminals:
1850 self.First[n] = []
1851
1852 # Then propagate symbols until no change:
1853 while True:
1854 some_change = False
1855 for n in self.Nonterminals:
1856 for p in self.Prodnames[n]:
1857 for f in self._first(p.prod):
1858 if f not in self.First[n]:
1859 self.First[n].append(f)
1860 some_change = True
1861 if not some_change:
1862 break
1863
1864 return self.First
1865
1866 # ---------------------------------------------------------------------
1867 # compute_follow()
1868 #
1869 # Computes all of the follow sets for every non-terminal symbol. The
1870 # follow set is the set of all symbols that might follow a given
1871 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1872 # ---------------------------------------------------------------------
1873 def compute_follow(self, start=None):
1874 # If already computed, return the result
1875 if self.Follow:
1876 return self.Follow
1877
1878 # If first sets not computed yet, do that first.
1879 if not self.First:
1880 self.compute_first()
1881
# Initialize every follow set to empty; then add '$end' to the follow set
# of the start symbol
1883 for k in self.Nonterminals:
1884 self.Follow[k] = []
1885
1886 if not start:
1887 start = self.Productions[1].name
1888
1889 self.Follow[start] = ['$end']
1890
1891 while True:
1892 didadd = False
1893 for p in self.Productions[1:]:
# Scan the right-hand side of this production for nonterminals
1895 for i, B in enumerate(p.prod):
1896 if B in self.Nonterminals:
1897 # Okay. We got a non-terminal in a production
1898 fst = self._first(p.prod[i+1:])
1899 hasempty = False
1900 for f in fst:
1901 if f != '<empty>' and f not in self.Follow[B]:
1902 self.Follow[B].append(f)
1903 didadd = True
1904 if f == '<empty>':
1905 hasempty = True
1906 if hasempty or i == (len(p.prod)-1):
# Add elements of Follow(p.name) to Follow(B)
1908 for f in self.Follow[p.name]:
1909 if f not in self.Follow[B]:
1910 self.Follow[B].append(f)
1911 didadd = True
1912 if not didadd:
1913 break
1914 return self.Follow
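
# A worked example, using a hypothetical grammar with start symbol 'expr':
#
#     expr : expr PLUS term
#          | term
#     term : NUMBER
#
# Follow(expr) = ['$end', 'PLUS'] ('$end' because expr is the start symbol,
# 'PLUS' because expr appears immediately before PLUS).  Follow(term) picks up
# everything in Follow(expr), since term appears at the end of both expr
# productions.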
1915
1916
1917 # -----------------------------------------------------------------------------
1918 # build_lritems()
1919 #
1920 # This function walks the list of productions and builds a complete set of the
# LR items.  The LR items are stored in two ways: First, each production keeps
# the list of its own LR items in p.lr_items.  Second, the items of a
# production are chained together through their lr_next attributes.  For example:
1924 #
1925 # E -> E PLUS E
1926 #
1927 # Creates the list
1928 #
1929 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1930 # -----------------------------------------------------------------------------
1931
1932 def build_lritems(self):
1933 for p in self.Productions:
1934 lastlri = p
1935 i = 0
1936 lr_items = []
1937 while True:
1938 if i > len(p):
1939 lri = None
1940 else:
1941 lri = LRItem(p, i)
1942 # Precompute the list of productions immediately following
1943 try:
1944 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1945 except (IndexError, KeyError):
1946 lri.lr_after = []
1947 try:
1948 lri.lr_before = lri.prod[i-1]
1949 except IndexError:
1950 lri.lr_before = None
1951
1952 lastlri.lr_next = lri
1953 if not lri:
1954 break
1955 lr_items.append(lri)
1956 lastlri = lri
1957 i += 1
1958 p.lr_items = lr_items
1959
1960# -----------------------------------------------------------------------------
1961# == Class LRTable ==
1962#
# This class represents a basic table of LR parsing information.
1964# Methods for generating the tables are not defined here. They are defined
1965# in the derived class LRGeneratedTable.
1966# -----------------------------------------------------------------------------
1967
1968class VersionError(YaccError):
1969 pass
1970
1971class LRTable(object):
1972 def __init__(self):
1973 self.lr_action = None
1974 self.lr_goto = None
1975 self.lr_productions = None
1976 self.lr_method = None
1977
1978 def read_table(self, module):
1979 if isinstance(module, types.ModuleType):
1980 parsetab = module
1981 else:
1982 exec('import %s' % module)
1983 parsetab = sys.modules[module]
1984
1985 if parsetab._tabversion != __tabversion__:
1986 raise VersionError('yacc table file version is out of date')
1987
1988 self.lr_action = parsetab._lr_action
1989 self.lr_goto = parsetab._lr_goto
1990
1991 self.lr_productions = []
1992 for p in parsetab._lr_productions:
1993 self.lr_productions.append(MiniProduction(*p))
1994
1995 self.lr_method = parsetab._lr_method
1996 return parsetab._lr_signature
1997
1998 def read_pickle(self, filename):
1999 try:
2000 import cPickle as pickle
2001 except ImportError:
2002 import pickle
2003
2004 if not os.path.exists(filename):
2005 raise ImportError
2006
2007 in_f = open(filename, 'rb')
2008
2009 tabversion = pickle.load(in_f)
2010 if tabversion != __tabversion__:
2011 raise VersionError('yacc table file version is out of date')
2012 self.lr_method = pickle.load(in_f)
2013 signature = pickle.load(in_f)
2014 self.lr_action = pickle.load(in_f)
2015 self.lr_goto = pickle.load(in_f)
2016 productions = pickle.load(in_f)
2017
2018 self.lr_productions = []
2019 for p in productions:
2020 self.lr_productions.append(MiniProduction(*p))
2021
2022 in_f.close()
2023 return signature
2024
2025 # Bind all production function names to callable objects in pdict
2026 def bind_callables(self, pdict):
2027 for p in self.lr_productions:
2028 p.bind(pdict)
2029
2030
2031# -----------------------------------------------------------------------------
2032# === LR Generator ===
2033#
# The following classes and functions are used to generate LR parsing tables
# from a grammar.
2036# -----------------------------------------------------------------------------
2037
2038# -----------------------------------------------------------------------------
2039# digraph()
2040# traverse()
2041#
2042# The following two functions are used to compute set valued functions
2043# of the form:
2044#
2045# F(x) = F'(x) U U{F(y) | x R y}
2046#
2047# This is used to compute the values of Read() sets as well as FOLLOW sets
2048# in LALR(1) generation.
2049#
2050# Inputs: X - An input set
2051# R - A relation
2052# FP - Set-valued function
2053# ------------------------------------------------------------------------------
2054
2055def digraph(X, R, FP):
2056 N = {}
2057 for x in X:
2058 N[x] = 0
2059 stack = []
2060 F = {}
2061 for x in X:
2062 if N[x] == 0:
2063 traverse(x, N, stack, F, X, R, FP)
2064 return F
2065
2066def traverse(x, N, stack, F, X, R, FP):
2067 stack.append(x)
2068 d = len(stack)
2069 N[x] = d
2070 F[x] = FP(x) # F(X) <- F'(x)
2071
2072 rel = R(x) # Get y's related to x
2073 for y in rel:
2074 if N[y] == 0:
2075 traverse(y, N, stack, F, X, R, FP)
2076 N[x] = min(N[x], N[y])
2077 for a in F.get(y, []):
2078 if a not in F[x]:
2079 F[x].append(a)
2080 if N[x] == d:
2081 N[stack[-1]] = MAXINT
2082 F[stack[-1]] = F[x]
2083 element = stack.pop()
2084 while element != x:
2085 N[stack[-1]] = MAXINT
2086 F[stack[-1]] = F[x]
2087 element = stack.pop()
2088
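# A small worked example (hypothetical inputs): with X = ['a', 'b'],
# R = lambda x: {'a': ['b'], 'b': []}[x], and FP = lambda x: [x.upper()],
# digraph(X, R, FP) would compute
#
#     F == {'a': ['A', 'B'], 'b': ['B']}
#
# since F(a) = FP(a) U F(b) and F(b) = FP(b).
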
2089class LALRError(YaccError):
2090 pass
2091
2092# -----------------------------------------------------------------------------
2093# == LRGeneratedTable ==
2094#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write_table() and pickle_table().
2097# -----------------------------------------------------------------------------
2098
2099class LRGeneratedTable(LRTable):
2100 def __init__(self, grammar, method='LALR', log=None):
2101 if method not in ['SLR', 'LALR']:
2102 raise LALRError('Unsupported method %s' % method)
2103
2104 self.grammar = grammar
2105 self.lr_method = method
2106
2107 # Set up the logger
2108 if not log:
2109 log = NullLogger()
2110 self.log = log
2111
2112 # Internal attributes
2113 self.lr_action = {} # Action table
2114 self.lr_goto = {} # Goto table
2115 self.lr_productions = grammar.Productions # Copy of grammar Production array
2116 self.lr_goto_cache = {} # Cache of computed gotos
2117 self.lr0_cidhash = {} # Cache of closures
2118
2119 self._add_count = 0 # Internal counter used to detect cycles
2120
# Diagnostic information filled in by the table generator
2122 self.sr_conflict = 0
2123 self.rr_conflict = 0
2124 self.conflicts = [] # List of conflicts
2125
2126 self.sr_conflicts = []
2127 self.rr_conflicts = []
2128
2129 # Build the tables
2130 self.grammar.build_lritems()
2131 self.grammar.compute_first()
2132 self.grammar.compute_follow()
2133 self.lr_parse_table()
2134
2135 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2136
2137 def lr0_closure(self, I):
2138 self._add_count += 1
2139
2140 # Add everything in I to J
2141 J = I[:]
2142 didadd = True
2143 while didadd:
2144 didadd = False
2145 for j in J:
2146 for x in j.lr_after:
2147 if getattr(x, 'lr0_added', 0) == self._add_count:
2148 continue
2149 # Add B --> .G to J
2150 J.append(x.lr_next)
2151 x.lr0_added = self._add_count
2152 didadd = True
2153
2154 return J
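
# For illustration, with hypothetical productions E -> E PLUS T | T, the
# closure of the item set [S' -> . E] also contains E -> . E PLUS T and
# E -> . T (and, transitively, the initial items of T's productions),
# because the dot sits immediately to the left of a nonterminal.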
2155
2156 # Compute the LR(0) goto function goto(I,X) where I is a set
2157 # of LR(0) items and X is a grammar symbol. This function is written
2158 # in a way that guarantees uniqueness of the generated goto sets
2159 # (i.e. the same goto set will never be returned as two different Python
2160 # objects). With uniqueness, we can later do fast set comparisons using
2161 # id(obj) instead of element-wise comparison.
2162
2163 def lr0_goto(self, I, x):
2164 # First we look for a previously cached entry
2165 g = self.lr_goto_cache.get((id(I), x))
2166 if g:
2167 return g
2168
2169 # Now we generate the goto set in a way that guarantees uniqueness
2170 # of the result
2171
2172 s = self.lr_goto_cache.get(x)
2173 if not s:
2174 s = {}
2175 self.lr_goto_cache[x] = s
2176
2177 gs = []
2178 for p in I:
2179 n = p.lr_next
2180 if n and n.lr_before == x:
2181 s1 = s.get(id(n))
2182 if not s1:
2183 s1 = {}
2184 s[id(n)] = s1
2185 gs.append(n)
2186 s = s1
2187 g = s.get('$end')
2188 if not g:
2189 if gs:
2190 g = self.lr0_closure(gs)
2191 s['$end'] = g
2192 else:
2193 s['$end'] = gs
2194 self.lr_goto_cache[(id(I), x)] = g
2195 return g
2196
2197 # Compute the LR(0) sets of item function
2198 def lr0_items(self):
2199 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2200 i = 0
2201 for I in C:
2202 self.lr0_cidhash[id(I)] = i
2203 i += 1
2204
# Loop over the items in C and each grammar symbol
2206 i = 0
2207 while i < len(C):
2208 I = C[i]
2209 i += 1
2210
2211 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2212 asyms = {}
2213 for ii in I:
2214 for s in ii.usyms:
2215 asyms[s] = None
2216
2217 for x in asyms:
2218 g = self.lr0_goto(I, x)
2219 if not g or id(g) in self.lr0_cidhash:
2220 continue
2221 self.lr0_cidhash[id(g)] = len(C)
2222 C.append(g)
2223
2224 return C
2225
2226 # -----------------------------------------------------------------------------
2227 # ==== LALR(1) Parsing ====
2228 #
2229 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2230 # relying upon Follow() sets when performing reductions, a more selective
2231 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2232 # Thus, we mainly just have to focus on calculating the lookahead sets.
2233 #
# The method used here is due to DeRemer and Pennello (1982).
#
# DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2237 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2238 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2239 #
2240 # Further details can also be found in:
2241 #
2242 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2243 # McGraw-Hill Book Company, (1985).
2244 #
2245 # -----------------------------------------------------------------------------
2246
2247 # -----------------------------------------------------------------------------
2248 # compute_nullable_nonterminals()
2249 #
# Creates a set containing all of the non-terminals that can derive an
# empty string.
2252 # -----------------------------------------------------------------------------
2253
2254 def compute_nullable_nonterminals(self):
2255 nullable = set()
2256 num_nullable = 0
2257 while True:
2258 for p in self.grammar.Productions[1:]:
2259 if p.len == 0:
2260 nullable.add(p.name)
2261 continue
2262 for t in p.prod:
2263 if t not in nullable:
2264 break
2265 else:
2266 nullable.add(p.name)
2267 if len(nullable) == num_nullable:
2268 break
2269 num_nullable = len(nullable)
2270 return nullable
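
# For example, given the hypothetical rules
#
#     opt_sign : PLUS
#              | MINUS
#              |
#     number   : opt_sign DIGITS
#
# the first pass adds 'opt_sign' (it has a zero-length production); 'number'
# never becomes nullable because DIGITS is a terminal, so the result is the
# set {'opt_sign'}.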
2271
2272 # -----------------------------------------------------------------------------
2273 # find_nonterminal_trans(C)
2274 #
# Given a set of LR(0) items, this function finds all of the non-terminal
2276 # transitions. These are transitions in which a dot appears immediately before
2277 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2278 # is the state number and N is the nonterminal symbol.
2279 #
2280 # The input C is the set of LR(0) items.
2281 # -----------------------------------------------------------------------------
2282
2283 def find_nonterminal_transitions(self, C):
2284 trans = []
2285 for stateno, state in enumerate(C):
2286 for p in state:
2287 if p.lr_index < p.len - 1:
2288 t = (stateno, p.prod[p.lr_index+1])
2289 if t[1] in self.grammar.Nonterminals:
2290 if t not in trans:
2291 trans.append(t)
2292 return trans
2293
2294 # -----------------------------------------------------------------------------
2295 # dr_relation()
2296 #
2297 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2298 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2299 #
2300 # Returns a list of terminals.
2301 # -----------------------------------------------------------------------------
2302
2303 def dr_relation(self, C, trans, nullable):
2305 state, N = trans
2306 terms = []
2307
2308 g = self.lr0_goto(C[state], N)
2309 for p in g:
2310 if p.lr_index < p.len - 1:
2311 a = p.prod[p.lr_index+1]
2312 if a in self.grammar.Terminals:
2313 if a not in terms:
2314 terms.append(a)
2315
2316 # This extra bit is to handle the start state
2317 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2318 terms.append('$end')
2319
2320 return terms
2321
2322 # -----------------------------------------------------------------------------
2323 # reads_relation()
2324 #
2325 # Computes the READS() relation (p,A) READS (t,C).
2326 # -----------------------------------------------------------------------------
2327
2328 def reads_relation(self, C, trans, empty):
2329 # Look for empty transitions
2330 rel = []
2331 state, N = trans
2332
2333 g = self.lr0_goto(C[state], N)
2334 j = self.lr0_cidhash.get(id(g), -1)
2335 for p in g:
2336 if p.lr_index < p.len - 1:
2337 a = p.prod[p.lr_index + 1]
2338 if a in empty:
2339 rel.append((j, a))
2340
2341 return rel
2342
2343 # -----------------------------------------------------------------------------
2344 # compute_lookback_includes()
2345 #
2346 # Determines the lookback and includes relations
2347 #
2348 # LOOKBACK:
2349 #
2350 # This relation is determined by running the LR(0) state machine forward.
2351 # For example, starting with a production "N : . A B C", we run it forward
2352 # to obtain "N : A B C ." We then build a relationship between this final
2353 # state and the starting state. These relationships are stored in a dictionary
2354 # lookdict.
2355 #
2356 # INCLUDES:
2357 #
2358 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2359 #
2360 # This relation is used to determine non-terminal transitions that occur
2361 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2362 # if the following holds:
2363 #
2364 # B -> LAT, where T -> epsilon and p' -L-> p
2365 #
2366 # L is essentially a prefix (which may be empty), T is a suffix that must be
2367 # able to derive an empty string. State p' must lead to state p with the string L.
2368 #
2369 # -----------------------------------------------------------------------------
2370
2371 def compute_lookback_includes(self, C, trans, nullable):
2372 lookdict = {} # Dictionary of lookback relations
2373 includedict = {} # Dictionary of include relations
2374
2375 # Make a dictionary of non-terminal transitions
2376 dtrans = {}
2377 for t in trans:
2378 dtrans[t] = 1
2379
2380 # Loop over all transitions and compute lookbacks and includes
2381 for state, N in trans:
2382 lookb = []
2383 includes = []
2384 for p in C[state]:
2385 if p.name != N:
2386 continue
2387
2388 # Okay, we have a name match. We now follow the production all the way
2389 # through the state machine until we get the . on the right hand side
2390
2391 lr_index = p.lr_index
2392 j = state
2393 while lr_index < p.len - 1:
2394 lr_index = lr_index + 1
2395 t = p.prod[lr_index]
2396
2397 # Check to see if this symbol and state are a non-terminal transition
2398 if (j, t) in dtrans:
2399 # Yes. Okay, there is some chance that this is an includes relation
2400 # the only way to know for certain is whether the rest of the
2401 # production derives empty
2402
2403 li = lr_index + 1
2404 while li < p.len:
2405 if p.prod[li] in self.grammar.Terminals:
break  # No, forget it
2407 if p.prod[li] not in nullable:
2408 break
2409 li = li + 1
2410 else:
2411 # Appears to be a relation between (j,t) and (state,N)
2412 includes.append((j, t))
2413
2414 g = self.lr0_goto(C[j], t) # Go to next set
2415 j = self.lr0_cidhash.get(id(g), -1) # Go to next state
2416
# When we get here, j is the final state.  Now we have to locate the production
2418 for r in C[j]:
2419 if r.name != p.name:
2420 continue
2421 if r.len != p.len:
2422 continue
2423 i = 0
# This loop is comparing a production ". A B C" with "A B C ."
2425 while i < r.lr_index:
2426 if r.prod[i] != p.prod[i+1]:
2427 break
2428 i = i + 1
2429 else:
2430 lookb.append((j, r))
2431 for i in includes:
2432 if i not in includedict:
2433 includedict[i] = []
2434 includedict[i].append((state, N))
2435 lookdict[(state, N)] = lookb
2436
2437 return lookdict, includedict
2438
2439 # -----------------------------------------------------------------------------
2440 # compute_read_sets()
2441 #
2442 # Given a set of LR(0) items, this function computes the read sets.
2443 #
2444 # Inputs: C = Set of LR(0) items
2445 # ntrans = Set of nonterminal transitions
# nullable = Set of nullable nonterminals
#
# Returns a dictionary mapping each nonterminal transition to its read set
2449 # -----------------------------------------------------------------------------
2450
2451 def compute_read_sets(self, C, ntrans, nullable):
2452 FP = lambda x: self.dr_relation(C, x, nullable)
2453 R = lambda x: self.reads_relation(C, x, nullable)
2454 F = digraph(ntrans, R, FP)
2455 return F
2456
2457 # -----------------------------------------------------------------------------
2458 # compute_follow_sets()
2459 #
2460 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2461 # and an include set, this function computes the follow sets
2462 #
2463 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2464 #
2465 # Inputs:
2466 # ntrans = Set of nonterminal transitions
2467 # readsets = Readset (previously computed)
2468 # inclsets = Include sets (previously computed)
2469 #
# Returns a dictionary mapping each nonterminal transition to its follow set
2471 # -----------------------------------------------------------------------------
2472
2473 def compute_follow_sets(self, ntrans, readsets, inclsets):
2474 FP = lambda x: readsets[x]
2475 R = lambda x: inclsets.get(x, [])
2476 F = digraph(ntrans, R, FP)
2477 return F
2478
2479 # -----------------------------------------------------------------------------
2480 # add_lookaheads()
2481 #
2482 # Attaches the lookahead symbols to grammar rules.
2483 #
2484 # Inputs: lookbacks - Set of lookback relations
2485 # followset - Computed follow set
2486 #
2487 # This function directly attaches the lookaheads to productions contained
2488 # in the lookbacks set
2489 # -----------------------------------------------------------------------------
2490
2491 def add_lookaheads(self, lookbacks, followset):
2492 for trans, lb in lookbacks.items():
2493 # Loop over productions in lookback
2494 for state, p in lb:
2495 if state not in p.lookaheads:
2496 p.lookaheads[state] = []
2497 f = followset.get(trans, [])
2498 for a in f:
2499 if a not in p.lookaheads[state]:
2500 p.lookaheads[state].append(a)
2501
2502 # -----------------------------------------------------------------------------
2503 # add_lalr_lookaheads()
2504 #
2505 # This function does all of the work of adding lookahead information for use
2506 # with LALR parsing
2507 # -----------------------------------------------------------------------------
2508
2509 def add_lalr_lookaheads(self, C):
2510 # Determine all of the nullable nonterminals
2511 nullable = self.compute_nullable_nonterminals()
2512
2513 # Find all non-terminal transitions
2514 trans = self.find_nonterminal_transitions(C)
2515
2516 # Compute read sets
2517 readsets = self.compute_read_sets(C, trans, nullable)
2518
2519 # Compute lookback/includes relations
2520 lookd, included = self.compute_lookback_includes(C, trans, nullable)
2521
2522 # Compute LALR FOLLOW sets
2523 followsets = self.compute_follow_sets(trans, readsets, included)
2524
2525 # Add all of the lookaheads
2526 self.add_lookaheads(lookd, followsets)
2527
2528 # -----------------------------------------------------------------------------
2529 # lr_parse_table()
2530 #
2531 # This function constructs the parse tables for SLR or LALR
2532 # -----------------------------------------------------------------------------
2533 def lr_parse_table(self):
2534 Productions = self.grammar.Productions
2535 Precedence = self.grammar.Precedence
2536 goto = self.lr_goto # Goto array
2537 action = self.lr_action # Action array
2538 log = self.log # Logger for output
2539
2540 actionp = {} # Action production array (temporary)
2541
2542 log.info('Parsing method: %s', self.lr_method)
2543
2544 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2545 # This determines the number of states
2546
2547 C = self.lr0_items()
2548
2549 if self.lr_method == 'LALR':
2550 self.add_lalr_lookaheads(C)
2551
2552 # Build the parser table, state by state
2553 st = 0
2554 for I in C:
2555 # Loop over each production in I
2556 actlist = [] # List of actions
2557 st_action = {}
2558 st_actionp = {}
2559 st_goto = {}
2560 log.info('')
2561 log.info('state %d', st)
2562 log.info('')
2563 for p in I:
2564 log.info(' (%d) %s', p.number, p)
2565 log.info('')
2566
2567 for p in I:
2568 if p.len == p.lr_index + 1:
2569 if p.name == "S'":
2570 # Start symbol. Accept!
2571 st_action['$end'] = 0
2572 st_actionp['$end'] = p
2573 else:
2574 # We are at the end of a production. Reduce!
2575 if self.lr_method == 'LALR':
2576 laheads = p.lookaheads[st]
2577 else:
2578 laheads = self.grammar.Follow[p.name]
2579 for a in laheads:
2580 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
2581 r = st_action.get(a)
2582 if r is not None:
2583 # Whoa. Have a shift/reduce or reduce/reduce conflict
2584 if r > 0:
2585 # Need to decide on shift or reduce here
2586 # By default we favor shifting. Need to add
2587 # some precedence rules here.
2588
2589 # Shift precedence comes from the token
2590 sprec, slevel = Precedence.get(a, ('right', 0))
2591
2592 # Reduce precedence comes from rule being reduced (p)
2593 rprec, rlevel = Productions[p.number].prec
2594
2595 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2596 # We really need to reduce here.
2597 st_action[a] = -p.number
2598 st_actionp[a] = p
2599 if not slevel and not rlevel:
2600 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2601 self.sr_conflicts.append((st, a, 'reduce'))
2602 Productions[p.number].reduced += 1
2603 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2604 st_action[a] = None
2605 else:
2606 # Hmmm. Guess we'll keep the shift
2607 if not rlevel:
2608 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2609 self.sr_conflicts.append((st, a, 'shift'))
2610 elif r < 0:
2611 # Reduce/reduce conflict. In this case, we favor the rule
2612 # that was defined first in the grammar file
2613 oldp = Productions[-r]
2614 pp = Productions[p.number]
2615 if oldp.line > pp.line:
2616 st_action[a] = -p.number
2617 st_actionp[a] = p
2618 chosenp, rejectp = pp, oldp
2619 Productions[p.number].reduced += 1
2620 Productions[oldp.number].reduced -= 1
2621 else:
2622 chosenp, rejectp = oldp, pp
2623 self.rr_conflicts.append((st, chosenp, rejectp))
2624 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
2625 a, st_actionp[a].number, st_actionp[a])
2626 else:
2627 raise LALRError('Unknown conflict in state %d' % st)
2628 else:
2629 st_action[a] = -p.number
2630 st_actionp[a] = p
2631 Productions[p.number].reduced += 1
2632 else:
2633 i = p.lr_index
2634 a = p.prod[i+1] # Get symbol right after the "."
2635 if a in self.grammar.Terminals:
2636 g = self.lr0_goto(I, a)
2637 j = self.lr0_cidhash.get(id(g), -1)
2638 if j >= 0:
2639 # We are in a shift state
2640 actlist.append((a, p, 'shift and go to state %d' % j))
2641 r = st_action.get(a)
2642 if r is not None:
# Whoa, we have a shift/reduce or shift/shift conflict
2644 if r > 0:
2645 if r != j:
2646 raise LALRError('Shift/shift conflict in state %d' % st)
2647 elif r < 0:
2648 # Do a precedence check.
2649 # - if precedence of reduce rule is higher, we reduce.
2650 # - if precedence of reduce is same and left assoc, we reduce.
2651 # - otherwise we shift
2652
2653 # Shift precedence comes from the token
2654 sprec, slevel = Precedence.get(a, ('right', 0))
2655
2656 # Reduce precedence comes from the rule that could have been reduced
2657 rprec, rlevel = Productions[st_actionp[a].number].prec
2658
2659 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2660 # We decide to shift here... highest precedence to shift
2661 Productions[st_actionp[a].number].reduced -= 1
2662 st_action[a] = j
2663 st_actionp[a] = p
2664 if not rlevel:
2665 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2666 self.sr_conflicts.append((st, a, 'shift'))
2667 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2668 st_action[a] = None
2669 else:
2670 # Hmmm. Guess we'll keep the reduce
2671 if not slevel and not rlevel:
2672 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2673 self.sr_conflicts.append((st, a, 'reduce'))
2674
2675 else:
2676 raise LALRError('Unknown conflict in state %d' % st)
2677 else:
2678 st_action[a] = j
2679 st_actionp[a] = p
2680
2681 # Print the actions associated with each terminal
2682 _actprint = {}
2683 for a, p, m in actlist:
2684 if a in st_action:
2685 if p is st_actionp[a]:
2686 log.info(' %-15s %s', a, m)
2687 _actprint[(a, m)] = 1
2688 log.info('')
2689 # Print the actions that were not used. (debugging)
2690 not_used = 0
2691 for a, p, m in actlist:
2692 if a in st_action:
2693 if p is not st_actionp[a]:
2694 if not (a, m) in _actprint:
2695 log.debug(' ! %-15s [ %s ]', a, m)
2696 not_used = 1
2697 _actprint[(a, m)] = 1
2698 if not_used:
2699 log.debug('')
2700
2701 # Construct the goto table for this state
2702
2703 nkeys = {}
2704 for ii in I:
2705 for s in ii.usyms:
2706 if s in self.grammar.Nonterminals:
2707 nkeys[s] = None
2708 for n in nkeys:
2709 g = self.lr0_goto(I, n)
2710 j = self.lr0_cidhash.get(id(g), -1)
2711 if j >= 0:
2712 st_goto[n] = j
2713 log.info(' %-30s shift and go to state %d', n, j)
2714
2715 action[st] = st_action
2716 actionp[st] = st_actionp
2717 goto[st] = st_goto
2718 st += 1
2719
2720 # -----------------------------------------------------------------------------
# write_table()
#
# This function writes the LR parsing tables to a Python module file
2724 # -----------------------------------------------------------------------------
2725
2726 def write_table(self, tabmodule, outputdir='', signature=''):
2727 if isinstance(tabmodule, types.ModuleType):
2728 raise IOError("Won't overwrite existing tabmodule")
2729
2730 basemodulename = tabmodule.split('.')[-1]
2731 filename = os.path.join(outputdir, basemodulename) + '.py'
2732 try:
2733 f = open(filename, 'w')
2734
2735 f.write('''
2736# %s
2737# This file is automatically generated. Do not edit.
2738_tabversion = %r
2739
2740_lr_method = %r
2741
2742_lr_signature = %r
2743 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2744
2745 # Change smaller to 0 to go back to original tables
2746 smaller = 1
2747
2748 # Factor out names to try and make smaller
2749 if smaller:
2750 items = {}
2751
2752 for s, nd in self.lr_action.items():
2753 for name, v in nd.items():
2754 i = items.get(name)
2755 if not i:
2756 i = ([], [])
2757 items[name] = i
2758 i[0].append(s)
2759 i[1].append(v)
2760
2761 f.write('\n_lr_action_items = {')
2762 for k, v in items.items():
2763 f.write('%r:([' % k)
2764 for i in v[0]:
2765 f.write('%r,' % i)
2766 f.write('],[')
2767 for i in v[1]:
2768 f.write('%r,' % i)
2769
2770 f.write(']),')
2771 f.write('}\n')
2772
2773 f.write('''
2774_lr_action = {}
2775for _k, _v in _lr_action_items.items():
2776 for _x,_y in zip(_v[0],_v[1]):
2777 if not _x in _lr_action: _lr_action[_x] = {}
2778 _lr_action[_x][_k] = _y
2779del _lr_action_items
2780''')
2781
2782 else:
2783 f.write('\n_lr_action = { ')
2784 for k, v in self.lr_action.items():
2785 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2786 f.write('}\n')
2787
2788 if smaller:
2789 # Factor out names to try and make smaller
2790 items = {}
2791
2792 for s, nd in self.lr_goto.items():
2793 for name, v in nd.items():
2794 i = items.get(name)
2795 if not i:
2796 i = ([], [])
2797 items[name] = i
2798 i[0].append(s)
2799 i[1].append(v)
2800
2801 f.write('\n_lr_goto_items = {')
2802 for k, v in items.items():
2803 f.write('%r:([' % k)
2804 for i in v[0]:
2805 f.write('%r,' % i)
2806 f.write('],[')
2807 for i in v[1]:
2808 f.write('%r,' % i)
2809
2810 f.write(']),')
2811 f.write('}\n')
2812
2813 f.write('''
2814_lr_goto = {}
2815for _k, _v in _lr_goto_items.items():
2816 for _x, _y in zip(_v[0], _v[1]):
2817 if not _x in _lr_goto: _lr_goto[_x] = {}
2818 _lr_goto[_x][_k] = _y
2819del _lr_goto_items
2820''')
2821 else:
2822 f.write('\n_lr_goto = { ')
2823 for k, v in self.lr_goto.items():
2824 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2825 f.write('}\n')
2826
2827 # Write production table
2828 f.write('_lr_productions = [\n')
2829 for p in self.lr_productions:
2830 if p.func:
2831 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2832 p.func, os.path.basename(p.file), p.line))
2833 else:
2834 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2835 f.write(']\n')
2836 f.close()
2837
except IOError:
2839 raise
2840
2841
2842 # -----------------------------------------------------------------------------
2843 # pickle_table()
2844 #
# This function pickles the LR parsing tables to the file named by filename
2846 # -----------------------------------------------------------------------------
2847
2848 def pickle_table(self, filename, signature=''):
2849 try:
2850 import cPickle as pickle
2851 except ImportError:
2852 import pickle
2853 with open(filename, 'wb') as outf:
2854 pickle.dump(__tabversion__, outf, pickle_protocol)
2855 pickle.dump(self.lr_method, outf, pickle_protocol)
2856 pickle.dump(signature, outf, pickle_protocol)
2857 pickle.dump(self.lr_action, outf, pickle_protocol)
2858 pickle.dump(self.lr_goto, outf, pickle_protocol)
2859
2860 outp = []
2861 for p in self.lr_productions:
2862 if p.func:
2863 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2864 else:
2865 outp.append((str(p), p.name, p.len, None, None, None))
2866 pickle.dump(outp, outf, pickle_protocol)
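
# Note: the pickle stream written here mirrors what LRTable.read_pickle()
# consumes: the table version, the LR method, the signature, the action and
# goto tables, and finally the production list, in that order.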
2867
2868# -----------------------------------------------------------------------------
2869# === INTROSPECTION ===
2870#
2871# The following functions and classes are used to implement the PLY
2872# introspection features followed by the yacc() function itself.
2873# -----------------------------------------------------------------------------
2874
2875# -----------------------------------------------------------------------------
2876# get_caller_module_dict()
2877#
2878# This function returns a dictionary containing all of the symbols defined within
2879# a caller further down the call stack. This is used to get the environment
2880# associated with the yacc() call if none was provided.
2881# -----------------------------------------------------------------------------
2882
2883def get_caller_module_dict(levels):
2884 f = sys._getframe(levels)
2885 ldict = f.f_globals.copy()
2886 if f.f_globals != f.f_locals:
2887 ldict.update(f.f_locals)
2888 return ldict
2889
2890# -----------------------------------------------------------------------------
2891# parse_grammar()
2892#
2893# This takes a raw grammar rule string and parses it into production data
2894# -----------------------------------------------------------------------------
2895def parse_grammar(doc, file, line):
2896 grammar = []
2897 # Split the doc string into lines
2898 pstrings = doc.splitlines()
2899 lastp = None
2900 dline = line
2901 for ps in pstrings:
2902 dline += 1
2903 p = ps.split()
2904 if not p:
2905 continue
2906 try:
2907 if p[0] == '|':
2908 # This is a continuation of a previous rule
2909 if not lastp:
2910 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2911 prodname = lastp
2912 syms = p[1:]
2913 else:
2914 prodname = p[0]
2915 lastp = prodname
2916 syms = p[2:]
2917 assign = p[1]
2918 if assign != ':' and assign != '::=':
2919 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2920
2921 grammar.append((file, dline, prodname, syms))
2922 except SyntaxError:
2923 raise
2924 except Exception:
2925 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
2926
2927 return grammar
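
# For example, a rule function whose docstring reads
#
#     expression : expression PLUS term
#                | term
#
# would be parsed into entries of the form (file, line, 'expression',
# ['expression', 'PLUS', 'term']) and (file, line, 'expression', ['term']),
# with the '|' line inheriting the production name from the previous line.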
2928
2929# -----------------------------------------------------------------------------
2930# ParserReflect()
2931#
2932# This class represents information extracted for building a parser including
2933# start symbol, error function, tokens, precedence list, action functions,
2934# etc.
2935# -----------------------------------------------------------------------------
2936class ParserReflect(object):
2937 def __init__(self, pdict, log=None):
2938 self.pdict = pdict
2939 self.start = None
2940 self.error_func = None
2941 self.tokens = None
2942 self.modules = set()
2943 self.grammar = []
2944 self.error = False
2945
2946 if log is None:
2947 self.log = PlyLogger(sys.stderr)
2948 else:
2949 self.log = log
2950
2951 # Get all of the basic information
2952 def get_all(self):
2953 self.get_start()
2954 self.get_error_func()
2955 self.get_tokens()
2956 self.get_precedence()
2957 self.get_pfunctions()
2958
2959 # Validate all of the information
2960 def validate_all(self):
2961 self.validate_start()
2962 self.validate_error_func()
2963 self.validate_tokens()
2964 self.validate_precedence()
2965 self.validate_pfunctions()
2966 self.validate_modules()
2967 return self.error
2968
2969 # Compute a signature over the grammar
2970 def signature(self):
2971 parts = []
2972 try:
2973 if self.start:
2974 parts.append(self.start)
2975 if self.prec:
2976 parts.append(''.join([''.join(p) for p in self.prec]))
2977 if self.tokens:
2978 parts.append(' '.join(self.tokens))
2979 for f in self.pfuncs:
2980 if f[3]:
2981 parts.append(f[3])
2982 except (TypeError, ValueError):
2983 pass
2984 return ''.join(parts)
2985
2986 # -----------------------------------------------------------------------------
2987 # validate_modules()
2988 #
2989 # This method checks to see if there are duplicated p_rulename() functions
2990 # in the parser module file. Without this function, it is really easy for
2991 # users to make mistakes by cutting and pasting code fragments (and it's a real
2992 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2993 # we just do a little regular expression pattern matching of def statements
2994 # to try and detect duplicates.
2995 # -----------------------------------------------------------------------------
2996
2997 def validate_modules(self):
2998 # Match def p_funcname(
2999 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
3000
3001 for module in self.modules:
3002 try:
3003 lines, linen = inspect.getsourcelines(module)
3004 except IOError:
3005 continue
3006
3007 counthash = {}
3008 for linen, line in enumerate(lines):
3009 linen += 1
3010 m = fre.match(line)
3011 if m:
3012 name = m.group(1)
3013 prev = counthash.get(name)
3014 if not prev:
3015 counthash[name] = linen
3016 else:
3017 filename = inspect.getsourcefile(module)
3018 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
3019 filename, linen, name, prev)
3020
3021 # Get the start symbol
3022 def get_start(self):
3023 self.start = self.pdict.get('start')
3024
3025 # Validate the start symbol
3026 def validate_start(self):
3027 if self.start is not None:
3028 if not isinstance(self.start, string_types):
3029 self.log.error("'start' must be a string")
3030
3031 # Look for error handler
3032 def get_error_func(self):
3033 self.error_func = self.pdict.get('p_error')
3034
3035 # Validate the error function
3036 def validate_error_func(self):
3037 if self.error_func:
3038 if isinstance(self.error_func, types.FunctionType):
3039 ismethod = 0
3040 elif isinstance(self.error_func, types.MethodType):
3041 ismethod = 1
3042 else:
3043 self.log.error("'p_error' defined, but is not a function or method")
3044 self.error = True
3045 return
3046
3047 eline = self.error_func.__code__.co_firstlineno
3048 efile = self.error_func.__code__.co_filename
3049 module = inspect.getmodule(self.error_func)
3050 self.modules.add(module)
3051
3052 argcount = self.error_func.__code__.co_argcount - ismethod
3053 if argcount != 1:
3054 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
3055 self.error = True
3056
3057 # Get the tokens map
3058 def get_tokens(self):
3059 tokens = self.pdict.get('tokens')
3060 if not tokens:
3061 self.log.error('No token list is defined')
3062 self.error = True
3063 return
3064
3065 if not isinstance(tokens, (list, tuple)):
3066 self.log.error('tokens must be a list or tuple')
3067 self.error = True
3068 return
3069
3070 if not tokens:
3071 self.log.error('tokens is empty')
3072 self.error = True
3073 return
3074
3075 self.tokens = tokens
3076
3077 # Validate the tokens
3078 def validate_tokens(self):
3080 if 'error' in self.tokens:
3081 self.log.error("Illegal token name 'error'. Is a reserved word")
3082 self.error = True
3083 return
3084
3085 terminals = set()
3086 for n in self.tokens:
3087 if n in terminals:
3088 self.log.warning('Token %r multiply defined', n)
3089 terminals.add(n)
3090
3091 # Get the precedence map (if any)
3092 def get_precedence(self):
3093 self.prec = self.pdict.get('precedence')
3094
3095 # Validate and parse the precedence map
3096 def validate_precedence(self):
3097 preclist = []
3098 if self.prec:
3099 if not isinstance(self.prec, (list, tuple)):
3100 self.log.error('precedence must be a list or tuple')
3101 self.error = True
3102 return
3103 for level, p in enumerate(self.prec):
3104 if not isinstance(p, (list, tuple)):
3105 self.log.error('Bad precedence table')
3106 self.error = True
3107 return
3108
3109 if len(p) < 2:
3110 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
3111 self.error = True
3112 return
3113 assoc = p[0]
3114 if not isinstance(assoc, string_types):
3115 self.log.error('precedence associativity must be a string')
3116 self.error = True
3117 return
3118 for term in p[1:]:
3119 if not isinstance(term, string_types):
3120 self.log.error('precedence items must be strings')
3121 self.error = True
3122 return
3123 preclist.append((term, assoc, level+1))
3124 self.preclist = preclist
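
# For example, the hypothetical specification
#
#     precedence = (('left', 'PLUS', 'MINUS'),
#                   ('left', 'TIMES', 'DIVIDE'))
#
# would yield preclist entries ('PLUS', 'left', 1), ('MINUS', 'left', 1),
# ('TIMES', 'left', 2), ('DIVIDE', 'left', 2), with higher levels binding
# more tightly.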
3125
3126 # Get all p_functions from the grammar
3127 def get_pfunctions(self):
3128 p_functions = []
3129 for name, item in self.pdict.items():
3130 if not name.startswith('p_') or name == 'p_error':
3131 continue
3132 if isinstance(item, (types.FunctionType, types.MethodType)):
3133 line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
3134 module = inspect.getmodule(item)
3135 p_functions.append((line, module, name, item.__doc__))
3136
3137 # Sort all of the actions by line number; make sure to stringify
3138 # modules to make them sortable, since `line` may not uniquely sort all
3139 # p functions
3140 p_functions.sort(key=lambda p_function: (
3141 p_function[0],
3142 str(p_function[1]),
3143 p_function[2],
3144 p_function[3]))
3145 self.pfuncs = p_functions
3146
3147 # Validate all of the p_functions
3148 def validate_pfunctions(self):
3149 grammar = []
3150 # Check for non-empty symbols
3151 if len(self.pfuncs) == 0:
3152 self.log.error('no rules of the form p_rulename are defined')
3153 self.error = True
3154 return
3155
3156 for line, module, name, doc in self.pfuncs:
3157 file = inspect.getsourcefile(module)
3158 func = self.pdict[name]
3159 if isinstance(func, types.MethodType):
3160 reqargs = 2
3161 else:
3162 reqargs = 1
3163 if func.__code__.co_argcount > reqargs:
3164 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
3165 self.error = True
3166 elif func.__code__.co_argcount < reqargs:
3167 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
3168 self.error = True
3169 elif not func.__doc__:
3170 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
3171 file, line, func.__name__)
3172 else:
3173 try:
3174 parsed_g = parse_grammar(doc, file, line)
3175 for g in parsed_g:
3176 grammar.append((name, g))
3177 except SyntaxError as e:
3178 self.log.error(str(e))
3179 self.error = True
3180
3181 # Looks like a valid grammar rule
3182 # Mark the file in which defined.
3183 self.modules.add(module)
3184
3185 # Secondary validation step that looks for p_ definitions that are not functions
3186 # or functions that look like they might be grammar rules.
3187
3188 for n, v in self.pdict.items():
3189 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
3190 continue
3191 if n.startswith('t_'):
3192 continue
3193 if n.startswith('p_') and n != 'p_error':
3194 self.log.warning('%r not defined as a function', n)
3195 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
3196 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
3197 if v.__doc__:
3198 try:
3199 doc = v.__doc__.split(' ')
3200 if doc[1] == ':':
3201 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
3202 v.__code__.co_filename, v.__code__.co_firstlineno, n)
3203 except IndexError:
3204 pass
3205
3206 self.grammar = grammar
3207
3208# -----------------------------------------------------------------------------
3209# yacc(module)
3210#
3211# Build a parser
3212# -----------------------------------------------------------------------------
3213
3214def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3215 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
3216 outputdir=None, debuglog=None, errorlog=None, picklefile=None):
3217
3218 if tabmodule is None:
3219 tabmodule = tab_module
3220
3221 # Reference to the parsing method of the last built parser
3222 global parse
3223
3224 # If pickling is enabled, table files are not created
3225 if picklefile:
3226 write_tables = 0
3227
3228 if errorlog is None:
3229 errorlog = PlyLogger(sys.stderr)
3230
3231 # Get the module dictionary used for the parser
3232 if module:
3233 _items = [(k, getattr(module, k)) for k in dir(module)]
3234 pdict = dict(_items)
3235 # If no __file__ attribute is available, try to obtain it from the __module__ instead
3236 if '__file__' not in pdict:
3237 pdict['__file__'] = sys.modules[pdict['__module__']].__file__
3238 else:
3239 pdict = get_caller_module_dict(2)
3240
3241 if outputdir is None:
3242 # If no output directory is set, the location of the output files
3243 # is determined according to the following rules:
3244 # - If tabmodule specifies a package, files go into that package directory
3245 # - Otherwise, files go in the same directory as the specifying module
3246 if isinstance(tabmodule, types.ModuleType):
3247 srcfile = tabmodule.__file__
3248 else:
3249 if '.' not in tabmodule:
3250 srcfile = pdict['__file__']
3251 else:
3252 parts = tabmodule.split('.')
3253 pkgname = '.'.join(parts[:-1])
3254 exec('import %s' % pkgname)
3255 srcfile = getattr(sys.modules[pkgname], '__file__', '')
3256 outputdir = os.path.dirname(srcfile)
3257
# Determine if the module providing the grammar is part of a package.
# If so, fix the tabmodule setting so that tables load correctly
3260 pkg = pdict.get('__package__')
3261 if pkg and isinstance(tabmodule, str):
3262 if '.' not in tabmodule:
3263 tabmodule = pkg + '.' + tabmodule
3264
3265
3266
3267 # Set start symbol if it's specified directly using an argument
3268 if start is not None:
3269 pdict['start'] = start
3270
3271 # Collect parser information from the dictionary
3272 pinfo = ParserReflect(pdict, log=errorlog)
3273 pinfo.get_all()
3274
3275 if pinfo.error:
3276 raise YaccError('Unable to build parser')
3277
3278 # Check signature against table files (if any)
3279 signature = pinfo.signature()
3280
3281 # Read the tables
3282 try:
3283 lr = LRTable()
3284 if picklefile:
3285 read_signature = lr.read_pickle(picklefile)
3286 else:
3287 read_signature = lr.read_table(tabmodule)
3288 if optimize or (read_signature == signature):
3289 try:
3290 lr.bind_callables(pinfo.pdict)
3291 parser = LRParser(lr, pinfo.error_func)
3292 parse = parser.parse
3293 return parser
3294 except Exception as e:
3295 errorlog.warning('There was a problem loading the table file: %r', e)
3296 except VersionError as e:
3297 errorlog.warning(str(e))
3298 except ImportError:
3299 pass
3300
3301 if debuglog is None:
3302 if debug:
3303 try:
3304 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
3305 except IOError as e:
3306 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3307 debuglog = NullLogger()
3308 else:
3309 debuglog = NullLogger()
3310
3311 debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
3312
3313 errors = False
3314
3315 # Validate the parser information
3316 if pinfo.validate_all():
3317 raise YaccError('Unable to build parser')
3318
3319 if not pinfo.error_func:
3320 errorlog.warning('no p_error() function is defined')
3321
3322 # Create a grammar object
3323 grammar = Grammar(pinfo.tokens)
3324
3325 # Set precedence level for terminals
3326 for term, assoc, level in pinfo.preclist:
3327 try:
3328 grammar.set_precedence(term, assoc, level)
3329 except GrammarError as e:
3330 errorlog.warning('%s', e)
3331
3332 # Add productions to the grammar
3333 for funcname, gram in pinfo.grammar:
3334 file, line, prodname, syms = gram
3335 try:
3336 grammar.add_production(prodname, syms, funcname, file, line)
3337 except GrammarError as e:
3338 errorlog.error('%s', e)
3339 errors = True
3340
3341 # Set the grammar start symbols
3342 try:
3343 if start is None:
3344 grammar.set_start(pinfo.start)
3345 else:
3346 grammar.set_start(start)
3347 except GrammarError as e:
3348 errorlog.error(str(e))
3349 errors = True
3350
3351 if errors:
3352 raise YaccError('Unable to build parser')
3353
3354 # Verify the grammar structure
3355 undefined_symbols = grammar.undefined_symbols()
3356 for sym, prod in undefined_symbols:
3357 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
3358 errors = True
3359
3360 unused_terminals = grammar.unused_terminals()
3361 if unused_terminals:
3362 debuglog.info('')
3363 debuglog.info('Unused terminals:')
3364 debuglog.info('')
3365 for term in unused_terminals:
3366 errorlog.warning('Token %r defined, but not used', term)
3367 debuglog.info(' %s', term)
3368
3369 # Print out all productions to the debug log
3370 if debug:
3371 debuglog.info('')
3372 debuglog.info('Grammar')
3373 debuglog.info('')
3374 for n, p in enumerate(grammar.Productions):
3375 debuglog.info('Rule %-5d %s', n, p)
3376
3377 # Find unused non-terminals
3378 unused_rules = grammar.unused_rules()
3379 for prod in unused_rules:
3380 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
3381
3382 if len(unused_terminals) == 1:
3383 errorlog.warning('There is 1 unused token')
3384 if len(unused_terminals) > 1:
3385 errorlog.warning('There are %d unused tokens', len(unused_terminals))
3386
3387 if len(unused_rules) == 1:
3388 errorlog.warning('There is 1 unused rule')
3389 if len(unused_rules) > 1:
3390 errorlog.warning('There are %d unused rules', len(unused_rules))
3391
3392 if debug:
3393 debuglog.info('')
3394 debuglog.info('Terminals, with rules where they appear')
3395 debuglog.info('')
3396 terms = list(grammar.Terminals)
3397 terms.sort()
3398 for term in terms:
3399 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
3400
3401 debuglog.info('')
3402 debuglog.info('Nonterminals, with rules where they appear')
3403 debuglog.info('')
3404 nonterms = list(grammar.Nonterminals)
3405 nonterms.sort()
3406 for nonterm in nonterms:
3407 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
3408 debuglog.info('')
3409
3410 if check_recursion:
3411 unreachable = grammar.find_unreachable()
3412 for u in unreachable:
3413 errorlog.warning('Symbol %r is unreachable', u)
3414
3415 infinite = grammar.infinite_cycles()
3416 for inf in infinite:
3417 errorlog.error('Infinite recursion detected for symbol %r', inf)
3418 errors = True
3419
3420 unused_prec = grammar.unused_precedence()
3421 for term, assoc in unused_prec:
3422 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
3423 errors = True
3424
3425 if errors:
3426 raise YaccError('Unable to build parser')
3427
3428 # Run the LRGeneratedTable on the grammar
3429 if debug:
3430 errorlog.debug('Generating %s tables', method)
3431
3432 lr = LRGeneratedTable(grammar, method, debuglog)
3433
3434 if debug:
3435 num_sr = len(lr.sr_conflicts)
3436
3437 # Report shift/reduce and reduce/reduce conflicts
3438 if num_sr == 1:
3439 errorlog.warning('1 shift/reduce conflict')
3440 elif num_sr > 1:
3441 errorlog.warning('%d shift/reduce conflicts', num_sr)
3442
3443 num_rr = len(lr.rr_conflicts)
3444 if num_rr == 1:
3445 errorlog.warning('1 reduce/reduce conflict')
3446 elif num_rr > 1:
3447 errorlog.warning('%d reduce/reduce conflicts', num_rr)
3448
3449 # Write out conflicts to the output file
3450 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3451 debuglog.warning('')
3452 debuglog.warning('Conflicts:')
3453 debuglog.warning('')
3454
3455 for state, tok, resolution in lr.sr_conflicts:
3456 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
3457
3458 already_reported = set()
3459 for state, rule, rejected in lr.rr_conflicts:
3460 if (state, id(rule), id(rejected)) in already_reported:
3461 continue
3462 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3463 debuglog.warning('rejected rule (%s) in state %d', rejected, state)
3464 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3465 errorlog.warning('rejected rule (%s) in state %d', rejected, state)
3466 already_reported.add((state, id(rule), id(rejected)))
3467
3468 warned_never = []
3469 for state, rule, rejected in lr.rr_conflicts:
3470 if not rejected.reduced and (rejected not in warned_never):
3471 debuglog.warning('Rule (%s) is never reduced', rejected)
3472 errorlog.warning('Rule (%s) is never reduced', rejected)
3473 warned_never.append(rejected)
3474
3475 # Write the table file if requested
3476 if write_tables:
3477 try:
3478 lr.write_table(tabmodule, outputdir, signature)
3479 except IOError as e:
3480 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3481
3482 # Write a pickled version of the tables
3483 if picklefile:
3484 try:
3485 lr.pickle_table(picklefile, signature)
3486 except IOError as e:
3487 errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3488
3489 # Build the parser
3490 lr.bind_callables(pinfo.pdict)
3491 parser = LRParser(lr, pinfo.error_func)
3492
3493 parse = parser.parse
3494 return parser
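
# -----------------------------------------------------------------------------
# Example usage (a minimal sketch, not part of the module; token and rule
# names below are hypothetical):
#
#     import ply.lex as lex
#     import ply.yacc as yacc
#
#     tokens = ('NUMBER', 'PLUS')
#     t_PLUS = r'\+'
#     t_ignore = ' '
#     def t_NUMBER(t):
#         r'\d+'
#         t.value = int(t.value)
#         return t
#     def t_error(t):
#         t.lexer.skip(1)
#
#     def p_expr_plus(p):
#         'expr : expr PLUS NUMBER'
#         p[0] = p[1] + p[3]
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#     def p_error(p):
#         print('Syntax error')
#
#     lexer = lex.lex()
#     parser = yacc.yacc()
#     print(parser.parse('1 + 2 + 3'))   # -> 6
# -----------------------------------------------------------------------------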