Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/ply/yacc.py: 67%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -----------------------------------------------------------------------------
2# ply: yacc.py
3#
4# Copyright (C) 2001-2022
5# David M. Beazley (Dabeaz LLC)
6# All rights reserved.
7#
8# Latest version: https://github.com/dabeaz/ply
9#
10# Redistribution and use in source and binary forms, with or without
11# modification, are permitted provided that the following conditions are
12# met:
13#
14# * Redistributions of source code must retain the above copyright notice,
15# this list of conditions and the following disclaimer.
16# * Redistributions in binary form must reproduce the above copyright notice,
17# this list of conditions and the following disclaimer in the documentation
18# and/or other materials provided with the distribution.
19# * Neither the name of David Beazley or Dabeaz LLC may be used to
20# endorse or promote products derived from this software without
21# specific prior written permission.
22#
23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34# -----------------------------------------------------------------------------
35#
36# This implements an LR parser that is constructed from grammar rules defined
37# as Python functions. The grammar is specified by supplying the BNF inside
38# Python documentation strings. The inspiration for this technique was borrowed
39# from John Aycock's Spark parsing system. PLY might be viewed as cross between
40# Spark and the GNU bison utility.
41#
42# The current implementation is only somewhat object-oriented. The
43# LR parser itself is defined in terms of an object (which allows multiple
44# parsers to co-exist). However, most of the variables used during table
45# construction are defined in terms of global variables. Users shouldn't
46# notice unless they are trying to define multiple parsers at the same
47# time using threads (in which case they should have their head examined).
48#
49# This implementation supports both SLR and LALR(1) parsing. LALR(1)
50# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
51# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
52# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
53# by the more efficient DeRemer and Pennello algorithm.
54#
55# :::::::: WARNING :::::::
56#
57# Construction of LR parsing tables is fairly complicated and expensive.
58# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style."  Modify the code at your
61# own risk!
62# ----------------------------------------------------------------------------
64import re
65import types
66import sys
67import inspect
69#-----------------------------------------------------------------------------
70# === User configurable parameters ===
71#
72# Change these to modify the default behavior of yacc (if you wish)
73#-----------------------------------------------------------------------------
75yaccdebug = False # Debugging mode. If set, yacc generates a
76 # a 'parser.out' file in the current directory
78debug_file = 'parser.out' # Default name of the debugging file
79error_count = 3 # Number of symbols that must be shifted to leave recovery mode
80resultlimit = 40 # Size limit of results when running in debug mode.
82MAXINT = sys.maxsize
84# This object is a stand-in for a logging object created by the
85# logging module. PLY will use this by default to create things
86# such as the parser.out file. If a user wants more detailed
87# information, they can create their own logging object and pass
88# it into PLY.
class PlyLogger(object):
    """Minimal stand-in for a logging.Logger.

    Writes formatted messages to a file-like object *f*.  PLY uses this by
    default for output such as the parser.out file; users wanting richer
    behavior may pass a real logging object instead.
    """

    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write('%s\n' % (msg % args))

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: %s\n' % (msg % args))

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: %s\n' % (msg % args))

    critical = debug
107# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    """Logger that silently discards all output.

    Any attribute access returns the instance itself, so chained uses like
    ``null.debug('msg')`` resolve to ``__call__`` below and do nothing.
    """
    def __getattribute__(self, name):
        # Every attribute lookup (debug, info, warning, ...) yields self.
        return self

    def __call__(self, *args, **kwargs):
        # Swallow the call and stay chainable.
        return self
115# Exception raised for yacc-related errors
class YaccError(Exception):
    """Base exception raised for all yacc-related errors."""
    pass
119# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    """Format a parse-rule result for debug-mode output.

    Produces '<TypeName @ 0xADDR> (repr)', truncating the repr to the
    module-level *resultlimit* and flattening embedded newlines.
    """
    text = repr(r)
    if '\n' in text:
        # Escape any literal newlines so the output stays on one line.
        text = repr(text)
    if len(text) > resultlimit:
        text = text[:resultlimit] + ' ...'
    return '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), text)
129# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    """Format a parser stack entry for debug-mode output.

    Short reprs (under 16 characters) are shown verbatim; anything longer
    is abbreviated to '<TypeName @ 0xADDR>'.
    """
    text = repr(r)
    if '\n' in text:
        # Escape any literal newlines so the output stays on one line.
        text = repr(text)
    if len(text) >= 16:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))
    return text
139#-----------------------------------------------------------------------------
140# === LR Parsing Engine ===
141#
142# The following classes are used for the LR parser itself. These are not
143# used during table construction and are independent of the actual LR
144# table generation algorithm
145#-----------------------------------------------------------------------------
147# This class is used to hold non-terminal grammar symbols during parsing.
148# It normally has the following attributes set:
149# .type = Grammar symbol type
150# .value = Symbol value
151# .lineno = Starting line number
152# .endlineno = Ending line number (optional, set automatically)
153# .lexpos = Starting lex position
154# .endlexpos = Ending lex position (optional, set automatically)
class YaccSymbol:
    """Container for a grammar symbol during parsing.

    Attributes (.type, .value, .lineno, .endlineno, .lexpos, .endlexpos)
    are assigned dynamically by the parser rather than declared here.
    """
    def __str__(self):
        # A symbol displays as its grammar-symbol type name.
        return self.type

    def __repr__(self):
        return str(self)
163# This class is a wrapper around the objects actually passed to each
164# grammar rule. Index lookup and assignment actually assign the
165# .value attribute of the underlying YaccSymbol object.
166# The lineno() method returns the line number of a given
167# item (or 0 if not defined). The linespan() method returns
168# a tuple of (startline,endline) representing the range of lines
169# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
170# representing the range of positional information for a symbol.
class YaccProduction:
    """Wrapper around the symbols passed to each grammar-rule function.

    Index lookup and assignment operate on the .value attribute of the
    underlying YaccSymbol objects.  lineno()/linespan() expose line-number
    information and lexpos()/lexspan() expose lexer-position information
    for individual items (0 when not recorded).
    """

    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [item.value for item in self.slice[n]]
        if n >= 0:
            return self.slice[n].value
        # Negative indices read from the parser's symbol stack instead.
        return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 slicing protocol; kept for compatibility.
        return [item.value for item in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        """Line number of item *n*, or 0 if not recorded."""
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        """Return (startline, endline) for item *n*."""
        sym = self.slice[n]
        first = getattr(sym, 'lineno', 0)
        return first, getattr(sym, 'endlineno', first)

    def lexpos(self, n):
        """Lexer position of item *n*, or 0 if not recorded."""
        return getattr(self.slice[n], 'lexpos', 0)

    def set_lexpos(self, n, lexpos):
        self.slice[n].lexpos = lexpos

    def lexspan(self, n):
        """Return (startpos, endpos) of lexer positions for item *n*."""
        sym = self.slice[n]
        first = getattr(sym, 'lexpos', 0)
        return first, getattr(sym, 'endlexpos', first)

    def error(self):
        """Signal a syntax error from inside a grammar rule."""
        raise SyntaxError
221# -----------------------------------------------------------------------------
222# == LRParser ==
223#
224# The LR Parsing engine.
225# -----------------------------------------------------------------------------
class LRParser:
    """The LR parsing engine.

    Drives a push-down automaton from the action/goto tables and production
    list supplied in *lrtab*.  The engine is independent of how the tables
    were generated (SLR or LALR(1)).  *errorf* is the user-defined p_error()
    callable (or None).
    """
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        # Called (typically from within p_error) to signal that error
        # recovery has been handled and normal parsing may resume.
        self.errorok = True

    def restart(self):
        # Reset the parser to its start state, discarding both stacks.
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See:  http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        """Precompute states whose only action is a single reduction."""
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            # A negative action value encodes "reduce by production -value".
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        """Turn off default reductions (every state consults the lookahead)."""
        self.defaulted_states = {}

    # parse().
    #
    # This is the core parsing engine.  To operate, it requires a lexer object.
    # Two options are provided.  The debug flag turns on debugging so that you can
    # see the various rule reductions and parsing steps.  tracking turns on position
    # tracking.  In this mode, symbols will record the starting/ending line number and
    # character index.

    def parse(self, input=None, lexer=None, debug=False, tracking=False):
        """Parse *input* (or the lexer's pending input) and return the value
        computed by the start production (None on unrecovered error/EOF).

        debug     -- falsy, a truthy int (enables stderr logging), or a logger object
        tracking  -- when True, records line/position spans on nonterminal symbols
        """
        # If debugging has been specified as a flag, turn it into a logging object
        if isinstance(debug, int) and debug:
            debug = PlyLogger(sys.stderr)

        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        if debug:
            debug.info('PLY: PARSE DEBUG START')

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        # Set the token function
        get_token = self.token = lexer.token

        # Set up the state and symbol stacks
        statestack = self.statestack = []    # Stack of parsing states
        symstack = self.symstack = []        # Stack of grammar symbols
        pslice.stack = symstack              # Put in the production
        errtoken = None                      # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that.  Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if debug:
                debug.debug('State : %s', state)

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        # End of input: synthesize the $end marker.
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                # Default reduction: no lookahead needed for this state.
                t = defaulted_states[state]
                if debug:
                    debug.debug('Defaulted state %s: Reduce using %d', state, -t)

            if debug:
                debug.debug('Stack : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    if debug:
                        debug.debug('Action : Shift and goto state %s', t)

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if debug:
                        if plen:
                            debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                       '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                       goto[statestack[-1-plen]][pname])
                        else:
                            debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                       goto[statestack[-1]][pname])

                    if plen:
                        # Non-empty production: targ holds the slice of symbols
                        # being reduced, with targ[0] replaced by the result symbol.
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        if tracking:
                            # Span of the result runs from the first to the last
                            # right-hand-side symbol.
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            if debug:
                                debug.info('Result : %s', format_result(pslice[0]))
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue

                    else:
                        # Empty production: result symbol spans the current
                        # lexer position when tracking.

                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            if debug:
                                debug.info('Result : %s', format_result(pslice[0]))
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue

                if t == 0:
                    # Accept: return the value of the start symbol.
                    n = symstack[-1]
                    result = getattr(n, 'value', None)

                    if debug:
                        debug.info('Done : Returning %s', format_result(result))
                        debug.info('PLY: PARSE DEBUG END')

                    return result

            if t is None:

                if debug:
                    debug.error('Error : %s',
                                ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = self.errorfunc(errtoken)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No p_error() function: report to stderr.
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already an error token: discard one stack entry.
                    sym = symstack.pop()
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    statestack.pop()
                    state = statestack[-1]

                continue

            # If we're here, something really bad happened
            raise RuntimeError('yacc: internal parser error!!!\n')
583# -----------------------------------------------------------------------------
584# === Grammar Representation ===
585#
586# The following functions, classes, and variables are used to represent and
587# manipulate the rules that make up a grammar.
588# -----------------------------------------------------------------------------
# regex matching identifiers (note: hyphens are accepted in addition to
# the usual alphanumerics/underscore)
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
593# -----------------------------------------------------------------------------
594# class Production:
595#
596# This class stores the raw information about a single production or grammar rule.
597# A grammar rule refers to a specification such as this:
598#
599# expr : expr PLUS term
600#
601# Here are the basic attributes defined on all productions
602#
603# name - Name of the production. For example 'expr'
604# prod - A list of symbols on the right side ['expr','PLUS','term']
605# prec - Production precedence level
606# number - Production number.
607# func - Function that executes on reduce
608# file - File where production function is defined
609# lineno - Line number where production function is defined
610#
611# The following attributes are defined or optional.
612#
613# len - Length of the production (number of symbols on right hand side)
614# usyms - Set of unique symbols found in the production
615# -----------------------------------------------------------------------------
class Production(object):
    """Raw information about a single grammar rule.

    A rule such as  expr : expr PLUS term  is stored with:

      name    - left-hand-side nonterminal, e.g. 'expr'
      prod    - tuple of right-hand-side symbols, e.g. ('expr', 'PLUS', 'term')
      prec    - precedence as an (assoc, level) tuple
      number  - production number
      func    - name of the reduction function; bound to a callable via bind()
      file    - file where the rule's function is defined
      line    - line number of that definition
      len     - number of right-hand-side symbols
      usyms   - unique right-hand-side symbols, in first-appearance order
    """
    # Reduction counter (updated externally during parsing/table checks).
    reduced = 0

    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Length of the production (number of right-hand-side symbols).
        self.len = len(self.prod)

        # Unique production symbols, preserving first-appearance order.
        unique = []
        for symbol in self.prod:
            if symbol not in unique:
                unique.append(symbol)
        self.usyms = unique

        # All LR items for this production (filled in during table construction).
        self.lr_items = []
        self.lr_next = None

        # Human-readable form, e.g. "expr -> expr PLUS term".
        if self.prod:
            self.str = self.name + ' -> ' + ' '.join(self.prod)
        else:
            self.str = self.name + ' -> <empty>'

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Python 2 truth protocol; every production is truthy.
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    def lr_item(self, n):
        """Return the LR item with the dot at position *n* (None past the end)."""
        if n > len(self.prod):
            return None
        item = LRItem(self, n)
        # Precompute the productions that can immediately follow the dot.
        # NOTE(review): self.Prodnames is attached externally during grammar
        # construction -- confirm it is set before lr_item() is called.
        try:
            item.lr_after = self.Prodnames[item.prod[n+1]]
        except (IndexError, KeyError):
            item.lr_after = []
        try:
            item.lr_before = item.prod[n-1]
        except IndexError:
            item.lr_before = None
        return item

    def bind(self, pdict):
        """Bind the production's function name to a callable taken from *pdict*."""
        if self.func:
            self.callable = pdict[self.func]
685# -----------------------------------------------------------------------------
686# class LRItem
687#
688# This class represents a specific stage of parsing a production rule. For
689# example:
690#
691# expr : expr . PLUS term
692#
693# In the above, the "." represents the current location of the parse. Here
694# basic attributes:
695#
696# name - Name of the production. For example 'expr'
697# prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
698# number - Production number.
699#
700# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'
701# then lr_next refers to 'expr -> expr PLUS . term'
702# lr_index - LR item index (location of the ".") in the prod list.
703# lookaheads - LALR lookahead symbols for this item
704# len - Length of the production (number of symbols on right hand side)
705# lr_after - List of all productions that immediately follow
706# lr_before - Grammar symbol immediately before
707# -----------------------------------------------------------------------------
class LRItem(object):
    """One parse position within a production.

    Built from a Production *p* and a dot position *n*; the dot is stored
    as the literal symbol '.' inserted into the right-hand side.  For
    example, dot position 1 in  expr : expr PLUS term  yields
    'expr -> expr . PLUS term'.  lookaheads maps states to LALR lookahead
    symbols and is filled in during table construction.
    """

    def __init__(self, p, n):
        self.name = p.name
        symbols = list(p.prod)
        symbols.insert(n, '.')
        self.prod = tuple(symbols)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if not self.prod:
            return '%s -> <empty>' % self.name
        return '%s -> %s' % (self.name, ' '.join(self.prod))

    def __repr__(self):
        return 'LRItem(' + str(self) + ')'
731# -----------------------------------------------------------------------------
732# rightmost_terminal()
733#
734# Return the rightmost terminal from a list of symbols. Used in add_production()
735# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    """Return the rightmost member of *symbols* that is in *terminals*.

    Used by add_production() to derive a rule's default precedence.
    Returns None when no symbol is a terminal.
    """
    for sym in reversed(symbols):
        if sym in terminals:
            return sym
    return None
744# -----------------------------------------------------------------------------
745# === GRAMMAR CLASS ===
746#
747# The following class represents the contents of the specified grammar along
748# with various computed properties such as first sets, follow sets, LR items, etc.
749# This data is used for critical parts of the table generation process later.
750# -----------------------------------------------------------------------------
class GrammarError(YaccError):
    """Exception raised for errors in the grammar specification."""
    pass
755class Grammar(object):
756 def __init__(self, terminals):
757 self.Productions = [None] # A list of all of the productions. The first
758 # entry is always reserved for the purpose of
759 # building an augmented grammar
761 self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all
762 # productions of that nonterminal.
764 self.Prodmap = {} # A dictionary that is only used to detect duplicate
765 # productions.
767 self.Terminals = {} # A dictionary mapping the names of terminal symbols to a
768 # list of the rules where they are used.
770 for term in terminals:
771 self.Terminals[term] = []
773 self.Terminals['error'] = []
775 self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list
776 # of rule numbers where they are used.
778 self.First = {} # A dictionary of precomputed FIRST(x) symbols
780 self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols
782 self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the
783 # form ('right',level) or ('nonassoc', level) or ('left',level)
785 self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer.
786 # This is only used to provide error checking and to generate
787 # a warning about unused precedence rules.
789 self.Start = None # Starting symbol for the grammar
    def __len__(self):
        """Number of productions, including the reserved slot 0."""
        return len(self.Productions)

    def __getitem__(self, index):
        """Return the production with the given production number."""
        return self.Productions[index]
798 # -----------------------------------------------------------------------------
799 # set_precedence()
800 #
801 # Sets the precedence for a given terminal. assoc is the associativity such as
802 # 'left','right', or 'nonassoc'. level is a numeric level.
803 #
804 # -----------------------------------------------------------------------------
806 def set_precedence(self, term, assoc, level):
807 assert self.Productions == [None], 'Must call set_precedence() before add_production()'
808 if term in self.Precedence:
809 raise GrammarError('Precedence already specified for terminal %r' % term)
810 if assoc not in ['left', 'right', 'nonassoc']:
811 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
812 self.Precedence[term] = (assoc, level)
814 # -----------------------------------------------------------------------------
815 # add_production()
816 #
817 # Given an action function, this function assembles a production rule and
818 # computes its precedence level.
819 #
820 # The production rule is supplied as a list of symbols. For example,
821 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
822 # symbols ['expr','PLUS','term'].
823 #
824 # Precedence is determined by the precedence of the right-most non-terminal
825 # or the precedence of a terminal specified by %prec.
826 #
827 # A variety of error checks are performed to make sure production symbols
828 # are valid and that %prec is used correctly.
829 # -----------------------------------------------------------------------------
    def add_production(self, prodname, syms, func=None, file='', line=0):
        """Assemble one production rule and compute its precedence.

        prodname -- left-hand-side name, e.g. 'expr'
        syms     -- right-hand-side symbol list, e.g. ['expr', 'PLUS', 'term'];
                    may end with ['%prec', TERM] to force a precedence.
                    NOTE: mutated in place (literals normalized, %prec removed).
        func     -- name of the reduction function for this rule
        file, line -- location of the rule's definition, used in error messages

        Raises GrammarError for illegal names, bad literals, misuse of
        %prec, unknown precedence names, and duplicate rules.
        """
        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens such as 'x' or "+" and register each as a
        # single-character terminal.  eval() is applied only to quoted
        # literals taken from the grammar specification itself.
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    # Not a valid literal; fall through to the identifier check.
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            # %prec must appear as the second-to-last symbol, followed by a
            # terminal with a previously declared precedence.
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
908 # -----------------------------------------------------------------------------
909 # set_start()
910 #
911 # Sets the starting symbol and creates the augmented grammar. Production
912 # rule 0 is S' -> start where start is the start symbol.
913 # -----------------------------------------------------------------------------
    def set_start(self, start=None):
        """Set the start symbol and create the augmented grammar.

        Installs production 0 as S' -> start.  If *start* is omitted, the
        left-hand side of the first user-defined production is used.
        Raises GrammarError if *start* is not a known nonterminal.
        """
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start
924 # -----------------------------------------------------------------------------
925 # find_unreachable()
926 #
927 # Find all of the nonterminal symbols that can't be reached from the starting
928 # symbol. Returns a list of nonterminals that can't be reached.
929 # -----------------------------------------------------------------------------
931 def find_unreachable(self):
933 # Mark all symbols that are reachable from a symbol s
934 def mark_reachable_from(s):
935 if s in reachable:
936 return
937 reachable.add(s)
938 for p in self.Prodnames.get(s, []):
939 for r in p.prod:
940 mark_reachable_from(r)
942 reachable = set()
943 mark_reachable_from(self.Productions[0].prod[0])
944 return [s for s in self.Nonterminals if s not in reachable]
946 # -----------------------------------------------------------------------------
947 # infinite_cycles()
948 #
949 # This function looks at the various parsing rules and tries to detect
950 # infinite recursion cycles (grammar rules where there is no possible way
951 # to derive a string of only terminals).
952 # -----------------------------------------------------------------------------
954 def infinite_cycles(self):
955 terminates = {}
957 # Terminals:
958 for t in self.Terminals:
959 terminates[t] = True
961 terminates['$end'] = True
963 # Nonterminals:
965 # Initialize to false:
966 for n in self.Nonterminals:
967 terminates[n] = False
969 # Then propagate termination until no change:
970 while True:
971 some_change = False
972 for (n, pl) in self.Prodnames.items():
973 # Nonterminal n terminates iff any of its productions terminates.
974 for p in pl:
975 # Production p terminates iff all of its rhs symbols terminate.
976 for s in p.prod:
977 if not terminates[s]:
978 # The symbol s does not terminate,
979 # so production p does not terminate.
980 p_terminates = False
981 break
982 else:
983 # didn't break from the loop,
984 # so every symbol s terminates
985 # so production p terminates.
986 p_terminates = True
988 if p_terminates:
989 # symbol n terminates!
990 if not terminates[n]:
991 terminates[n] = True
992 some_change = True
993 # Don't need to consider any more productions for this n.
994 break
996 if not some_change:
997 break
999 infinite = []
1000 for (s, term) in terminates.items():
1001 if not term:
1002 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1003 # s is used-but-not-defined, and we've already warned of that,
1004 # so it would be overkill to say that it's also non-terminating.
1005 pass
1006 else:
1007 infinite.append(s)
1009 return infinite
1011 # -----------------------------------------------------------------------------
1012 # undefined_symbols()
1013 #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
1017 # -----------------------------------------------------------------------------
1018 def undefined_symbols(self):
1019 result = []
1020 for p in self.Productions:
1021 if not p:
1022 continue
1024 for s in p.prod:
1025 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1026 result.append((s, p))
1027 return result
1029 # -----------------------------------------------------------------------------
1030 # unused_terminals()
1031 #
1032 # Find all terminals that were defined, but not used by the grammar. Returns
1033 # a list of all symbols.
1034 # -----------------------------------------------------------------------------
1035 def unused_terminals(self):
1036 unused_tok = []
1037 for s, v in self.Terminals.items():
1038 if s != 'error' and not v:
1039 unused_tok.append(s)
1041 return unused_tok
1043 # ------------------------------------------------------------------------------
1044 # unused_rules()
1045 #
1046 # Find all grammar rules that were defined, but not used (maybe not reachable)
1047 # Returns a list of productions.
1048 # ------------------------------------------------------------------------------
1050 def unused_rules(self):
1051 unused_prod = []
1052 for s, v in self.Nonterminals.items():
1053 if not v:
1054 p = self.Prodnames[s][0]
1055 unused_prod.append(p)
1056 return unused_prod
1058 # -----------------------------------------------------------------------------
1059 # unused_precedence()
1060 #
1061 # Returns a list of tuples (term,precedence) corresponding to precedence
1062 # rules that were never used by the grammar. term is the name of the terminal
1063 # on which precedence was applied and precedence is a string such as 'left' or
1064 # 'right' corresponding to the type of precedence.
1065 # -----------------------------------------------------------------------------
1067 def unused_precedence(self):
1068 unused = []
1069 for termname in self.Precedence:
1070 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1071 unused.append((termname, self.Precedence[termname][0]))
1073 return unused
1075 # -------------------------------------------------------------------------
1076 # _first()
1077 #
1078 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1079 #
1080 # During execution of compute_first1, the result may be incomplete.
1081 # Afterward (e.g., when called from compute_follow()), it will be complete.
1082 # -------------------------------------------------------------------------
1083 def _first(self, beta):
1085 # We are computing First(x1,x2,x3,...,xn)
1086 result = []
1087 for x in beta:
1088 x_produces_empty = False
1090 # Add all the non-<empty> symbols of First[x] to the result.
1091 for f in self.First[x]:
1092 if f == '<empty>':
1093 x_produces_empty = True
1094 else:
1095 if f not in result:
1096 result.append(f)
1098 if x_produces_empty:
1099 # We have to consider the next x in beta,
1100 # i.e. stay in the loop.
1101 pass
1102 else:
1103 # We don't have to consider any further symbols in beta.
1104 break
1105 else:
1106 # There was no 'break' from the loop,
1107 # so x_produces_empty was true for all x in beta,
1108 # so beta produces empty as well.
1109 result.append('<empty>')
1111 return result
1113 # -------------------------------------------------------------------------
1114 # compute_first()
1115 #
1116 # Compute the value of FIRST1(X) for all symbols
1117 # -------------------------------------------------------------------------
1118 def compute_first(self):
1119 if self.First:
1120 return self.First
1122 # Terminals:
1123 for t in self.Terminals:
1124 self.First[t] = [t]
1126 self.First['$end'] = ['$end']
1128 # Nonterminals:
1130 # Initialize to the empty set:
1131 for n in self.Nonterminals:
1132 self.First[n] = []
1134 # Then propagate symbols until no change:
1135 while True:
1136 some_change = False
1137 for n in self.Nonterminals:
1138 for p in self.Prodnames[n]:
1139 for f in self._first(p.prod):
1140 if f not in self.First[n]:
1141 self.First[n].append(f)
1142 some_change = True
1143 if not some_change:
1144 break
1146 return self.First
1148 # ---------------------------------------------------------------------
1149 # compute_follow()
1150 #
1151 # Computes all of the follow sets for every non-terminal symbol. The
1152 # follow set is the set of all symbols that might follow a given
1153 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1154 # ---------------------------------------------------------------------
1155 def compute_follow(self, start=None):
1156 # If already computed, return the result
1157 if self.Follow:
1158 return self.Follow
1160 # If first sets not computed yet, do that first.
1161 if not self.First:
1162 self.compute_first()
1164 # Add '$end' to the follow list of the start symbol
1165 for k in self.Nonterminals:
1166 self.Follow[k] = []
1168 if not start:
1169 start = self.Productions[1].name
1171 self.Follow[start] = ['$end']
1173 while True:
1174 didadd = False
1175 for p in self.Productions[1:]:
1176 # Here is the production set
1177 for i, B in enumerate(p.prod):
1178 if B in self.Nonterminals:
1179 # Okay. We got a non-terminal in a production
1180 fst = self._first(p.prod[i+1:])
1181 hasempty = False
1182 for f in fst:
1183 if f != '<empty>' and f not in self.Follow[B]:
1184 self.Follow[B].append(f)
1185 didadd = True
1186 if f == '<empty>':
1187 hasempty = True
1188 if hasempty or i == (len(p.prod)-1):
1189 # Add elements of follow(a) to follow(b)
1190 for f in self.Follow[p.name]:
1191 if f not in self.Follow[B]:
1192 self.Follow[B].append(f)
1193 didadd = True
1194 if not didadd:
1195 break
1196 return self.Follow
1199 # -----------------------------------------------------------------------------
1200 # build_lritems()
1201 #
1202 # This function walks the list of productions and builds a complete set of the
1203 # LR items. The LR items are stored in two ways: First, they are uniquely
1204 # numbered and placed in the list _lritems. Second, a linked list of LR items
1205 # is built for each production. For example:
1206 #
1207 # E -> E PLUS E
1208 #
1209 # Creates the list
1210 #
1211 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1212 # -----------------------------------------------------------------------------
    def build_lritems(self):
        """Build all LR items for every production.

        For each production p, sets p.lr_items to the list of LRItems
        obtained by moving the dot from position 0 through len(p), and
        chains the items together via their lr_next attributes (the chain
        starts at p itself and is terminated by None).
        """
        for p in self.Productions:
            lastlri = p        # tail of the lr_next chain built so far
            i = 0              # current dot position
            lr_items = []
            while True:
                # One position past the end terminates the chain with None.
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p, i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError, KeyError):
                        lri.lr_after = []
                    # Symbol appearing immediately before the dot
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri:
                    break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
1242# -----------------------------------------------------------------------------
1243# === LR Generator ===
1244#
1245# The following classes and functions are used to generate LR parsing tables on
1246# a grammar.
1247# -----------------------------------------------------------------------------
1249# -----------------------------------------------------------------------------
1250# digraph()
1251# traverse()
1252#
1253# The following two functions are used to compute set valued functions
1254# of the form:
1255#
1256# F(x) = F'(x) U U{F(y) | x R y}
1257#
1258# This is used to compute the values of Read() sets as well as FOLLOW sets
1259# in LALR(1) generation.
1260#
1261# Inputs: X - An input set
1262# R - A relation
1263# FP - Set-valued function
1264# ------------------------------------------------------------------------------
def digraph(X, R, FP):
    """Compute the set-valued function F(x) = FP(x) U U{ F(y) | x R y }.

    X is the input set, R the relation, and FP the base set-valued
    function.  Returns a dictionary mapping each x in X to its resulting
    list of values.  Used for Read() and FOLLOW set computation in
    LALR(1) generation.
    """
    # Traversal numbering: 0 means the node has not been visited yet.
    N = dict.fromkeys(X, 0)
    stack = []
    F = {}
    for node in X:
        if N[node] == 0:
            traverse(node, N, stack, F, X, R, FP)
    return F
def traverse(x, N, stack, F, X, R, FP):
    """Depth-first traversal helper for digraph().

    Visits node x, merges F values along the relation R, and collapses
    strongly connected components so that every member of a component
    ends up sharing the same F set.  N holds traversal numbers (0 =
    unvisited, MAXINT = finished); stack is the shared traversal stack.
    """
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x) # F(X) <- F'(x)

    rel = R(x) # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        # Propagate the lowest traversal number reachable from x
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        # x is the root of a strongly connected component: pop the whole
        # component off the stack and give every member the same F set.
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
class LALRError(YaccError):
    """Exception raised for errors detected during LALR table construction."""
    pass
1304# -----------------------------------------------------------------------------
1305# == LRTable ==
1306#
1307# This class implements the LR table generation algorithm. There are no
1308# public methods.
1309# -----------------------------------------------------------------------------
class LRTable:
    """Builds the LALR(1) parsing tables for a Grammar instance.

    All of the work happens in __init__; the resulting tables are left in
    self.lr_action (action table) and self.lr_goto (goto table), with any
    shift/reduce and reduce/reduce conflicts recorded in self.sr_conflicts
    and self.rr_conflicts.  There are no public methods other than
    bind_callables().
    """
    def __init__(self, grammar, log=None):
        """Construct the tables for *grammar*, logging details to *log*."""
        self.grammar = grammar

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}         # Action table
        self.lr_goto = {}           # Goto table
        self.lr_productions = grammar.Productions   # Copy of grammar Production array
        self.lr_goto_cache = {}     # Cache of computed gotos
        self.lr0_cidhash = {}       # Cache of closures

        self._add_count = 0         # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []         # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        """Resolve each production's action-function name to a callable in pdict."""
        for p in self.lr_productions:
            p.bind(pdict)

    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
    def lr0_closure(self, I):
        """Return the LR(0) closure of the item set I as a list of LR items."""
        # _add_count tags items already added in this closure pass so the
        # same item is never appended twice.
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = True
        while didadd:
            didadd = False
            for j in J:
                for x in j.lr_after:
                    if getattr(x, 'lr0_added', 0) == self._add_count:
                        continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = True

        return J

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol. This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects). With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.
    def lr0_goto(self, I, x):
        """Return goto(I, x): the item set reached from item set I on symbol x."""
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result
        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g

    # Compute the LR(0) sets of item function
    def lr0_items(self):
        """Compute the collection C of LR(0) item sets (the parser states)."""
        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbols
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = {}
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I, x)
                if not g or id(g) in self.lr0_cidhash:
                    continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C

    # -----------------------------------------------------------------------------
    # ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennelo (1982).
    #
    # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
    # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    # Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
    # McGraw-Hill Book Company, (1985).
    #
    # -----------------------------------------------------------------------------

    # -----------------------------------------------------------------------------
    # compute_nullable_nonterminals()
    #
    # Creates a dictionary containing all of the non-terminals that might produce
    # an empty production.
    # -----------------------------------------------------------------------------
    def compute_nullable_nonterminals(self):
        """Return the set of nonterminals that can derive the empty string."""
        nullable = set()
        num_nullable = 0
        while True:
            for p in self.grammar.Productions[1:]:
                if p.len == 0:
                    nullable.add(p.name)
                    continue
                for t in p.prod:
                    if t not in nullable:
                        break
                else:
                    nullable.add(p.name)
            # Fixed point reached when no new nullable symbols were added.
            if len(nullable) == num_nullable:
                break
            num_nullable = len(nullable)
        return nullable

    # -----------------------------------------------------------------------------
    # find_nonterminal_transitions(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions. These are transitions in which a dot appears immediately before
    # a non-terminal. Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------
    def find_nonterminal_transitions(self, C):
        """Return all (state, nonterminal) transitions found in item sets C."""
        trans = []
        for stateno, state in enumerate(C):
            for p in state:
                if p.lr_index < p.len - 1:
                    t = (stateno, p.prod[p.lr_index+1])
                    if t[1] in self.grammar.Nonterminals:
                        if t not in trans:
                            trans.append(t)
        return trans

    # -----------------------------------------------------------------------------
    # dr_relation()
    #
    # Computes the DR(p,A) relationships for non-terminal transitions. The input
    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
    #
    # Returns a list of terminals.
    # -----------------------------------------------------------------------------
    def dr_relation(self, C, trans, nullable):
        """Return DR(state, N): terminals directly readable after transition *trans*."""
        state, N = trans
        terms = []

        g = self.lr0_goto(C[state], N)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index+1]
                if a in self.grammar.Terminals:
                    if a not in terms:
                        terms.append(a)

        # This extra bit is to handle the start state
        if state == 0 and N == self.grammar.Productions[0].prod[0]:
            terms.append('$end')

        return terms

    # -----------------------------------------------------------------------------
    # reads_relation()
    #
    # Computes the READS() relation (p,A) READS (t,C).
    # -----------------------------------------------------------------------------
    def reads_relation(self, C, trans, empty):
        """Return the list of transitions read-related to *trans* via nullable symbols."""
        # Look for empty transitions
        rel = []
        state, N = trans

        g = self.lr0_goto(C[state], N)
        j = self.lr0_cidhash.get(id(g), -1)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index + 1]
                if a in empty:
                    rel.append((j, a))

        return rel

    # -----------------------------------------------------------------------------
    # compute_lookback_includes()
    #
    # Determines the lookback and includes relations
    #
    # LOOKBACK:
    #
    # This relation is determined by running the LR(0) state machine forward.
    # For example, starting with a production "N : . A B C", we run it forward
    # to obtain "N : A B C ." We then build a relationship between this final
    # state and the starting state. These relationships are stored in a dictionary
    # lookdict.
    #
    # INCLUDES:
    #
    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
    #
    # This relation is used to determine non-terminal transitions that occur
    # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
    # if the following holds:
    #
    # B -> LAT, where T -> epsilon and p' -L-> p
    #
    # L is essentially a prefix (which may be empty), T is a suffix that must be
    # able to derive an empty string. State p' must lead to state p with the string L.
    #
    # -----------------------------------------------------------------------------
    def compute_lookback_includes(self, C, trans, nullable):
        """Return (lookdict, includedict): the LOOKBACK and INCLUDES relations."""
        lookdict = {}       # Dictionary of lookback relations
        includedict = {}    # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state, N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N:
                    continue

                # Okay, we have a name match. We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j, t) in dtrans:
                        # Yes. Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals:
                                break      # No, forget it
                            if p.prod[li] not in nullable:
                                break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j, t))

                    g = self.lr0_goto(C[j], t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name:
                        continue
                    if r.len != p.len:
                        continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]:
                            break
                        i = i + 1
                    else:
                        lookb.append((j, r))
            for i in includes:
                if i not in includedict:
                    includedict[i] = []
                includedict[i].append((state, N))
            lookdict[(state, N)] = lookb

        return lookdict, includedict

    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
    #
    # Inputs: C = Set of LR(0) items
    # ntrans = Set of nonterminal transitions
    # nullable = Set of empty transitions
    #
    # Returns a set containing the read sets
    # -----------------------------------------------------------------------------
    def compute_read_sets(self, C, ntrans, nullable):
        """Compute Read() sets for all nonterminal transitions via digraph()."""
        FP = lambda x: self.dr_relation(C, x, nullable)
        R = lambda x: self.reads_relation(C, x, nullable)
        F = digraph(ntrans, R, FP)
        return F

    # -----------------------------------------------------------------------------
    # compute_follow_sets()
    #
    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
    # and an include set, this function computes the follow sets
    #
    # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
    #
    # Inputs:
    # ntrans = Set of nonterminal transitions
    # readsets = Readset (previously computed)
    # inclsets = Include sets (previously computed)
    #
    # Returns a set containing the follow sets
    # -----------------------------------------------------------------------------
    def compute_follow_sets(self, ntrans, readsets, inclsets):
        """Compute LALR FOLLOW sets from read sets and INCLUDES relations."""
        FP = lambda x: readsets[x]
        R = lambda x: inclsets.get(x, [])
        F = digraph(ntrans, R, FP)
        return F

    # -----------------------------------------------------------------------------
    # add_lookaheads()
    #
    # Attaches the lookahead symbols to grammar rules.
    #
    # Inputs: lookbacks - Set of lookback relations
    # followset - Computed follow set
    #
    # This function directly attaches the lookaheads to productions contained
    # in the lookbacks set
    # -----------------------------------------------------------------------------
    def add_lookaheads(self, lookbacks, followset):
        """Attach computed lookahead symbols to the productions in *lookbacks*."""
        for trans, lb in lookbacks.items():
            # Loop over productions in lookback
            for state, p in lb:
                if state not in p.lookaheads:
                    p.lookaheads[state] = []
                f = followset.get(trans, [])
                for a in f:
                    if a not in p.lookaheads[state]:
                        p.lookaheads[state].append(a)

    # -----------------------------------------------------------------------------
    # add_lalr_lookaheads()
    #
    # This function does all of the work of adding lookahead information for use
    # with LALR parsing
    # -----------------------------------------------------------------------------
    def add_lalr_lookaheads(self, C):
        """Run the full DeRemer-Pennello pipeline and attach LALR lookaheads."""
        # Determine all of the nullable nonterminals
        nullable = self.compute_nullable_nonterminals()

        # Find all non-terminal transitions
        trans = self.find_nonterminal_transitions(C)

        # Compute read sets
        readsets = self.compute_read_sets(C, trans, nullable)

        # Compute lookback/includes relations
        lookd, included = self.compute_lookback_includes(C, trans, nullable)

        # Compute LALR FOLLOW sets
        followsets = self.compute_follow_sets(trans, readsets, included)

        # Add all of the lookaheads
        self.add_lookaheads(lookd, followsets)

    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the parse tables for SLR or LALR
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        """Build the action and goto tables, resolving conflicts by precedence.

        Fills in self.lr_action and self.lr_goto, one entry per LR(0) state,
        and records any conflicts in self.sr_conflicts / self.rr_conflicts.
        """
        Productions = self.grammar.Productions
        Precedence = self.grammar.Precedence
        goto = self.lr_goto         # Goto array
        action = self.lr_action     # Action array
        log = self.log              # Logger for output

        actionp = {}                # Action production array (temporary)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()
        self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = []            # List of actions
            st_action = {}
            st_actionp = {}
            st_goto = {}
            log.info('')
            log.info('state %d', st)
            log.info('')
            for p in I:
                log.info(' (%d) %s', p.number, p)
            log.info('')

            for p in I:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action['$end'] = 0
                        st_actionp['$end'] = p
                    else:
                        # We are at the end of a production. Reduce!
                        laheads = p.lookaheads[st]
                        for a in laheads:
                            actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.

                                    # Shift precedence comes from the token
                                    sprec, slevel = Precedence.get(a, ('right', 0))

                                    # Reduce precedence comes from rule being reduced (p)
                                    rprec, rlevel = Productions[p.number].prec

                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info(' ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict. In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp, rejectp = pp, oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp, rejectp = oldp, pp
                                    self.rr_conflicts.append((st, chosenp, rejectp))
                                    log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
                                             a, st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I, a)
                        j = self.lr0_cidhash.get(id(g), -1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a, p, 'shift and go to state %d' % j))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError('Shift/shift conflict in state %d' % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    # - if precedence of reduce rule is higher, we reduce.
                                    # - if precedence of reduce is same and left assoc, we reduce.
                                    # - otherwise we shift

                                    # Shift precedence comes from the token
                                    sprec, slevel = Precedence.get(a, ('right', 0))

                                    # Reduce precedence comes from the rule that could have been reduced
                                    rprec, rlevel = Productions[st_actionp[a].number].prec

                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info(' ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))

                                else:
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = {}
            for a, p, m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info(' %-15s %s', a, m)
                        _actprint[(a, m)] = 1
            log.info('')
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a, p, m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a, m) in _actprint:
                            log.debug(' ! %-15s [ %s ]', a, m)
                            not_used = 1
                            _actprint[(a, m)] = 1
            if not_used:
                log.debug('')

            # Construct the goto table for this state

            nkeys = {}
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I, n)
                j = self.lr0_cidhash.get(id(g), -1)
                if j >= 0:
                    st_goto[n] = j
                    log.info(' %-30s shift and go to state %d', n, j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
1925# -----------------------------------------------------------------------------
1926# === INTROSPECTION ===
1927#
1928# The following functions and classes are used to implement the PLY
1929# introspection features followed by the yacc() function itself.
1930# -----------------------------------------------------------------------------
1932# -----------------------------------------------------------------------------
1933# get_caller_module_dict()
1934#
1935# This function returns a dictionary containing all of the symbols defined within
1936# a caller further down the call stack. This is used to get the environment
1937# associated with the yacc() call if none was provided.
1938# -----------------------------------------------------------------------------
def get_caller_module_dict(levels):
    """Return the symbols visible in the stack frame *levels* up from here.

    The result is a fresh dict combining that frame's globals with its
    locals (locals take priority).  yacc() uses this to harvest grammar
    definitions from the caller's namespace when no module is given.
    """
    frame = sys._getframe(levels)
    env = frame.f_globals.copy()
    # At module level globals and locals are the same mapping; only merge
    # when the frame is inside a function or class body.
    if frame.f_globals != frame.f_locals:
        env.update(frame.f_locals)
    return env
1947# -----------------------------------------------------------------------------
1948# parse_grammar()
1949#
1950# This takes a raw grammar rule string and parses it into production data
1951# -----------------------------------------------------------------------------
def parse_grammar(doc, file, line):
    """Parse a raw grammar docstring into production tuples.

    Each non-blank line of *doc* must be either ``name : sym sym ...``
    (``::=`` is also accepted) or a continuation ``| sym sym ...`` of the
    previous rule.  Returns a list of ``(file, lineno, prodname, syms)``
    tuples; *line* is the line number of the docstring itself, so rule
    lines are numbered starting at ``line + 1``.

    Raises SyntaxError on a misplaced ``|``, a missing/invalid separator,
    or any other malformed rule.
    """
    grammar = []
    lastp = None          # name of the most recent rule, for '|' lines
    dline = line
    for ps in doc.splitlines():
        dline += 1
        tokens = ps.split()
        if not tokens:
            continue
        try:
            if tokens[0] == '|':
                # Continuation of the previous rule
                if not lastp:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
                prodname = lastp
                syms = tokens[1:]
            else:
                prodname = tokens[0]
                lastp = prodname
                syms = tokens[2:]
                # tokens[1] raises IndexError for a bare name, which the
                # broad handler below converts to a SyntaxError.
                assign = tokens[1]
                if assign not in (':', '::='):
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
            grammar.append((file, dline, prodname, syms))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
    return grammar
1986# -----------------------------------------------------------------------------
1987# ParserReflect()
1988#
1989# This class represents information extracted for building a parser including
1990# start symbol, error function, tokens, precedence list, action functions,
1991# etc.
1992# -----------------------------------------------------------------------------
class ParserReflect(object):
    """Collects and validates the raw parser specification.

    Introspects a module dictionary (``pdict``) for the pieces a parser is
    built from: the ``start`` symbol, the ``p_error`` handler, the
    ``tokens`` list, the ``precedence`` table, and all ``p_*`` rule
    functions.  ``get_all()`` gathers the data; ``validate_all()`` checks
    it, recording problems through ``self.log`` and setting ``self.error``.
    """
    def __init__(self, pdict, log=None):
        # pdict: the module namespace being introspected
        self.pdict = pdict
        self.start = None          # start symbol (set by get_start)
        self.error_func = None     # p_error handler (set by get_error_func)
        self.tokens = None         # sorted token list (set by get_tokens)
        self.modules = set()       # modules contributing rules (for duplicate checks)
        self.grammar = []          # [(funcname, (file, line, prodname, syms)), ...]
        self.error = False         # latched True on any validation failure

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        """Harvest start symbol, error handler, tokens, precedence, and rules."""
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        """Run every validation pass; return True if any error was found."""
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_modules()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        """Return a string fingerprint of the grammar specification.

        Concatenates the start symbol, flattened precedence entries, token
        list, and every rule docstring.  Non-string data (already reported
        by validation) is silently ignored here.
        """
        parts = []
        try:
            if self.start:
                parts.append(self.start)
            if self.prec:
                parts.append(''.join([''.join(p) for p in self.prec]))
            if self.tokens:
                parts.append(' '.join(self.tokens))
            for f in self.pfuncs:
                if f[3]:
                    parts.append(f[3])
        except (TypeError, ValueError):
            pass
        return ''.join(parts)

    # -----------------------------------------------------------------------------
    # validate_modules()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file. Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work). Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_modules(self):
        """Warn about p_* functions defined more than once in a source file."""
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for module in self.modules:
            try:
                lines, linen = inspect.getsourcelines(module)
            except IOError:
                # Source unavailable (e.g. frozen/compiled module); skip check.
                continue

            counthash = {}
            for linen, line in enumerate(lines):
                linen += 1          # enumerate is 0-based; report 1-based lines
                m = fre.match(line)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        filename = inspect.getsourcefile(module)
                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
                                         filename, linen, name, prev)

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        # NOTE: a bad 'start' is only logged; self.error is not set here.
        if self.start is not None:
            if not isinstance(self.start, str):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        """Check that p_error (if defined) is a callable taking one token arg."""
        if self.error_func:
            if isinstance(self.error_func, types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = True
                return

            eline = self.error_func.__code__.co_firstlineno
            efile = self.error_func.__code__.co_filename
            module = inspect.getmodule(self.error_func)
            self.modules.add(module)

            # Bound methods carry an implicit self; discount it.
            argcount = self.error_func.__code__.co_argcount - ismethod
            if argcount != 1:
                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
                self.error = True

    # Get the tokens map
    def get_tokens(self):
        """Fetch and sort the token list; errors leave self.tokens as None."""
        tokens = self.pdict.get('tokens')
        if not tokens:
            self.log.error('No token list is defined')
            self.error = True
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error('tokens must be a list or tuple')
            self.error = True
            return

        if not tokens:
            self.log.error('tokens is empty')
            self.error = True
            return

        self.tokens = sorted(tokens)

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = True
            return

        terminals = set()
        for n in self.tokens:
            if n in terminals:
                # Duplicate is only warned about, not treated as fatal.
                self.log.warning('Token %r multiply defined', n)
            terminals.add(n)

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get('precedence')

    # Validate and parse the precedence map
    def validate_precedence(self):
        """Flatten the precedence table into self.preclist.

        Each entry ``(assoc, term, ..., term)`` becomes one
        ``(term, assoc, level)`` tuple per terminal, with *level* starting
        at 1 in table order.
        """
        preclist = []
        if self.prec:
            if not isinstance(self.prec, (list, tuple)):
                self.log.error('precedence must be a list or tuple')
                self.error = True
                return
            for level, p in enumerate(self.prec):
                if not isinstance(p, (list, tuple)):
                    self.log.error('Bad precedence table')
                    self.error = True
                    return

                if len(p) < 2:
                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
                    self.error = True
                    return
                assoc = p[0]
                if not isinstance(assoc, str):
                    self.log.error('precedence associativity must be a string')
                    self.error = True
                    return
                for term in p[1:]:
                    if not isinstance(term, str):
                        self.log.error('precedence items must be strings')
                        self.error = True
                        return
                    preclist.append((term, assoc, level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        """Collect (line, module, name, docstring) for every p_* rule function."""
        p_functions = []
        for name, item in self.pdict.items():
            if not name.startswith('p_') or name == 'p_error':
                continue
            if isinstance(item, (types.FunctionType, types.MethodType)):
                # An explicit co_firstlineno attribute (if present) overrides
                # the code object's own line number.
                line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
                module = inspect.getmodule(item)
                p_functions.append((line, module, name, item.__doc__))

        # Sort all of the actions by line number; make sure to stringify
        # modules to make them sortable, since `line` may not uniquely sort all
        # p functions
        p_functions.sort(key=lambda p_function: (
            p_function[0],
            str(p_function[1]),
            p_function[2],
            p_function[3]))
        self.pfuncs = p_functions

    # Validate all of the p_functions
    def validate_pfunctions(self):
        """Check every p_* function's arity/docstring and parse its grammar."""
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error('no rules of the form p_rulename are defined')
            self.error = True
            return

        for line, module, name, doc in self.pfuncs:
            file = inspect.getsourcefile(module)
            func = self.pdict[name]
            if isinstance(func, types.MethodType):
                reqargs = 2      # self + production object
            else:
                reqargs = 1      # production object only
            if func.__code__.co_argcount > reqargs:
                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
                self.error = True
            elif func.__code__.co_argcount < reqargs:
                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
                self.error = True
            elif not func.__doc__:
                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
                                 file, line, func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc, file, line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError as e:
                    self.log.error(str(e))
                    self.error = True

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.modules.add(module)

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n, v in self.pdict.items():
            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
                continue
            if n.startswith('t_'):
                continue
            if n.startswith('p_') and n != 'p_error':
                self.log.warning('%r not defined as a function', n)
            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
                    (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
                if v.__doc__:
                    try:
                        # Heuristic: a docstring whose second word is ':' looks
                        # like a grammar rule missing its p_ prefix.
                        doc = v.__doc__.split(' ')
                        if doc[1] == ':':
                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
                    except IndexError:
                        pass

        self.grammar = grammar
2265# -----------------------------------------------------------------------------
2266# yacc(module)
2267#
2268# Build a parser
2269# -----------------------------------------------------------------------------
def yacc(*, debug=yaccdebug, module=None, start=None,
         check_recursion=True, optimize=False, debugfile=debug_file,
         debuglog=None, errorlog=None):
    """Build and return an LRParser from grammar rules in the caller's namespace.

    Keyword arguments:
        debug           -- emit the parser.out-style debugging log
        module          -- module (or object) holding the grammar; defaults to
                           the caller's namespace
        start           -- start symbol, overriding any 'start' in the namespace
        check_recursion -- check for unreachable symbols and infinite cycles
        optimize        -- accepted for backwards compatibility; unused here
        debugfile       -- filename for the debugging log
        debuglog/errorlog -- logger objects for debug and error output

    Raises YaccError if the parser specification is invalid.  As a side
    effect, the module-level ``parse`` shortcut is bound to the new
    parser's parse method.
    """

    # Reference to the parsing method of the last built parser
    global parse

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        pdict = dict(_items)
        # If no __file__ or __package__ attributes are available, try to obtain them
        # from the __module__ instead
        if '__file__' not in pdict:
            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
        if '__package__' not in pdict and '__module__' in pdict:
            if hasattr(sys.modules[pdict['__module__']], '__package__'):
                pdict['__package__'] = sys.modules[pdict['__module__']].__package__
    else:
        # No module given: harvest the grammar from the caller of yacc()
        pdict = get_caller_module_dict(2)

    # Set start symbol if it's specified directly using an argument
    if start is not None:
        pdict['start'] = start

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict, log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError('Unable to build parser')

    if debuglog is None:
        if debug:
            try:
                debuglog = PlyLogger(open(debugfile, 'w'))
            except IOError as e:
                # Debug output is best-effort; fall back to a no-op logger.
                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
                debuglog = NullLogger()
        else:
            debuglog = NullLogger()

    debuglog.info('Created by PLY (http://www.dabeaz.com/ply)')

    errors = False

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError('Unable to build parser')

    if not pinfo.error_func:
        errorlog.warning('no p_error() function is defined')

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError as e:
            errorlog.warning('%s', e)

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError as e:
            errorlog.error('%s', e)
            errors = True

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError as e:
        errorlog.error(str(e))
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
        errors = True

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info('')
        debuglog.info('Unused terminals:')
        debuglog.info('')
        for term in unused_terminals:
            errorlog.warning('Token %r defined, but not used', term)
            debuglog.info('    %s', term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info('')
        debuglog.info('Grammar')
        debuglog.info('')
        for n, p in enumerate(grammar.Productions):
            debuglog.info('Rule %-5d %s', n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning('There is 1 unused token')
    if len(unused_terminals) > 1:
        errorlog.warning('There are %d unused tokens', len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning('There is 1 unused rule')
    if len(unused_rules) > 1:
        errorlog.warning('There are %d unused rules', len(unused_rules))

    if debug:
        debuglog.info('')
        debuglog.info('Terminals, with rules where they appear')
        debuglog.info('')
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info('')
        debuglog.info('Nonterminals, with rules where they appear')
        debuglog.info('')
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info('')

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning('Symbol %r is unreachable', u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error('Infinite recursion detected for symbol %r', inf)
            errors = True

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Run the LRTable on the grammar
    lr = LRTable(grammar, debuglog)

    if debug:
        # Conflict counts are only reported in debug mode.
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning('1 shift/reduce conflict')
        elif num_sr > 1:
            errorlog.warning('%d shift/reduce conflicts', num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning('1 reduce/reduce conflict')
        elif num_rr > 1:
            errorlog.warning('%d reduce/reduce conflicts', num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning('')
        debuglog.warning('Conflicts:')
        debuglog.warning('')

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s',  tok, state, resolution)

        # Each (state, rule, rejected) triple is reported only once.
        already_reported = set()
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
            already_reported.add((state, id(rule), id(rejected)))

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning('Rule (%s) is never reduced', rejected)
                errorlog.warning('Rule (%s) is never reduced', rejected)
                warned_never.append(rejected)

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    parse = parser.parse
    return parser