# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2017
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
# Spark and the GNU bison utility.
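#
# For example, a grammar rule is written as a function whose docstring
# holds the BNF (an illustrative sketch, not a rule from this module):
#
#      def p_statement_assign(p):
#          'statement : NAME EQUALS expr'
#          p[0] = ('assign', p[1], p[3])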
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). That implementation has since been
# replaced by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style."  Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import re
import types
import sys
import os.path
import inspect
import base64
import warnings

__version__    = '3.10'
__tabversion__ = '3.10'

#-----------------------------------------------------------------------------
#                  === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = True             # Debugging mode. If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = False            # Set to True if developing yacc. This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.
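#
# For example (a usage sketch; debuglog and errorlog are the keyword
# arguments accepted by yacc() for this purpose):
#
#      log = PlyLogger(sys.stderr)
#      parser = yacc.yacc(debuglog=log, errorlog=log)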

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + ' ...'
    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))

# Panic mode error recovery support.  This feature is being reworked--much of the
# code here is to offer a deprecation/backwards-compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''

def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r

#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm.
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline, endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos, endlexpos)
# representing the range of positional information for a symbol.
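#
# For example, inside a rule for 'expr : expr PLUS term' (an illustrative
# sketch, not a rule from this module):
#
#      def p_expr_plus(p):
#          'expr : expr PLUS term'
#          p[0] = p[1] + p[3]       # .value of the 'expr' and 'term' symbols
#          line = p.lineno(1)       # starting line number of the left 'expr'
#          span = p.lexspan(3)      # (lexpos, endlexpos) of 'term'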

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError
279# -----------------------------------------------------------------------------
280# == LRParser ==
281#
282# The LR Parsing engine.
283# -----------------------------------------------------------------------------
284
285class LRParser:
286 def __init__(self, lrtab, errorf):
287 self.productions = lrtab.lr_productions
288 self.action = lrtab.lr_action
289 self.goto = lrtab.lr_goto
290 self.errorfunc = errorf
291 self.set_defaulted_states()
292 self.errorok = True
293
294 def errok(self):
295 self.errorok = True
296
297 def restart(self):
298 del self.statestack[:]
299 del self.symstack[:]
300 sym = YaccSymbol()
301 sym.type = '$end'
302 self.symstack.append(sym)
303 self.statestack.append(0)
304
    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See: https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
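    #
    # If an application needs the lexer and parser to stay in strict
    # lock-step (one token fetched per parser action), this behavior can
    # be turned off on the parser instance (a usage sketch):
    #
    #      parser = yacc.yacc()
    #      parser.disable_defaulted_states()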
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}

    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)


    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging-enabled version of parse().  All changes made to the
    # parsing engine should be made here.  Optimized versions of this function
    # are automatically created by the ply/ygen.py script.  This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser's token() method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parsedebug-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of the parse() method.  DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script.  Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery


        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser's token() method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer


            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]


            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t


                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None


                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:


                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY.  This code is automatically generated
    # by the ply/ygen.py script.  Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery


        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser's token() method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer


            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]


            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t


                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None


                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym


                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:


                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:


                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-notrack-end

# -----------------------------------------------------------------------------
#                      === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions:
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr', 'PLUS', 'term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       line     - Line number where production function is defined
#
# The following attributes are computed automatically:
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            self.str = '%s -> <empty>' % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal stand-in for Production objects when
# reading table data from files.  It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]


# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here are
# its basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr', '.', 'PLUS', 'term']
#       number     - Production number.
#
#       lr_next    - Next LR item.  For example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
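#
# For example, the production 'expr : expr PLUS term' gives rise to the
# following items (an illustration of what lr_item(n) builds for n = 0..3):
#
#       expr -> . expr PLUS term
#       expr -> expr . PLUS term
#       expr -> expr PLUS . term
#       expr -> expr PLUS term .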
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, '.')
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            s = '%s -> <empty>' % self.name
        return s

    def __repr__(self):
        return 'LRItem(' + str(self) + ')'

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.  Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None

# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]   # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames = {}         # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap = {}           # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals = {}         # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First = {}             # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}            # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}        # Precedence rules for each terminal.  Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar


    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal.  assoc is the associativity such as
    # 'left', 'right', or 'nonassoc'.  level is a numeric level.
    #
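    # For example (an illustrative sketch; yacc() normally makes these calls
    # for you, based on the user's 'precedence' specification):
    #
    #      g = Grammar(['PLUS', 'TIMES', 'NUMBER'])
    #      g.set_precedence('PLUS',  'left', 1)
    #      g.set_precedence('TIMES', 'left', 2)
    #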
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.  For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr', 'PLUS', 'term'].
    #
    # Precedence is determined by the precedence of the right-most terminal symbol
    # in the production or by the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
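    #
    # For example, a %prec override appears at the end of a rule like this
    # (an illustrative sketch, not a rule from this module):
    #
    #      def p_expr_uminus(p):
    #          'expr : MINUS expr %prec UMINUS'
    #          p[0] = -p[2]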
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
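    #
    # For example, a rule such as 'a : a B', with no alternative for 'a'
    # that terminates, can never derive a string of only terminals.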
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
1728
1729 # -----------------------------------------------------------------------------
1730 # undefined_symbols()
1731 #
1732 # Find all symbols that were used the grammar, but not defined as tokens or
1733 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol
1734 # and prod is the production where the symbol was used.
1735 # -----------------------------------------------------------------------------
1736 def undefined_symbols(self):
1737 result = []
1738 for p in self.Productions:
1739 if not p:
1740 continue
1741
1742 for s in p.prod:
1743 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1744 result.append((s, p))
1745 return result
1746
1747 # -----------------------------------------------------------------------------
1748 # unused_terminals()
1749 #
# Find all terminals that were defined, but not used by the grammar.  Returns
# a list of the unused terminals.
1752 # -----------------------------------------------------------------------------
1753 def unused_terminals(self):
1754 unused_tok = []
1755 for s, v in self.Terminals.items():
1756 if s != 'error' and not v:
1757 unused_tok.append(s)
1758
1759 return unused_tok
1760
1761 # ------------------------------------------------------------------------------
1762 # unused_rules()
1763 #
# Find all grammar rules that were defined, but never used (for example,
# because they are unreachable).  Returns a list of productions.
1766 # ------------------------------------------------------------------------------
1767
1768 def unused_rules(self):
1769 unused_prod = []
1770 for s, v in self.Nonterminals.items():
1771 if not v:
1772 p = self.Prodnames[s][0]
1773 unused_prod.append(p)
1774 return unused_prod
1775
1776 # -----------------------------------------------------------------------------
1777 # unused_precedence()
1778 #
1779 # Returns a list of tuples (term,precedence) corresponding to precedence
1780 # rules that were never used by the grammar. term is the name of the terminal
1781 # on which precedence was applied and precedence is a string such as 'left' or
1782 # 'right' corresponding to the type of precedence.
1783 # -----------------------------------------------------------------------------
1784
1785 def unused_precedence(self):
1786 unused = []
1787 for termname in self.Precedence:
1788 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1789 unused.append((termname, self.Precedence[termname][0]))
1790
1791 return unused
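
# For example, if a grammar's precedence list included the hypothetical
# entry ('left', 'FOO'), but FOO is neither a declared token nor a name
# referenced through %prec, this method would return [('FOO', 'left')].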
1792
1793 # -------------------------------------------------------------------------
1794 # _first()
1795 #
1796 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1797 #
# During execution of compute_first(), the result may be incomplete.
1799 # Afterward (e.g., when called from compute_follow()), it will be complete.
1800 # -------------------------------------------------------------------------
1801 def _first(self, beta):
1802
1803 # We are computing First(x1,x2,x3,...,xn)
1804 result = []
1805 for x in beta:
1806 x_produces_empty = False
1807
1808 # Add all the non-<empty> symbols of First[x] to the result.
1809 for f in self.First[x]:
1810 if f == '<empty>':
1811 x_produces_empty = True
1812 else:
1813 if f not in result:
1814 result.append(f)
1815
1816 if x_produces_empty:
1817 # We have to consider the next x in beta,
1818 # i.e. stay in the loop.
1819 pass
1820 else:
1821 # We don't have to consider any further symbols in beta.
1822 break
1823 else:
1824 # There was no 'break' from the loop,
1825 # so x_produces_empty was true for all x in beta,
1826 # so beta produces empty as well.
1827 result.append('<empty>')
1828
1829 return result
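
# A worked example (hypothetical grammar symbols): suppose First[A] is
# ['<empty>', 'LPAREN'] and First[B] is ['NUMBER'].  Then:
#
#     _first((A, B, 'SEMI')) == ['LPAREN', 'NUMBER']
#
# A can derive empty, so B is consulted next; B cannot derive empty, so
# SEMI is never reached and '<empty>' is not added to the result.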
1830
1831 # -------------------------------------------------------------------------
1832 # compute_first()
1833 #
1834 # Compute the value of FIRST1(X) for all symbols
1835 # -------------------------------------------------------------------------
1836 def compute_first(self):
1837 if self.First:
1838 return self.First
1839
1840 # Terminals:
1841 for t in self.Terminals:
1842 self.First[t] = [t]
1843
1844 self.First['$end'] = ['$end']
1845
1846 # Nonterminals:
1847
1848 # Initialize to the empty set:
1849 for n in self.Nonterminals:
1850 self.First[n] = []
1851
1852 # Then propagate symbols until no change:
1853 while True:
1854 some_change = False
1855 for n in self.Nonterminals:
1856 for p in self.Prodnames[n]:
1857 for f in self._first(p.prod):
1858 if f not in self.First[n]:
1859 self.First[n].append(f)
1860 some_change = True
1861 if not some_change:
1862 break
1863
1864 return self.First
1865
1866 # ---------------------------------------------------------------------
1867 # compute_follow()
1868 #
1869 # Computes all of the follow sets for every non-terminal symbol. The
1870 # follow set is the set of all symbols that might follow a given
1871 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1872 # ---------------------------------------------------------------------
1873 def compute_follow(self, start=None):
1874 # If already computed, return the result
1875 if self.Follow:
1876 return self.Follow
1877
1878 # If first sets not computed yet, do that first.
1879 if not self.First:
1880 self.compute_first()
1881
# Initialize every follow set to empty; then add '$end' to the follow set
# of the start symbol
1883 for k in self.Nonterminals:
1884 self.Follow[k] = []
1885
1886 if not start:
1887 start = self.Productions[1].name
1888
1889 self.Follow[start] = ['$end']
1890
1891 while True:
1892 didadd = False
1893 for p in self.Productions[1:]:
# Scan the right-hand side of this production for nonterminals
1895 for i, B in enumerate(p.prod):
1896 if B in self.Nonterminals:
1897 # Okay. We got a non-terminal in a production
1898 fst = self._first(p.prod[i+1:])
1899 hasempty = False
1900 for f in fst:
1901 if f != '<empty>' and f not in self.Follow[B]:
1902 self.Follow[B].append(f)
1903 didadd = True
1904 if f == '<empty>':
1905 hasempty = True
1906 if hasempty or i == (len(p.prod)-1):
# Add elements of Follow(p.name) to Follow(B)
1908 for f in self.Follow[p.name]:
1909 if f not in self.Follow[B]:
1910 self.Follow[B].append(f)
1911 didadd = True
1912 if not didadd:
1913 break
1914 return self.Follow
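
# A worked example, using a hypothetical grammar with start symbol 'expr':
#
#     expr : expr PLUS term
#          | term
#     term : NUMBER
#
# Follow(expr) = ['$end', 'PLUS'] ('$end' because expr is the start symbol,
# 'PLUS' because expr appears immediately before PLUS).  Follow(term) picks up
# everything in Follow(expr), since term appears at the end of both expr
# productions.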
1915
1916
1917 # -----------------------------------------------------------------------------
1918 # build_lritems()
1919 #
1920 # This function walks the list of productions and builds a complete set of the
# LR items.  The LR items are stored in two ways: First, each production keeps
# the list of its own LR items in p.lr_items.  Second, the items of a
# production are chained together through their lr_next attributes.  For example:
1924 #
1925 # E -> E PLUS E
1926 #
1927 # Creates the list
1928 #
1929 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1930 # -----------------------------------------------------------------------------
1931
1932 def build_lritems(self):
1933 for p in self.Productions:
1934 lastlri = p
1935 i = 0
1936 lr_items = []
1937 while True:
1938 if i > len(p):
1939 lri = None
1940 else:
1941 lri = LRItem(p, i)
1942 # Precompute the list of productions immediately following
1943 try:
1944 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1945 except (IndexError, KeyError):
1946 lri.lr_after = []
1947 try:
1948 lri.lr_before = lri.prod[i-1]
1949 except IndexError:
1950 lri.lr_before = None
1951
1952 lastlri.lr_next = lri
1953 if not lri:
1954 break
1955 lr_items.append(lri)
1956 lastlri = lri
1957 i += 1
1958 p.lr_items = lr_items
1959
1960# -----------------------------------------------------------------------------
1961# == Class LRTable ==
1962#
# This class represents a basic table of LR parsing information.
1964# Methods for generating the tables are not defined here. They are defined
1965# in the derived class LRGeneratedTable.
1966# -----------------------------------------------------------------------------
1967
1968class VersionError(YaccError):
1969 pass
1970
1971class LRTable(object):
1972 def __init__(self):
1973 self.lr_action = None
1974 self.lr_goto = None
1975 self.lr_productions = None
1976 self.lr_method = None
1977
1978 def read_table(self, module):
1979 if isinstance(module, types.ModuleType):
1980 parsetab = module
1981 else:
1982 exec('import %s' % module)
1983 parsetab = sys.modules[module]
1984
1985 if parsetab._tabversion != __tabversion__:
1986 raise VersionError('yacc table file version is out of date')
1987
1988 self.lr_action = parsetab._lr_action
1989 self.lr_goto = parsetab._lr_goto
1990
1991 self.lr_productions = []
1992 for p in parsetab._lr_productions:
1993 self.lr_productions.append(MiniProduction(*p))
1994
1995 self.lr_method = parsetab._lr_method
1996 return parsetab._lr_signature
1997
1998 def read_pickle(self, filename):
1999 try:
2000 import cPickle as pickle
2001 except ImportError:
2002 import pickle
2003
2004 if not os.path.exists(filename):
2005 raise ImportError
2006
2007 in_f = open(filename, 'rb')
2008
2009 tabversion = pickle.load(in_f)
2010 if tabversion != __tabversion__:
2011 raise VersionError('yacc table file version is out of date')
2012 self.lr_method = pickle.load(in_f)
2013 signature = pickle.load(in_f)
2014 self.lr_action = pickle.load(in_f)
2015 self.lr_goto = pickle.load(in_f)
2016 productions = pickle.load(in_f)
2017
2018 self.lr_productions = []
2019 for p in productions:
2020 self.lr_productions.append(MiniProduction(*p))
2021
2022 in_f.close()
2023 return signature
2024
2025 # Bind all production function names to callable objects in pdict
2026 def bind_callables(self, pdict):
2027 for p in self.lr_productions:
2028 p.bind(pdict)
2029
2030
2031# -----------------------------------------------------------------------------
2032# === LR Generator ===
2033#
# The following classes and functions are used to generate LR parsing tables
# from a grammar.
2036# -----------------------------------------------------------------------------
2037
2038# -----------------------------------------------------------------------------
2039# digraph()
2040# traverse()
2041#
2042# The following two functions are used to compute set valued functions
2043# of the form:
2044#
2045# F(x) = F'(x) U U{F(y) | x R y}
2046#
2047# This is used to compute the values of Read() sets as well as FOLLOW sets
2048# in LALR(1) generation.
2049#
2050# Inputs: X - An input set
2051# R - A relation
2052# FP - Set-valued function
2053# ------------------------------------------------------------------------------
2054
2055def digraph(X, R, FP):
2056 N = {}
2057 for x in X:
2058 N[x] = 0
2059 stack = []
2060 F = {}
2061 for x in X:
2062 if N[x] == 0:
2063 traverse(x, N, stack, F, X, R, FP)
2064 return F
2065
2066def traverse(x, N, stack, F, X, R, FP):
2067 stack.append(x)
2068 d = len(stack)
2069 N[x] = d
2070 F[x] = FP(x) # F(X) <- F'(x)
2071
2072 rel = R(x) # Get y's related to x
2073 for y in rel:
2074 if N[y] == 0:
2075 traverse(y, N, stack, F, X, R, FP)
2076 N[x] = min(N[x], N[y])
2077 for a in F.get(y, []):
2078 if a not in F[x]:
2079 F[x].append(a)
2080 if N[x] == d:
2081 N[stack[-1]] = MAXINT
2082 F[stack[-1]] = F[x]
2083 element = stack.pop()
2084 while element != x:
2085 N[stack[-1]] = MAXINT
2086 F[stack[-1]] = F[x]
2087 element = stack.pop()
2088
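# A small worked example (hypothetical inputs): with X = ['a', 'b'],
# R = lambda x: {'a': ['b'], 'b': []}[x], and FP = lambda x: [x.upper()],
# digraph(X, R, FP) would compute
#
#     F == {'a': ['A', 'B'], 'b': ['B']}
#
# since F(a) = FP(a) U F(b) and F(b) = FP(b).
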
2089class LALRError(YaccError):
2090 pass
2091
2092# -----------------------------------------------------------------------------
2093# == LRGeneratedTable ==
2094#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write_table() and pickle_table().
2097# -----------------------------------------------------------------------------
2098
2099class LRGeneratedTable(LRTable):
2100 def __init__(self, grammar, method='LALR', log=None):
2101 if method not in ['SLR', 'LALR']:
2102 raise LALRError('Unsupported method %s' % method)
2103
2104 self.grammar = grammar
2105 self.lr_method = method
2106
2107 # Set up the logger
2108 if not log:
2109 log = NullLogger()
2110 self.log = log
2111
2112 # Internal attributes
2113 self.lr_action = {} # Action table
2114 self.lr_goto = {} # Goto table
2115 self.lr_productions = grammar.Productions # Copy of grammar Production array
2116 self.lr_goto_cache = {} # Cache of computed gotos
2117 self.lr0_cidhash = {} # Cache of closures
2118
2119 self._add_count = 0 # Internal counter used to detect cycles
2120
# Diagnostic information filled in by the table generator
2122 self.sr_conflict = 0
2123 self.rr_conflict = 0
2124 self.conflicts = [] # List of conflicts
2125
2126 self.sr_conflicts = []
2127 self.rr_conflicts = []
2128
2129 # Build the tables
2130 self.grammar.build_lritems()
2131 self.grammar.compute_first()
2132 self.grammar.compute_follow()
2133 self.lr_parse_table()
2134
2135 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2136
2137 def lr0_closure(self, I):
2138 self._add_count += 1
2139
2140 # Add everything in I to J
2141 J = I[:]
2142 didadd = True
2143 while didadd:
2144 didadd = False
2145 for j in J:
2146 for x in j.lr_after:
2147 if getattr(x, 'lr0_added', 0) == self._add_count:
2148 continue
2149 # Add B --> .G to J
2150 J.append(x.lr_next)
2151 x.lr0_added = self._add_count
2152 didadd = True
2153
2154 return J
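
# For illustration, with hypothetical productions E -> E PLUS T | T, the
# closure of the item set [S' -> . E] also contains E -> . E PLUS T and
# E -> . T (and, transitively, the initial items of T's productions),
# because the dot sits immediately to the left of a nonterminal.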
2155
2156 # Compute the LR(0) goto function goto(I,X) where I is a set
2157 # of LR(0) items and X is a grammar symbol. This function is written
2158 # in a way that guarantees uniqueness of the generated goto sets
2159 # (i.e. the same goto set will never be returned as two different Python
2160 # objects). With uniqueness, we can later do fast set comparisons using
2161 # id(obj) instead of element-wise comparison.
2162
2163 def lr0_goto(self, I, x):
2164 # First we look for a previously cached entry
2165 g = self.lr_goto_cache.get((id(I), x))
2166 if g:
2167 return g
2168
2169 # Now we generate the goto set in a way that guarantees uniqueness
2170 # of the result
2171
2172 s = self.lr_goto_cache.get(x)
2173 if not s:
2174 s = {}
2175 self.lr_goto_cache[x] = s
2176
2177 gs = []
2178 for p in I:
2179 n = p.lr_next
2180 if n and n.lr_before == x:
2181 s1 = s.get(id(n))
2182 if not s1:
2183 s1 = {}
2184 s[id(n)] = s1
2185 gs.append(n)
2186 s = s1
2187 g = s.get('$end')
2188 if not g:
2189 if gs:
2190 g = self.lr0_closure(gs)
2191 s['$end'] = g
2192 else:
2193 s['$end'] = gs
2194 self.lr_goto_cache[(id(I), x)] = g
2195 return g
2196
2197 # Compute the LR(0) sets of item function
2198 def lr0_items(self):
2199 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2200 i = 0
2201 for I in C:
2202 self.lr0_cidhash[id(I)] = i
2203 i += 1
2204
# Loop over the items in C and each grammar symbol
2206 i = 0
2207 while i < len(C):
2208 I = C[i]
2209 i += 1
2210
2211 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2212 asyms = {}
2213 for ii in I:
2214 for s in ii.usyms:
2215 asyms[s] = None
2216
2217 for x in asyms:
2218 g = self.lr0_goto(I, x)
2219 if not g or id(g) in self.lr0_cidhash:
2220 continue
2221 self.lr0_cidhash[id(g)] = len(C)
2222 C.append(g)
2223
2224 return C
2225
2226 # -----------------------------------------------------------------------------
2227 # ==== LALR(1) Parsing ====
2228 #
2229 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2230 # relying upon Follow() sets when performing reductions, a more selective
2231 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2232 # Thus, we mainly just have to focus on calculating the lookahead sets.
2233 #
# The method used here is due to DeRemer and Pennello (1982).
#
# DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2237 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2238 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2239 #
2240 # Further details can also be found in:
2241 #
2242 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2243 # McGraw-Hill Book Company, (1985).
2244 #
2245 # -----------------------------------------------------------------------------
2246
2247 # -----------------------------------------------------------------------------
2248 # compute_nullable_nonterminals()
2249 #
# Creates a set containing all of the non-terminals that can derive an
# empty string.
2252 # -----------------------------------------------------------------------------
2253
2254 def compute_nullable_nonterminals(self):
2255 nullable = set()
2256 num_nullable = 0
2257 while True:
2258 for p in self.grammar.Productions[1:]:
2259 if p.len == 0:
2260 nullable.add(p.name)
2261 continue
2262 for t in p.prod:
2263 if t not in nullable:
2264 break
2265 else:
2266 nullable.add(p.name)
2267 if len(nullable) == num_nullable:
2268 break
2269 num_nullable = len(nullable)
2270 return nullable
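
# For example, given the hypothetical rules
#
#     opt_sign : PLUS
#              | MINUS
#              |
#     number   : opt_sign DIGITS
#
# the first pass adds 'opt_sign' (it has a zero-length production); 'number'
# never becomes nullable because DIGITS is a terminal, so the result is the
# set {'opt_sign'}.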
2271
2272 # -----------------------------------------------------------------------------
2273 # find_nonterminal_trans(C)
2274 #
# Given a set of LR(0) items, this function finds all of the non-terminal
2276 # transitions. These are transitions in which a dot appears immediately before
2277 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2278 # is the state number and N is the nonterminal symbol.
2279 #
2280 # The input C is the set of LR(0) items.
2281 # -----------------------------------------------------------------------------
2282
2283 def find_nonterminal_transitions(self, C):
2284 trans = []
2285 for stateno, state in enumerate(C):
2286 for p in state:
2287 if p.lr_index < p.len - 1:
2288 t = (stateno, p.prod[p.lr_index+1])
2289 if t[1] in self.grammar.Nonterminals:
2290 if t not in trans:
2291 trans.append(t)
2292 return trans
2293
2294 # -----------------------------------------------------------------------------
2295 # dr_relation()
2296 #
2297 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2298 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2299 #
2300 # Returns a list of terminals.
2301 # -----------------------------------------------------------------------------
2302
2303 def dr_relation(self, C, trans, nullable):
2305 state, N = trans
2306 terms = []
2307
2308 g = self.lr0_goto(C[state], N)
2309 for p in g:
2310 if p.lr_index < p.len - 1:
2311 a = p.prod[p.lr_index+1]
2312 if a in self.grammar.Terminals:
2313 if a not in terms:
2314 terms.append(a)
2315
2316 # This extra bit is to handle the start state
2317 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2318 terms.append('$end')
2319
2320 return terms
2321
2322 # -----------------------------------------------------------------------------
2323 # reads_relation()
2324 #
2325 # Computes the READS() relation (p,A) READS (t,C).
2326 # -----------------------------------------------------------------------------
2327
2328 def reads_relation(self, C, trans, empty):
2329 # Look for empty transitions
2330 rel = []
2331 state, N = trans
2332
2333 g = self.lr0_goto(C[state], N)
2334 j = self.lr0_cidhash.get(id(g), -1)
2335 for p in g:
2336 if p.lr_index < p.len - 1:
2337 a = p.prod[p.lr_index + 1]
2338 if a in empty:
2339 rel.append((j, a))
2340
2341 return rel
2342
2343 # -----------------------------------------------------------------------------
2344 # compute_lookback_includes()
2345 #
2346 # Determines the lookback and includes relations
2347 #
2348 # LOOKBACK:
2349 #
2350 # This relation is determined by running the LR(0) state machine forward.
2351 # For example, starting with a production "N : . A B C", we run it forward
2352 # to obtain "N : A B C ." We then build a relationship between this final
2353 # state and the starting state. These relationships are stored in a dictionary
2354 # lookdict.
2355 #
2356 # INCLUDES:
2357 #
2358 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2359 #
2360 # This relation is used to determine non-terminal transitions that occur
2361 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2362 # if the following holds:
2363 #
2364 # B -> LAT, where T -> epsilon and p' -L-> p
2365 #
2366 # L is essentially a prefix (which may be empty), T is a suffix that must be
2367 # able to derive an empty string. State p' must lead to state p with the string L.
2368 #
2369 # -----------------------------------------------------------------------------
2370
2371 def compute_lookback_includes(self, C, trans, nullable):
2372 lookdict = {} # Dictionary of lookback relations
2373 includedict = {} # Dictionary of include relations
2374
2375 # Make a dictionary of non-terminal transitions
2376 dtrans = {}
2377 for t in trans:
2378 dtrans[t] = 1
2379
2380 # Loop over all transitions and compute lookbacks and includes
2381 for state, N in trans:
2382 lookb = []
2383 includes = []
2384 for p in C[state]:
2385 if p.name != N:
2386 continue
2387
2388 # Okay, we have a name match. We now follow the production all the way
2389 # through the state machine until we get the . on the right hand side
2390
2391 lr_index = p.lr_index
2392 j = state
2393 while lr_index < p.len - 1:
2394 lr_index = lr_index + 1
2395 t = p.prod[lr_index]
2396
2397 # Check to see if this symbol and state are a non-terminal transition
2398 if (j, t) in dtrans:
2399 # Yes. Okay, there is some chance that this is an includes relation
2400 # the only way to know for certain is whether the rest of the
2401 # production derives empty
2402
2403 li = lr_index + 1
2404 while li < p.len:
2405 if p.prod[li] in self.grammar.Terminals:
break  # No, forget it
2407 if p.prod[li] not in nullable:
2408 break
2409 li = li + 1
2410 else:
2411 # Appears to be a relation between (j,t) and (state,N)
2412 includes.append((j, t))
2413
2414 g = self.lr0_goto(C[j], t) # Go to next set
2415 j = self.lr0_cidhash.get(id(g), -1) # Go to next state
2416
# When we get here, j is the final state.  Now we have to locate the production
2418 for r in C[j]:
2419 if r.name != p.name:
2420 continue
2421 if r.len != p.len:
2422 continue
2423 i = 0
# This loop is comparing a production ". A B C" with "A B C ."
2425 while i < r.lr_index:
2426 if r.prod[i] != p.prod[i+1]:
2427 break
2428 i = i + 1
2429 else:
2430 lookb.append((j, r))
2431 for i in includes:
2432 if i not in includedict:
2433 includedict[i] = []
2434 includedict[i].append((state, N))
2435 lookdict[(state, N)] = lookb
2436
2437 return lookdict, includedict
2438
2439 # -----------------------------------------------------------------------------
2440 # compute_read_sets()
2441 #
2442 # Given a set of LR(0) items, this function computes the read sets.
2443 #
2444 # Inputs: C = Set of LR(0) items
2445 # ntrans = Set of nonterminal transitions
# nullable = Set of nullable nonterminals
#
# Returns a dictionary mapping each nonterminal transition to its read set
2449 # -----------------------------------------------------------------------------
2450
2451 def compute_read_sets(self, C, ntrans, nullable):
2452 FP = lambda x: self.dr_relation(C, x, nullable)
2453 R = lambda x: self.reads_relation(C, x, nullable)
2454 F = digraph(ntrans, R, FP)
2455 return F
2456
2457 # -----------------------------------------------------------------------------
2458 # compute_follow_sets()
2459 #
2460 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2461 # and an include set, this function computes the follow sets
2462 #
2463 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2464 #
2465 # Inputs:
2466 # ntrans = Set of nonterminal transitions
2467 # readsets = Readset (previously computed)
2468 # inclsets = Include sets (previously computed)
2469 #
# Returns a dictionary mapping each nonterminal transition to its follow set
2471 # -----------------------------------------------------------------------------
2472
2473 def compute_follow_sets(self, ntrans, readsets, inclsets):
2474 FP = lambda x: readsets[x]
2475 R = lambda x: inclsets.get(x, [])
2476 F = digraph(ntrans, R, FP)
2477 return F
2478
2479 # -----------------------------------------------------------------------------
2480 # add_lookaheads()
2481 #
2482 # Attaches the lookahead symbols to grammar rules.
2483 #
2484 # Inputs: lookbacks - Set of lookback relations
2485 # followset - Computed follow set
2486 #
2487 # This function directly attaches the lookaheads to productions contained
2488 # in the lookbacks set
2489 # -----------------------------------------------------------------------------
2490
2491 def add_lookaheads(self, lookbacks, followset):
2492 for trans, lb in lookbacks.items():
2493 # Loop over productions in lookback
2494 for state, p in lb:
2495 if state not in p.lookaheads:
2496 p.lookaheads[state] = []
2497 f = followset.get(trans, [])
2498 for a in f:
2499 if a not in p.lookaheads[state]:
2500 p.lookaheads[state].append(a)
2501
2502 # -----------------------------------------------------------------------------
2503 # add_lalr_lookaheads()
2504 #
2505 # This function does all of the work of adding lookahead information for use
2506 # with LALR parsing
2507 # -----------------------------------------------------------------------------
2508
2509 def add_lalr_lookaheads(self, C):
2510 # Determine all of the nullable nonterminals
2511 nullable = self.compute_nullable_nonterminals()
2512
2513 # Find all non-terminal transitions
2514 trans = self.find_nonterminal_transitions(C)
2515
2516 # Compute read sets
2517 readsets = self.compute_read_sets(C, trans, nullable)
2518
2519 # Compute lookback/includes relations
2520 lookd, included = self.compute_lookback_includes(C, trans, nullable)
2521
2522 # Compute LALR FOLLOW sets
2523 followsets = self.compute_follow_sets(trans, readsets, included)
2524
2525 # Add all of the lookaheads
2526 self.add_lookaheads(lookd, followsets)
2527
2528 # -----------------------------------------------------------------------------
2529 # lr_parse_table()
2530 #
2531 # This function constructs the parse tables for SLR or LALR
2532 # -----------------------------------------------------------------------------
2533 def lr_parse_table(self):
2534 Productions = self.grammar.Productions
2535 Precedence = self.grammar.Precedence
2536 goto = self.lr_goto # Goto array
2537 action = self.lr_action # Action array
2538 log = self.log # Logger for output
2539
2540 actionp = {} # Action production array (temporary)
2541
2542 log.info('Parsing method: %s', self.lr_method)
2543
2544 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2545 # This determines the number of states
2546
2547 C = self.lr0_items()
2548
2549 if self.lr_method == 'LALR':
2550 self.add_lalr_lookaheads(C)
2551
2552 # Build the parser table, state by state
2553 st = 0
2554 for I in C:
2555 # Loop over each production in I
2556 actlist = [] # List of actions
2557 st_action = {}
2558 st_actionp = {}
2559 st_goto = {}
2560 log.info('')
2561 log.info('state %d', st)
2562 log.info('')
2563 for p in I:
2564 log.info(' (%d) %s', p.number, p)
2565 log.info('')
2566
2567 for p in I:
2568 if p.len == p.lr_index + 1:
2569 if p.name == "S'":
2570 # Start symbol. Accept!
2571 st_action['$end'] = 0
2572 st_actionp['$end'] = p
2573 else:
2574 # We are at the end of a production. Reduce!
2575 if self.lr_method == 'LALR':
2576 laheads = p.lookaheads[st]
2577 else:
2578 laheads = self.grammar.Follow[p.name]
2579 for a in laheads:
2580 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
2581 r = st_action.get(a)
2582 if r is not None:
2583 # Whoa. Have a shift/reduce or reduce/reduce conflict
2584 if r > 0:
2585 # Need to decide on shift or reduce here
2586 # By default we favor shifting. Need to add
2587 # some precedence rules here.
2588
2589 # Shift precedence comes from the token
2590 sprec, slevel = Precedence.get(a, ('right', 0))
2591
2592 # Reduce precedence comes from rule being reduced (p)
2593 rprec, rlevel = Productions[p.number].prec
2594
2595 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2596 # We really need to reduce here.
2597 st_action[a] = -p.number
2598 st_actionp[a] = p
2599 if not slevel and not rlevel:
2600 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2601 self.sr_conflicts.append((st, a, 'reduce'))
2602 Productions[p.number].reduced += 1
2603 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2604 st_action[a] = None
2605 else:
2606 # Hmmm. Guess we'll keep the shift
2607 if not rlevel:
2608 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2609 self.sr_conflicts.append((st, a, 'shift'))
2610 elif r < 0:
2611 # Reduce/reduce conflict. In this case, we favor the rule
2612 # that was defined first in the grammar file
2613 oldp = Productions[-r]
2614 pp = Productions[p.number]
2615 if oldp.line > pp.line:
2616 st_action[a] = -p.number
2617 st_actionp[a] = p
2618 chosenp, rejectp = pp, oldp
2619 Productions[p.number].reduced += 1
2620 Productions[oldp.number].reduced -= 1
2621 else:
2622 chosenp, rejectp = oldp, pp
2623 self.rr_conflicts.append((st, chosenp, rejectp))
2624 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
2625 a, st_actionp[a].number, st_actionp[a])
2626 else:
2627 raise LALRError('Unknown conflict in state %d' % st)
2628 else:
2629 st_action[a] = -p.number
2630 st_actionp[a] = p
2631 Productions[p.number].reduced += 1
2632 else:
2633 i = p.lr_index
2634 a = p.prod[i+1] # Get symbol right after the "."
2635 if a in self.grammar.Terminals:
2636 g = self.lr0_goto(I, a)
2637 j = self.lr0_cidhash.get(id(g), -1)
2638 if j >= 0:
2639 # We are in a shift state
2640 actlist.append((a, p, 'shift and go to state %d' % j))
2641 r = st_action.get(a)
2642 if r is not None:
# Whoa, we have a shift/reduce or shift/shift conflict
2644 if r > 0:
2645 if r != j:
2646 raise LALRError('Shift/shift conflict in state %d' % st)
2647 elif r < 0:
2648 # Do a precedence check.
2649 # - if precedence of reduce rule is higher, we reduce.
2650 # - if precedence of reduce is same and left assoc, we reduce.
2651 # - otherwise we shift
2652
2653 # Shift precedence comes from the token
2654 sprec, slevel = Precedence.get(a, ('right', 0))
2655
2656 # Reduce precedence comes from the rule that could have been reduced
2657 rprec, rlevel = Productions[st_actionp[a].number].prec
2658
2659 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2660 # We decide to shift here... highest precedence to shift
2661 Productions[st_actionp[a].number].reduced -= 1
2662 st_action[a] = j
2663 st_actionp[a] = p
2664 if not rlevel:
2665 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2666 self.sr_conflicts.append((st, a, 'shift'))
2667 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2668 st_action[a] = None
2669 else:
2670 # Hmmm. Guess we'll keep the reduce
2671 if not slevel and not rlevel:
2672 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2673 self.sr_conflicts.append((st, a, 'reduce'))
2674
2675 else:
2676 raise LALRError('Unknown conflict in state %d' % st)
2677 else:
2678 st_action[a] = j
2679 st_actionp[a] = p
2680
2681 # Print the actions associated with each terminal
2682 _actprint = {}
2683 for a, p, m in actlist:
2684 if a in st_action:
2685 if p is st_actionp[a]:
2686 log.info(' %-15s %s', a, m)
2687 _actprint[(a, m)] = 1
2688 log.info('')
2689 # Print the actions that were not used. (debugging)
2690 not_used = 0
2691 for a, p, m in actlist:
2692 if a in st_action:
2693 if p is not st_actionp[a]:
2694 if not (a, m) in _actprint:
2695 log.debug(' ! %-15s [ %s ]', a, m)
2696 not_used = 1
2697 _actprint[(a, m)] = 1
2698 if not_used:
2699 log.debug('')
2700
2701 # Construct the goto table for this state
2702
2703 nkeys = {}
2704 for ii in I:
2705 for s in ii.usyms:
2706 if s in self.grammar.Nonterminals:
2707 nkeys[s] = None
2708 for n in nkeys:
2709 g = self.lr0_goto(I, n)
2710 j = self.lr0_cidhash.get(id(g), -1)
2711 if j >= 0:
2712 st_goto[n] = j
2713 log.info(' %-30s shift and go to state %d', n, j)
2714
2715 action[st] = st_action
2716 actionp[st] = st_actionp
2717 goto[st] = st_goto
2718 st += 1
2719
2720 # -----------------------------------------------------------------------------
# write_table()
#
# This function writes the LR parsing tables to a Python module file
2724 # -----------------------------------------------------------------------------
2725
2726 def write_table(self, tabmodule, outputdir='', signature=''):
2727 if isinstance(tabmodule, types.ModuleType):
2728 raise IOError("Won't overwrite existing tabmodule")
2729
2730 basemodulename = tabmodule.split('.')[-1]
2731 filename = os.path.join(outputdir, basemodulename) + '.py'
2732 try:
2733 f = open(filename, 'w')
2734
2735 f.write('''
2736# %s
2737# This file is automatically generated. Do not edit.
2738_tabversion = %r
2739
2740_lr_method = %r
2741
2742_lr_signature = %r
2743 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2744
2745 # Change smaller to 0 to go back to original tables
2746 smaller = 1
2747
2748 # Factor out names to try and make smaller
2749 if smaller:
2750 items = {}
2751
2752 for s, nd in self.lr_action.items():
2753 for name, v in nd.items():
2754 i = items.get(name)
2755 if not i:
2756 i = ([], [])
2757 items[name] = i
2758 i[0].append(s)
2759 i[1].append(v)
2760
2761 f.write('\n_lr_action_items = {')
2762 for k, v in items.items():
2763 f.write('%r:([' % k)
2764 for i in v[0]:
2765 f.write('%r,' % i)
2766 f.write('],[')
2767 for i in v[1]:
2768 f.write('%r,' % i)
2769
2770 f.write(']),')
2771 f.write('}\n')
2772
2773 f.write('''
2774_lr_action = {}
2775for _k, _v in _lr_action_items.items():
2776 for _x,_y in zip(_v[0],_v[1]):
2777 if not _x in _lr_action: _lr_action[_x] = {}
2778 _lr_action[_x][_k] = _y
2779del _lr_action_items
2780''')
2781
2782 else:
2783 f.write('\n_lr_action = { ')
2784 for k, v in self.lr_action.items():
2785 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2786 f.write('}\n')
2787
2788 if smaller:
2789 # Factor out names to try and make smaller
2790 items = {}
2791
2792 for s, nd in self.lr_goto.items():
2793 for name, v in nd.items():
2794 i = items.get(name)
2795 if not i:
2796 i = ([], [])
2797 items[name] = i
2798 i[0].append(s)
2799 i[1].append(v)
2800
2801 f.write('\n_lr_goto_items = {')
2802 for k, v in items.items():
2803 f.write('%r:([' % k)
2804 for i in v[0]:
2805 f.write('%r,' % i)
2806 f.write('],[')
2807 for i in v[1]:
2808 f.write('%r,' % i)
2809
2810 f.write(']),')
2811 f.write('}\n')
2812
2813 f.write('''
2814_lr_goto = {}
2815for _k, _v in _lr_goto_items.items():
2816 for _x, _y in zip(_v[0], _v[1]):
2817 if not _x in _lr_goto: _lr_goto[_x] = {}
2818 _lr_goto[_x][_k] = _y
2819del _lr_goto_items
2820''')
2821 else:
2822 f.write('\n_lr_goto = { ')
2823 for k, v in self.lr_goto.items():
2824 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2825 f.write('}\n')
2826
2827 # Write production table
2828 f.write('_lr_productions = [\n')
2829 for p in self.lr_productions:
2830 if p.func:
2831 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2832 p.func, os.path.basename(p.file), p.line))
2833 else:
2834 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2835 f.write(']\n')
2836 f.close()
2837
except IOError:
2839 raise
2840
2841
2842 # -----------------------------------------------------------------------------
2843 # pickle_table()
2844 #
# This function pickles the LR parsing tables to the file named by filename
2846 # -----------------------------------------------------------------------------
2847
2848 def pickle_table(self, filename, signature=''):
2849 try:
2850 import cPickle as pickle
2851 except ImportError:
2852 import pickle
2853 with open(filename, 'wb') as outf:
2854 pickle.dump(__tabversion__, outf, pickle_protocol)
2855 pickle.dump(self.lr_method, outf, pickle_protocol)
2856 pickle.dump(signature, outf, pickle_protocol)
2857 pickle.dump(self.lr_action, outf, pickle_protocol)
2858 pickle.dump(self.lr_goto, outf, pickle_protocol)
2859
2860 outp = []
2861 for p in self.lr_productions:
2862 if p.func:
2863 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2864 else:
2865 outp.append((str(p), p.name, p.len, None, None, None))
2866 pickle.dump(outp, outf, pickle_protocol)
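
# Note: the pickle stream written here mirrors what LRTable.read_pickle()
# consumes: the table version, the LR method, the signature, the action and
# goto tables, and finally the production list, in that order.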
2867
2868# -----------------------------------------------------------------------------
2869# === INTROSPECTION ===
2870#
2871# The following functions and classes are used to implement the PLY
2872# introspection features followed by the yacc() function itself.
2873# -----------------------------------------------------------------------------
2874
2875# -----------------------------------------------------------------------------
2876# get_caller_module_dict()
2877#
2878# This function returns a dictionary containing all of the symbols defined within
2879# a caller further down the call stack. This is used to get the environment
2880# associated with the yacc() call if none was provided.
2881# -----------------------------------------------------------------------------
2882
2883def get_caller_module_dict(levels):
2884 f = sys._getframe(levels)
2885 ldict = f.f_globals.copy()
2886 if f.f_globals != f.f_locals:
2887 ldict.update(f.f_locals)
2888 return ldict
2889
2890# -----------------------------------------------------------------------------
2891# parse_grammar()
2892#
2893# This takes a raw grammar rule string and parses it into production data
2894# -----------------------------------------------------------------------------
2895def parse_grammar(doc, file, line):
2896 grammar = []
2897 # Split the doc string into lines
2898 pstrings = doc.splitlines()
2899 lastp = None
2900 dline = line
2901 for ps in pstrings:
2902 dline += 1
2903 p = ps.split()
2904 if not p:
2905 continue
2906 try:
2907 if p[0] == '|':
2908 # This is a continuation of a previous rule
2909 if not lastp:
2910 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2911 prodname = lastp
2912 syms = p[1:]
2913 else:
2914 prodname = p[0]
2915 lastp = prodname
2916 syms = p[2:]
2917 assign = p[1]
2918 if assign != ':' and assign != '::=':
2919 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2920
2921 grammar.append((file, dline, prodname, syms))
2922 except SyntaxError:
2923 raise
2924 except Exception:
2925 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
2926
2927 return grammar
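
# For example, a rule function whose docstring reads
#
#     expression : expression PLUS term
#                | term
#
# would be parsed into entries of the form (file, line, 'expression',
# ['expression', 'PLUS', 'term']) and (file, line, 'expression', ['term']),
# with the '|' line inheriting the production name from the previous line.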
2928
2929# -----------------------------------------------------------------------------
2930# ParserReflect()
2931#
2932# This class represents information extracted for building a parser including
2933# start symbol, error function, tokens, precedence list, action functions,
2934# etc.
2935# -----------------------------------------------------------------------------
2936class ParserReflect(object):
2937 def __init__(self, pdict, log=None):
2938 self.pdict = pdict
2939 self.start = None
2940 self.error_func = None
2941 self.tokens = None
2942 self.modules = set()
2943 self.grammar = []
2944 self.error = False
2945
2946 if log is None:
2947 self.log = PlyLogger(sys.stderr)
2948 else:
2949 self.log = log
2950
2951 # Get all of the basic information
2952 def get_all(self):
2953 self.get_start()
2954 self.get_error_func()
2955 self.get_tokens()
2956 self.get_precedence()
2957 self.get_pfunctions()
2958
2959 # Validate all of the information
2960 def validate_all(self):
2961 self.validate_start()
2962 self.validate_error_func()
2963 self.validate_tokens()
2964 self.validate_precedence()
2965 self.validate_pfunctions()
2966 self.validate_modules()
2967 return self.error
2968
2969 # Compute a signature over the grammar
2970 def signature(self):
2971 parts = []
2972 try:
2973 if self.start:
2974 parts.append(self.start)
2975 if self.prec:
2976 parts.append(''.join([''.join(p) for p in self.prec]))
2977 if self.tokens:
2978 parts.append(' '.join(self.tokens))
2979 for f in self.pfuncs:
2980 if f[3]:
2981 parts.append(f[3])
2982 except (TypeError, ValueError):
2983 pass
2984 return ''.join(parts)
2985
2986 # -----------------------------------------------------------------------------
2987 # validate_modules()
2988 #
2989 # This method checks to see if there are duplicated p_rulename() functions
2990 # in the parser module file. Without this function, it is really easy for
2991 # users to make mistakes by cutting and pasting code fragments (and it's a real
2992 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2993 # we just do a little regular expression pattern matching of def statements
2994 # to try and detect duplicates.
2995 # -----------------------------------------------------------------------------
2996
2997 def validate_modules(self):
2998 # Match def p_funcname(
2999 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
3000
3001 for module in self.modules:
3002 try:
3003 lines, linen = inspect.getsourcelines(module)
3004 except IOError:
3005 continue
3006
3007 counthash = {}
3008 for linen, line in enumerate(lines):
3009 linen += 1
3010 m = fre.match(line)
3011 if m:
3012 name = m.group(1)
3013 prev = counthash.get(name)
3014 if not prev:
3015 counthash[name] = linen
3016 else:
3017 filename = inspect.getsourcefile(module)
3018 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
3019 filename, linen, name, prev)
3020
3021 # Get the start symbol
3022 def get_start(self):
3023 self.start = self.pdict.get('start')
3024
3025 # Validate the start symbol
3026 def validate_start(self):
3027 if self.start is not None:
3028 if not isinstance(self.start, string_types):
3029 self.log.error("'start' must be a string")
3030
3031 # Look for error handler
3032 def get_error_func(self):
3033 self.error_func = self.pdict.get('p_error')
3034
3035 # Validate the error function
3036 def validate_error_func(self):
3037 if self.error_func:
3038 if isinstance(self.error_func, types.FunctionType):
3039 ismethod = 0
3040 elif isinstance(self.error_func, types.MethodType):
3041 ismethod = 1
3042 else:
3043 self.log.error("'p_error' defined, but is not a function or method")
3044 self.error = True
3045 return
3046
3047 eline = self.error_func.__code__.co_firstlineno
3048 efile = self.error_func.__code__.co_filename
3049 module = inspect.getmodule(self.error_func)
3050 self.modules.add(module)
3051
3052 argcount = self.error_func.__code__.co_argcount - ismethod
3053 if argcount != 1:
3054 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
3055 self.error = True
3056
3057 # Get the tokens map
3058 def get_tokens(self):
3059 tokens = self.pdict.get('tokens')
3060 if not tokens:
3061 self.log.error('No token list is defined')
3062 self.error = True
3063 return
3064
3065 if not isinstance(tokens, (list, tuple)):
3066 self.log.error('tokens must be a list or tuple')
3067 self.error = True
3068 return
3069
3070 if not tokens:
3071 self.log.error('tokens is empty')
3072 self.error = True
3073 return
3074
3075 self.tokens = tokens
3076
3077 # Validate the tokens
3078 def validate_tokens(self):
3080 if 'error' in self.tokens:
3081 self.log.error("Illegal token name 'error'. Is a reserved word")
3082 self.error = True
3083 return
3084
3085 terminals = set()
3086 for n in self.tokens:
3087 if n in terminals:
3088 self.log.warning('Token %r multiply defined', n)
3089 terminals.add(n)
3090
3091 # Get the precedence map (if any)
3092 def get_precedence(self):
3093 self.prec = self.pdict.get('precedence')
3094
3095 # Validate and parse the precedence map
3096 def validate_precedence(self):
3097 preclist = []
3098 if self.prec:
3099 if not isinstance(self.prec, (list, tuple)):
3100 self.log.error('precedence must be a list or tuple')
3101 self.error = True
3102 return
3103 for level, p in enumerate(self.prec):
3104 if not isinstance(p, (list, tuple)):
3105 self.log.error('Bad precedence table')
3106 self.error = True
3107 return
3108
3109 if len(p) < 2:
3110 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
3111 self.error = True
3112 return
3113 assoc = p[0]
3114 if not isinstance(assoc, string_types):
3115 self.log.error('precedence associativity must be a string')
3116 self.error = True
3117 return
3118 for term in p[1:]:
3119 if not isinstance(term, string_types):
3120 self.log.error('precedence items must be strings')
3121 self.error = True
3122 return
3123 preclist.append((term, assoc, level+1))
3124 self.preclist = preclist
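
# For example, the hypothetical specification
#
#     precedence = (('left', 'PLUS', 'MINUS'),
#                   ('left', 'TIMES', 'DIVIDE'))
#
# would yield preclist entries ('PLUS', 'left', 1), ('MINUS', 'left', 1),
# ('TIMES', 'left', 2), ('DIVIDE', 'left', 2), with higher levels binding
# more tightly.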
3125
3126 # Get all p_functions from the grammar
3127 def get_pfunctions(self):
3128 p_functions = []
3129 for name, item in self.pdict.items():
3130 if not name.startswith('p_') or name == 'p_error':
3131 continue
3132 if isinstance(item, (types.FunctionType, types.MethodType)):
3133 line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
3134 module = inspect.getmodule(item)
3135 p_functions.append((line, module, name, item.__doc__))
3136
3137 # Sort all of the actions by line number; make sure to stringify
3138 # modules to make them sortable, since `line` may not uniquely sort all
3139 # p functions
3140 p_functions.sort(key=lambda p_function: (
3141 p_function[0],
3142 str(p_function[1]),
3143 p_function[2],
3144 p_function[3]))
3145 self.pfuncs = p_functions
3146
3147 # Validate all of the p_functions
3148 def validate_pfunctions(self):
3149 grammar = []
3150 # Check for non-empty symbols
3151 if len(self.pfuncs) == 0:
3152 self.log.error('no rules of the form p_rulename are defined')
3153 self.error = True
3154 return
3155
3156 for line, module, name, doc in self.pfuncs:
3157 file = inspect.getsourcefile(module)
3158 func = self.pdict[name]
3159 if isinstance(func, types.MethodType):
3160 reqargs = 2
3161 else:
3162 reqargs = 1
3163 if func.__code__.co_argcount > reqargs:
3164 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
3165 self.error = True
3166 elif func.__code__.co_argcount < reqargs:
3167 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
3168 self.error = True
3169 elif not func.__doc__:
3170 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
3171 file, line, func.__name__)
3172 else:
3173 try:
3174 parsed_g = parse_grammar(doc, file, line)
3175 for g in parsed_g:
3176 grammar.append((name, g))
3177 except SyntaxError as e:
3178 self.log.error(str(e))
3179 self.error = True
3180
3181 # Looks like a valid grammar rule
3182 # Mark the file in which defined.
3183 self.modules.add(module)
3184
3185 # Secondary validation step that looks for p_ definitions that are not functions
3186 # or functions that look like they might be grammar rules.
3187
3188 for n, v in self.pdict.items():
3189 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
3190 continue
3191 if n.startswith('t_'):
3192 continue
3193 if n.startswith('p_') and n != 'p_error':
3194 self.log.warning('%r not defined as a function', n)
3195 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
3196 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
3197 if v.__doc__:
3198 try:
3199 doc = v.__doc__.split(' ')
3200 if doc[1] == ':':
3201 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
3202 v.__code__.co_filename, v.__code__.co_firstlineno, n)
3203 except IndexError:
3204 pass
3205
3206 self.grammar = grammar
3207
3208# -----------------------------------------------------------------------------
3209# yacc(module)
3210#
3211# Build a parser
3212# -----------------------------------------------------------------------------
3213
3214def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3215 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
3216 outputdir=None, debuglog=None, errorlog=None, picklefile=None):
3217
3218 if tabmodule is None:
3219 tabmodule = tab_module
3220
3221 # Reference to the parsing method of the last built parser
3222 global parse
3223
3224 # If pickling is enabled, table files are not created
3225 if picklefile:
3226 write_tables = 0
3227
3228 if errorlog is None:
3229 errorlog = PlyLogger(sys.stderr)
3230
3231 # Get the module dictionary used for the parser
3232 if module:
3233 _items = [(k, getattr(module, k)) for k in dir(module)]
3234 pdict = dict(_items)
3235 # If no __file__ attribute is available, try to obtain it from the __module__ instead
3236 if '__file__' not in pdict:
3237 pdict['__file__'] = sys.modules[pdict['__module__']].__file__
3238 else:
3239 pdict = get_caller_module_dict(2)
3240
3241 if outputdir is None:
3242 # If no output directory is set, the location of the output files
3243 # is determined according to the following rules:
3244 # - If tabmodule specifies a package, files go into that package directory
3245 # - Otherwise, files go in the same directory as the specifying module
3246 if isinstance(tabmodule, types.ModuleType):
3247 srcfile = tabmodule.__file__
3248 else:
3249 if '.' not in tabmodule:
3250 srcfile = pdict['__file__']
3251 else:
3252 parts = tabmodule.split('.')
3253 pkgname = '.'.join(parts[:-1])
3254 exec('import %s' % pkgname)
3255 srcfile = getattr(sys.modules[pkgname], '__file__', '')
3256 outputdir = os.path.dirname(srcfile)
3257
# Determine if the module providing the grammar is part of a package.
# If so, fix the tabmodule setting so that tables load correctly
3260 pkg = pdict.get('__package__')
3261 if pkg and isinstance(tabmodule, str):
3262 if '.' not in tabmodule:
3263 tabmodule = pkg + '.' + tabmodule
3264
3265
3266
3267 # Set start symbol if it's specified directly using an argument
3268 if start is not None:
3269 pdict['start'] = start
3270
3271 # Collect parser information from the dictionary
3272 pinfo = ParserReflect(pdict, log=errorlog)
3273 pinfo.get_all()
3274
3275 if pinfo.error:
3276 raise YaccError('Unable to build parser')
3277
3278 # Check signature against table files (if any)
3279 signature = pinfo.signature()
3280
3281 # Read the tables
3282 try:
3283 lr = LRTable()
3284 if picklefile:
3285 read_signature = lr.read_pickle(picklefile)
3286 else:
3287 read_signature = lr.read_table(tabmodule)
3288 if optimize or (read_signature == signature):
3289 try:
3290 lr.bind_callables(pinfo.pdict)
3291 parser = LRParser(lr, pinfo.error_func)
3292 parse = parser.parse
3293 return parser
3294 except Exception as e:
3295 errorlog.warning('There was a problem loading the table file: %r', e)
3296 except VersionError as e:
3297 errorlog.warning(str(e))
3298 except ImportError:
3299 pass
3300
3301 if debuglog is None:
3302 if debug:
3303 try:
3304 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
3305 except IOError as e:
3306 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3307 debuglog = NullLogger()
3308 else:
3309 debuglog = NullLogger()
3310
3311 debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
3312
3313 errors = False
3314
3315 # Validate the parser information
3316 if pinfo.validate_all():
3317 raise YaccError('Unable to build parser')
3318
3319 if not pinfo.error_func:
3320 errorlog.warning('no p_error() function is defined')
3321
3322 # Create a grammar object
3323 grammar = Grammar(pinfo.tokens)
3324
3325 # Set precedence level for terminals
3326 for term, assoc, level in pinfo.preclist:
3327 try:
3328 grammar.set_precedence(term, assoc, level)
3329 except GrammarError as e:
3330 errorlog.warning('%s', e)
3331
3332 # Add productions to the grammar
3333 for funcname, gram in pinfo.grammar:
3334 file, line, prodname, syms = gram
3335 try:
3336 grammar.add_production(prodname, syms, funcname, file, line)
3337 except GrammarError as e:
3338 errorlog.error('%s', e)
3339 errors = True
3340
3341 # Set the grammar start symbols
3342 try:
3343 if start is None:
3344 grammar.set_start(pinfo.start)
3345 else:
3346 grammar.set_start(start)
3347 except GrammarError as e:
3348 errorlog.error(str(e))
3349 errors = True
3350
3351 if errors:
3352 raise YaccError('Unable to build parser')
3353
3354 # Verify the grammar structure
3355 undefined_symbols = grammar.undefined_symbols()
3356 for sym, prod in undefined_symbols:
3357 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
3358 errors = True
3359
3360 unused_terminals = grammar.unused_terminals()
3361 if unused_terminals:
3362 debuglog.info('')
3363 debuglog.info('Unused terminals:')
3364 debuglog.info('')
3365 for term in unused_terminals:
3366 errorlog.warning('Token %r defined, but not used', term)
3367 debuglog.info(' %s', term)
3368
3369 # Print out all productions to the debug log
3370 if debug:
3371 debuglog.info('')
3372 debuglog.info('Grammar')
3373 debuglog.info('')
3374 for n, p in enumerate(grammar.Productions):
3375 debuglog.info('Rule %-5d %s', n, p)
3376
3377 # Find unused non-terminals
3378 unused_rules = grammar.unused_rules()
3379 for prod in unused_rules:
3380 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
3381
3382 if len(unused_terminals) == 1:
3383 errorlog.warning('There is 1 unused token')
3384 if len(unused_terminals) > 1:
3385 errorlog.warning('There are %d unused tokens', len(unused_terminals))
3386
3387 if len(unused_rules) == 1:
3388 errorlog.warning('There is 1 unused rule')
3389 if len(unused_rules) > 1:
3390 errorlog.warning('There are %d unused rules', len(unused_rules))
3391
3392 if debug:
3393 debuglog.info('')
3394 debuglog.info('Terminals, with rules where they appear')
3395 debuglog.info('')
3396 terms = list(grammar.Terminals)
3397 terms.sort()
3398 for term in terms:
3399 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
3400
3401 debuglog.info('')
3402 debuglog.info('Nonterminals, with rules where they appear')
3403 debuglog.info('')
3404 nonterms = list(grammar.Nonterminals)
3405 nonterms.sort()
3406 for nonterm in nonterms:
3407 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
3408 debuglog.info('')
3409
3410 if check_recursion:
3411 unreachable = grammar.find_unreachable()
3412 for u in unreachable:
3413 errorlog.warning('Symbol %r is unreachable', u)
3414
3415 infinite = grammar.infinite_cycles()
3416 for inf in infinite:
3417 errorlog.error('Infinite recursion detected for symbol %r', inf)
3418 errors = True
3419
3420 unused_prec = grammar.unused_precedence()
3421 for term, assoc in unused_prec:
3422 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
3423 errors = True
3424
3425 if errors:
3426 raise YaccError('Unable to build parser')
3427
3428 # Run the LRGeneratedTable on the grammar
3429 if debug:
3430 errorlog.debug('Generating %s tables', method)
3431
3432 lr = LRGeneratedTable(grammar, method, debuglog)
3433
3434 if debug:
3435 num_sr = len(lr.sr_conflicts)
3436
3437 # Report shift/reduce and reduce/reduce conflicts
3438 if num_sr == 1:
3439 errorlog.warning('1 shift/reduce conflict')
3440 elif num_sr > 1:
3441 errorlog.warning('%d shift/reduce conflicts', num_sr)
3442
3443 num_rr = len(lr.rr_conflicts)
3444 if num_rr == 1:
3445 errorlog.warning('1 reduce/reduce conflict')
3446 elif num_rr > 1:
3447 errorlog.warning('%d reduce/reduce conflicts', num_rr)
3448
3449 # Write out conflicts to the output file
3450 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3451 debuglog.warning('')
3452 debuglog.warning('Conflicts:')
3453 debuglog.warning('')
3454
3455 for state, tok, resolution in lr.sr_conflicts:
3456 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
3457
3458 already_reported = set()
3459 for state, rule, rejected in lr.rr_conflicts:
3460 if (state, id(rule), id(rejected)) in already_reported:
3461 continue
3462 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3463 debuglog.warning('rejected rule (%s) in state %d', rejected, state)
3464 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3465 errorlog.warning('rejected rule (%s) in state %d', rejected, state)
3466 already_reported.add((state, id(rule), id(rejected)))
3467
3468 warned_never = []
3469 for state, rule, rejected in lr.rr_conflicts:
3470 if not rejected.reduced and (rejected not in warned_never):
3471 debuglog.warning('Rule (%s) is never reduced', rejected)
3472 errorlog.warning('Rule (%s) is never reduced', rejected)
3473 warned_never.append(rejected)
3474
3475 # Write the table file if requested
3476 if write_tables:
3477 try:
3478 lr.write_table(tabmodule, outputdir, signature)
3479 except IOError as e:
3480 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3481
3482 # Write a pickled version of the tables
3483 if picklefile:
3484 try:
3485 lr.pickle_table(picklefile, signature)
3486 except IOError as e:
3487 errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3488
3489 # Build the parser
3490 lr.bind_callables(pinfo.pdict)
3491 parser = LRParser(lr, pinfo.error_func)
3492
3493 parse = parser.parse
3494 return parser
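
# -----------------------------------------------------------------------------
# Example usage (a minimal sketch, not part of the module; token and rule
# names below are hypothetical):
#
#     import ply.lex as lex
#     import ply.yacc as yacc
#
#     tokens = ('NUMBER', 'PLUS')
#     t_PLUS = r'\+'
#     t_ignore = ' '
#     def t_NUMBER(t):
#         r'\d+'
#         t.value = int(t.value)
#         return t
#     def t_error(t):
#         t.lexer.skip(1)
#
#     def p_expr_plus(p):
#         'expr : expr PLUS NUMBER'
#         p[0] = p[1] + p[3]
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#     def p_error(p):
#         print('Syntax error')
#
#     lexer = lex.lex()
#     parser = yacc.yacc()
#     print(parser.parse('1 + 2 + 3'))   # -> 6
# -----------------------------------------------------------------------------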