Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/lark/parsers/lalr_parser.py: 74%
139 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:30 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:30 +0000
1"""This module implements a LALR(1) Parser
2"""
3# Author: Erez Shinan (2017)
4# Email : erezshin@gmail.com
5from copy import deepcopy, copy
6from typing import Dict, Any
7from ..lexer import Token
8from ..utils import Serialize
10from .lalr_analysis import LALR_Analyzer, Shift, IntParseTable
11from .lalr_interactive_parser import InteractiveParser
12from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
14###{standalone
class LALR_Parser(Serialize):
    """LALR(1) parser front-end.

    Builds the parse table with ``LALR_Analyzer`` and delegates the actual
    parsing to a ``_Parser`` instance stored in ``self.parser``.
    """

    def __init__(self, parser_conf, debug=False, strict=False):
        """Compute the LALR table for *parser_conf* and create the parser."""
        analyzer = LALR_Analyzer(parser_conf, debug=debug, strict=strict)
        analyzer.compute_lalr()
        user_callbacks = parser_conf.callbacks

        self._parse_table = analyzer.parse_table
        self.parser_conf = parser_conf
        self.parser = _Parser(analyzer.parse_table, user_callbacks, debug)

    @classmethod
    def deserialize(cls, data, memo, callbacks, debug=False):
        """Rebuild a parser from serialized table *data*.

        ``__init__`` is bypassed, so only ``_parse_table`` and ``parser``
        are set on the returned instance.
        """
        restored = cls.__new__(cls)
        restored._parse_table = IntParseTable.deserialize(data, memo)
        restored.parser = _Parser(restored._parse_table, callbacks, debug)
        return restored

    def serialize(self, memo: Any = None) -> Dict[str, Any]:
        """Return the serialized form of the parse table."""
        table = self._parse_table
        return table.serialize(memo)

    def parse_interactive(self, lexer, start):
        """Start a parse in interactive mode (``start_interactive=True``)."""
        return self.parser.parse(lexer, start, start_interactive=True)

    def parse(self, lexer, start, on_error=None):
        """Parse the input from *lexer*, beginning at rule *start*.

        Parameters:
            lexer - passed through to the underlying parser.
            start - name of the start rule.
            on_error (optional) - callable invoked with each UnexpectedInput.
                A falsy return value re-raises the error; a truthy one makes
                the parser try to resume from where it stopped.

        Without *on_error*, any UnexpectedInput propagates unchanged.
        """
        try:
            return self.parser.parse(lexer, start)
        except UnexpectedInput as err:
            if on_error is None:
                raise

            # The recovery loop stays inside this handler so that exceptions
            # raised from it are chained to the original error.
            while True:
                is_char_error = isinstance(err, UnexpectedCharacters)
                if is_char_error:
                    # Remember where the lexer was before the handler runs.
                    lexer_state = err.interactive_parser.lexer_thread.state
                    pos_before = lexer_state.line_ctr.char_pos

                if not on_error(err):
                    raise err

                if is_char_error and pos_before == lexer_state.line_ctr.char_pos:
                    # If the user didn't change the character position, then
                    # we should advance past one character ourselves.
                    lexer_state.line_ctr.feed(lexer_state.text[pos_before:pos_before + 1])

                try:
                    return err.interactive_parser.resume_parse()
                except UnexpectedToken as next_err:
                    stuck_at_end = (
                        isinstance(err, UnexpectedToken)
                        and err.token.type == next_err.token.type == '$END'
                        and err.interactive_parser == next_err.interactive_parser
                    )
                    if stuck_at_end:
                        # Prevent infinite loop
                        raise next_err
                    err = next_err
                except UnexpectedCharacters as next_err:
                    err = next_err
class ParseConf:
    """Per-parse configuration: the parse table narrowed to one start rule.

    Stores the table, the user callbacks and the start rule name, together
    with the start/end states looked up for that rule and a shortcut to the
    table's ``states`` mapping.
    """

    __slots__ = ('parse_table', 'callbacks', 'start',
                 'start_state', 'end_state', 'states')

    def __init__(self, parse_table, callbacks, start):
        """Look up the states for *start* in *parse_table*.

        Indexing ``start_states`` / ``end_states`` with an unknown *start*
        raises whatever those containers raise (KeyError for a dict).
        """
        self.callbacks = callbacks
        self.start = start
        self.parse_table = parse_table

        self.start_state = parse_table.start_states[start]
        self.end_state = parse_table.end_states[start]
        self.states = parse_table.states
class ParserState:
    """Mutable state of one LALR parse: a state stack and a value stack.

    ``parse_conf`` supplies the table (``states``), ``end_state``,
    ``start_state`` and ``callbacks``; ``lexer`` is stored as given.
    """

    __slots__ = ('parse_conf', 'lexer', 'state_stack', 'value_stack')

    def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None):
        """Create a state; a falsy stack argument is replaced by a fresh one."""
        self.parse_conf = parse_conf
        self.lexer = lexer
        if state_stack:
            self.state_stack = state_stack
        else:
            self.state_stack = [self.parse_conf.start_state]
        if value_stack:
            self.value_stack = value_stack
        else:
            self.value_stack = []

    @property
    def position(self):
        """The state currently on top of the state stack."""
        return self.state_stack[-1]

    # Necessary for match_examples() to work
    def __eq__(self, other):
        """Equal when both state stacks have the same depth and same top."""
        if isinstance(other, ParserState):
            return len(self.state_stack) == len(other.state_stack) and self.position == other.position
        return NotImplemented

    def __copy__(self):
        """Copy the stacks (values deeply); config and lexer are shared."""
        states_copy = copy(self.state_stack)
        values_copy = deepcopy(self.value_stack)
        # XXX the lexer is passed through as-is, not copied
        return type(self)(self.parse_conf, self.lexer, states_copy, values_copy)

    def copy(self):
        """Return ``copy.copy(self)``."""
        return copy(self)

    def feed_token(self, token, is_end=False):
        """Advance the parser by one token, mutating both stacks in place.

        Performs reductions until *token* can be shifted, then shifts it and
        returns None. When *is_end* is true the token is never shifted;
        instead the top of the value stack is returned once the state stack
        reaches the end state.

        Raises UnexpectedToken when the current state has no entry for
        ``token.type``.
        """
        conf = self.parse_conf
        table = conf.states
        final_state = conf.end_state
        callbacks = conf.callbacks
        state_stack = self.state_stack
        value_stack = self.value_stack

        while True:
            current = state_stack[-1]
            try:
                action, arg = table[current][token.type]
            except KeyError:
                # Only upper-case keys are reported as expected token types.
                expected = {name for name in table[current].keys() if name.isupper()}
                raise UnexpectedToken(token, expected, state=self, interactive_parser=None)

            assert arg != final_state

            if action is Shift:
                # shift once and return
                assert not is_end
                state_stack.append(arg)
                if token.type in callbacks:
                    value_stack.append(callbacks[token.type](token))
                else:
                    value_stack.append(token)
                return

            # reduce+shift as many times as necessary
            rule = arg
            arity = len(rule.expansion)
            if arity:
                children = value_stack[-arity:]
                del state_stack[-arity:]
                del value_stack[-arity:]
            else:
                # Empty expansion: nothing to pop (a [-0:] slice would take everything).
                children = []

            reduced = callbacks[rule](children)

            _action, goto_state = table[state_stack[-1]][rule.origin.name]
            assert _action is Shift
            state_stack.append(goto_state)
            value_stack.append(reduced)

            if is_end and state_stack[-1] == final_state:
                return value_stack[-1]
class _Parser:
    """Table-driven LALR parser loop around a ``ParserState``."""

    def __init__(self, parse_table, callbacks, debug=False):
        """Store the parse table, the callbacks and the debug flag."""
        self.parse_table = parse_table
        self.callbacks = callbacks
        self.debug = debug

    def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False):
        """Build a fresh parser state for *start* and run (or hand out) the parse.

        With *start_interactive* true, an InteractiveParser wrapping the new
        state is returned instead of running the parse loop.
        """
        conf = ParseConf(self.parse_table, self.callbacks, start)
        initial_state = ParserState(conf, lexer, state_stack, value_stack)
        if not start_interactive:
            return self.parse_from_state(initial_state)
        return InteractiveParser(self, initial_state, initial_state.lexer)

    def parse_from_state(self, state, last_token=None):
        """Run the main LALR parser loop

        Parameters:
            state (ParserState) - the initial state. Changed in-place.
            last_token (optional, Token) - Used only for line information in case of an empty lexer.
        """
        try:
            token = last_token
            for token in state.lexer.lex(state):
                state.feed_token(token)

            # Borrow position info from the last token seen, when there is one.
            if token:
                end_token = Token.new_borrow_pos('$END', '', token)
            else:
                end_token = Token('$END', '', 0, 1, 1)
            return state.feed_token(end_token, True)
        except UnexpectedInput as e:
            try:
                e.interactive_parser = InteractiveParser(self, state, state.lexer)
            except NameError:
                # InteractiveParser may not be defined; raise without it.
                pass
            raise e
        except Exception:
            if self.debug:
                print("")
                print("STATE STACK DUMP")
                print("----------------")
                for depth, entry in enumerate(state.state_stack):
                    print('%d)' % depth, entry)
                print("")

            raise
204###}