Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/lark/parsers/lalr_parser.py: 74%

139 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:30 +0000

1"""This module implements a LALR(1) Parser 

2""" 

3# Author: Erez Shinan (2017) 

4# Email : erezshin@gmail.com 

5from copy import deepcopy, copy 

6from typing import Dict, Any 

7from ..lexer import Token 

8from ..utils import Serialize 

9 

10from .lalr_analysis import LALR_Analyzer, Shift, IntParseTable 

11from .lalr_interactive_parser import InteractiveParser 

12from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken 

13 

14###{standalone 

15 

16class LALR_Parser(Serialize): 

17 def __init__(self, parser_conf, debug=False, strict=False): 

18 analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict) 

19 analysis.compute_lalr() 

20 callbacks = parser_conf.callbacks 

21 

22 self._parse_table = analysis.parse_table 

23 self.parser_conf = parser_conf 

24 self.parser = _Parser(analysis.parse_table, callbacks, debug) 

25 

26 @classmethod 

27 def deserialize(cls, data, memo, callbacks, debug=False): 

28 inst = cls.__new__(cls) 

29 inst._parse_table = IntParseTable.deserialize(data, memo) 

30 inst.parser = _Parser(inst._parse_table, callbacks, debug) 

31 return inst 

32 

33 def serialize(self, memo: Any = None) -> Dict[str, Any]: 

34 return self._parse_table.serialize(memo) 

35 

36 def parse_interactive(self, lexer, start): 

37 return self.parser.parse(lexer, start, start_interactive=True) 

38 

39 def parse(self, lexer, start, on_error=None): 

40 try: 

41 return self.parser.parse(lexer, start) 

42 except UnexpectedInput as e: 

43 if on_error is None: 

44 raise 

45 

46 while True: 

47 if isinstance(e, UnexpectedCharacters): 

48 s = e.interactive_parser.lexer_thread.state 

49 p = s.line_ctr.char_pos 

50 

51 if not on_error(e): 

52 raise e 

53 

54 if isinstance(e, UnexpectedCharacters): 

55 # If user didn't change the character position, then we should 

56 if p == s.line_ctr.char_pos: 

57 s.line_ctr.feed(s.text[p:p+1]) 

58 

59 try: 

60 return e.interactive_parser.resume_parse() 

61 except UnexpectedToken as e2: 

62 if (isinstance(e, UnexpectedToken) 

63 and e.token.type == e2.token.type == '$END' 

64 and e.interactive_parser == e2.interactive_parser): 

65 # Prevent infinite loop 

66 raise e2 

67 e = e2 

68 except UnexpectedCharacters as e2: 

69 e = e2 

70 

71 

72class ParseConf: 

73 __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states' 

74 

75 def __init__(self, parse_table, callbacks, start): 

76 self.parse_table = parse_table 

77 

78 self.start_state = self.parse_table.start_states[start] 

79 self.end_state = self.parse_table.end_states[start] 

80 self.states = self.parse_table.states 

81 

82 self.callbacks = callbacks 

83 self.start = start 

84 

85 

86class ParserState: 

87 __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack' 

88 

89 def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None): 

90 self.parse_conf = parse_conf 

91 self.lexer = lexer 

92 self.state_stack = state_stack or [self.parse_conf.start_state] 

93 self.value_stack = value_stack or [] 

94 

95 @property 

96 def position(self): 

97 return self.state_stack[-1] 

98 

99 # Necessary for match_examples() to work 

100 def __eq__(self, other): 

101 if not isinstance(other, ParserState): 

102 return NotImplemented 

103 return len(self.state_stack) == len(other.state_stack) and self.position == other.position 

104 

105 def __copy__(self): 

106 return type(self)( 

107 self.parse_conf, 

108 self.lexer, # XXX copy 

109 copy(self.state_stack), 

110 deepcopy(self.value_stack), 

111 ) 

112 

113 def copy(self): 

114 return copy(self) 

115 

116 def feed_token(self, token, is_end=False): 

117 state_stack = self.state_stack 

118 value_stack = self.value_stack 

119 states = self.parse_conf.states 

120 end_state = self.parse_conf.end_state 

121 callbacks = self.parse_conf.callbacks 

122 

123 while True: 

124 state = state_stack[-1] 

125 try: 

126 action, arg = states[state][token.type] 

127 except KeyError: 

128 expected = {s for s in states[state].keys() if s.isupper()} 

129 raise UnexpectedToken(token, expected, state=self, interactive_parser=None) 

130 

131 assert arg != end_state 

132 

133 if action is Shift: 

134 # shift once and return 

135 assert not is_end 

136 state_stack.append(arg) 

137 value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) 

138 return 

139 else: 

140 # reduce+shift as many times as necessary 

141 rule = arg 

142 size = len(rule.expansion) 

143 if size: 

144 s = value_stack[-size:] 

145 del state_stack[-size:] 

146 del value_stack[-size:] 

147 else: 

148 s = [] 

149 

150 value = callbacks[rule](s) 

151 

152 _action, new_state = states[state_stack[-1]][rule.origin.name] 

153 assert _action is Shift 

154 state_stack.append(new_state) 

155 value_stack.append(value) 

156 

157 if is_end and state_stack[-1] == end_state: 

158 return value_stack[-1] 

159 

160class _Parser: 

161 def __init__(self, parse_table, callbacks, debug=False): 

162 self.parse_table = parse_table 

163 self.callbacks = callbacks 

164 self.debug = debug 

165 

166 def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False): 

167 parse_conf = ParseConf(self.parse_table, self.callbacks, start) 

168 parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) 

169 if start_interactive: 

170 return InteractiveParser(self, parser_state, parser_state.lexer) 

171 return self.parse_from_state(parser_state) 

172 

173 

174 def parse_from_state(self, state, last_token=None): 

175 """Run the main LALR parser loop 

176 

177 Parameters: 

178 state (ParseState) - the initial state. Changed in-place. 

179 last_token (optional, Token) - Used only for line information in case of an empty lexer. 

180 """ 

181 try: 

182 token = last_token 

183 for token in state.lexer.lex(state): 

184 state.feed_token(token) 

185 

186 end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) 

187 return state.feed_token(end_token, True) 

188 except UnexpectedInput as e: 

189 try: 

190 e.interactive_parser = InteractiveParser(self, state, state.lexer) 

191 except NameError: 

192 pass 

193 raise e 

194 except Exception as e: 

195 if self.debug: 

196 print("") 

197 print("STATE STACK DUMP") 

198 print("----------------") 

199 for i, s in enumerate(state.state_stack): 

200 print('%d)' % i , s) 

201 print("") 

202 

203 raise 

204###}