Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/lark/parser_frontends.py: 69%

171 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:30 +0000

1from typing import Any, Callable, Dict, Optional, Collection 

2 

3from .exceptions import ConfigurationError, GrammarError, assert_config 

4from .utils import get_regexp_width, Serialize 

5from .parsers.grammar_analysis import GrammarAnalyzer 

6from .lexer import LexerThread, BasicLexer, ContextualLexer, Lexer 

7from .parsers import earley, xearley, cyk 

8from .parsers.lalr_parser import LALR_Parser 

9from .tree import Tree 

10from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType 

11 

12###{standalone 

13 

14def _wrap_lexer(lexer_class): 

15 future_interface = getattr(lexer_class, '__future_interface__', False) 

16 if future_interface: 

17 return lexer_class 

18 else: 

19 class CustomLexerWrapper(Lexer): 

20 def __init__(self, lexer_conf): 

21 self.lexer = lexer_class(lexer_conf) 

22 def lex(self, lexer_state, parser_state): 

23 return self.lexer.lex(lexer_state.text) 

24 return CustomLexerWrapper 

25 

26 

def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
    """Rebuild a ParsingFrontend from serialized ``data`` (LALR only)."""
    parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
    # Honor a plugin-provided LALR_Parser class, if any.
    plugin_cls = options._plugins.get('LALR_Parser') if options else None
    parser_cls = plugin_cls or LALR_Parser
    parser = parser_cls.deserialize(data['parser'], memo, callbacks, options.debug)
    parser_conf.callbacks = callbacks
    return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)

33 

34 

# Registry mapping a parser-type name (e.g. 'lalr') to a factory callable
# taking (lexer_conf, parser_conf, options). Populated at module load time.
_parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {}

36 

37 

class ParsingFrontend(Serialize):
    """Ties a parser and a lexer together behind a single parse interface.

    Built either from fresh configs (parser and lexer are constructed here)
    or from a cached/deserialized parser (passed via the ``parser`` argument).
    """
    __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser'

    lexer_conf: LexerConf
    parser_conf: ParserConf
    options: Any

    def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None):
        self.parser_conf = parser_conf
        self.lexer_conf = lexer_conf
        self.options = options

        # Set-up parser
        if parser:  # From cache
            self.parser = parser
        else:
            create_parser = _parser_creators.get(parser_conf.parser_type)
            assert create_parser is not None, "{} is not supported in standalone mode".format(
                parser_conf.parser_type
            )
            self.parser = create_parser(lexer_conf, parser_conf, options)

        # Set-up lexer
        lexer_type = lexer_conf.lexer_type
        self.skip_lexer = False
        if lexer_type in ('dynamic', 'dynamic_complete'):
            # Scannerless Earley: matching happens inside the parser, so no
            # lexer object is built and the raw text is passed through.
            assert lexer_conf.postlex is None
            self.skip_lexer = True
            return

        if isinstance(lexer_type, type):
            # A user-provided lexer class; adapt old-style interfaces.
            assert issubclass(lexer_type, Lexer)
            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
        elif isinstance(lexer_type, str):
            create_lexer = {
                'basic': create_basic_lexer,
                'contextual': create_contextual_lexer,
            }[lexer_type]
            self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options)
        else:
            # BUG FIX: the message was a plain string, so "{lexer_type}" was
            # emitted literally instead of the offending value; now an f-string.
            raise TypeError(f"Bad value for lexer_type: {lexer_type}")

        if lexer_conf.postlex:
            self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)

    def _verify_start(self, start=None):
        """Resolve the start rule, raising ConfigurationError if it is
        ambiguous (multiple declared, none chosen) or unknown."""
        if start is None:
            start_decls = self.parser_conf.start
            if len(start_decls) > 1:
                raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
            # Exactly one declared start rule: unpack it.
            start ,= start_decls
        elif start not in self.parser_conf.start:
            raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
        return start

    def _make_lexer_thread(self, text: str):
        """Wrap ``text`` in a LexerThread (plugin-overridable), or return the
        text unchanged when the parser is scannerless (skip_lexer)."""
        cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
        return text if self.skip_lexer else cls.from_text(self.lexer, text)

    def parse(self, text: str, start=None, on_error=None):
        """Parse ``text`` from rule ``start``; ``on_error`` enables LALR error recovery."""
        chosen_start = self._verify_start(start)
        # Only forward on_error when given, so parsers without that parameter still work.
        kw = {} if on_error is None else {'on_error': on_error}
        stream = self._make_lexer_thread(text)
        return self.parser.parse(stream, chosen_start, **kw)

    def parse_interactive(self, text: Optional[str]=None, start=None):
        """Begin a step-by-step (interactive) parse. Only supported for LALR."""
        # TODO BREAK - Change text from Optional[str] to text: str = ''.
        # Would break behavior of exhaust_lexer(), which currently raises TypeError, and after the change would just return []
        chosen_start = self._verify_start(start)
        if self.parser_conf.parser_type != 'lalr':
            raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
        stream = self._make_lexer_thread(text)  # type: ignore[arg-type]
        return self.parser.parse_interactive(stream, chosen_start)

111 

112 

def _validate_frontend_args(parser, lexer) -> None:
    """Check that the chosen parser algorithm and lexer type are a legal pair."""
    assert_config(parser, ('lalr', 'earley', 'cyk'))
    if isinstance(lexer, type):
        # A custom lexer class is accepted with any parser.
        return
    supported_lexers = {
        'lalr': ('basic', 'contextual'),
        'earley': ('basic', 'dynamic', 'dynamic_complete'),
        'cyk': ('basic', ),
    }
    assert_config(lexer, supported_lexers[parser],
                  'Parser %r does not support lexer %%r, expected one of %%s' % parser)

122 

123 

124def _get_lexer_callbacks(transformer, terminals): 

125 result = {} 

126 for terminal in terminals: 

127 callback = getattr(transformer, terminal.name, None) 

128 if callback is not None: 

129 result[terminal.name] = callback 

130 return result 

131 

class PostLexConnector:
    """Chains a post-lexer (e.g. an indenter) after a lexer.

    Exposes the same ``lex(lexer_state, parser_state)`` interface as the
    wrapped lexer, feeding its token stream through ``postlexer.process``.
    """

    def __init__(self, lexer, postlexer):
        self.lexer = lexer
        self.postlexer = postlexer

    def lex(self, lexer_state, parser_state):
        token_stream = self.lexer.lex(lexer_state, parser_state)
        return self.postlexer.process(token_stream)

140 

141 

142 

def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer:
    """Instantiate the basic (context-free) lexer, honoring a plugin override."""
    plugin_cls = options._plugins.get('BasicLexer') if options else None
    lexer_cls = plugin_cls or BasicLexer
    return lexer_cls(lexer_conf)

146 

def create_contextual_lexer(lexer_conf: LexerConf, parser, postlex, options) -> ContextualLexer:
    """Build a contextual lexer driven by the LALR parse table's states."""
    plugin_cls = options._plugins.get('ContextualLexer') if options else None
    lexer_cls = plugin_cls or ContextualLexer
    # For every parser state, the terminal names it can accept next.
    states: Dict[str, Collection[str]] = {
        state_id: list(transitions.keys())
        for state_id, transitions in parser._parse_table.states.items()
    }
    always_accept: Collection[str] = postlex.always_accept if postlex else ()
    return lexer_cls(lexer_conf, states, always_accept=always_accept)

152 

def create_lalr_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options=None) -> LALR_Parser:
    """Instantiate the LALR(1) parser, honoring a plugin override."""
    plugin_cls = options._plugins.get('LALR_Parser') if options else None
    parser_cls = plugin_cls or LALR_Parser
    return parser_cls(
        parser_conf,
        debug=options.debug if options else False,
        strict=options.strict if options else False,
    )

158 

# Registered inside the ###{standalone section: 'lalr' is the only parser
# available in generated standalone parsers.
_parser_creators['lalr'] = create_lalr_parser

160 

161###} 

162 

class EarleyRegexpMatcher:
    """Compiles every terminal's pattern and matches them against raw text.

    Serves as the terminal matcher for the dynamic (scannerless) Earley
    parsers, which operate directly on the input instead of a token stream.
    """

    def __init__(self, lexer_conf):
        self.regexps = {}
        for term in lexer_conf.terminals:
            regexp = term.pattern.to_regexp()
            try:
                width = get_regexp_width(regexp)[0]
            except ValueError:
                raise GrammarError("Bad regexp in token %s: %s" % (term.name, regexp))
            else:
                if width == 0:
                    # A zero-width match would let the scanner loop forever.
                    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", term)
            if lexer_conf.use_bytes:
                regexp = regexp.encode('utf-8')

            self.regexps[term.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags)

    def match(self, term, text, index=0):
        """Try to match ``term``'s compiled pattern in ``text`` at ``index``."""
        return self.regexps[term.name].match(text, index)

182 

183 

def create_earley_parser__dynamic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
    """Build a scannerless (dynamic-lexer) Earley parser."""
    if lexer_conf.callbacks:
        raise GrammarError("Earley's dynamic lexer doesn't support lexer_callbacks.")
    matcher = EarleyRegexpMatcher(lexer_conf)
    return xearley.Parser(lexer_conf, parser_conf, matcher.match, **kw)

190 

191def _match_earley_basic(term, token): 

192 return term.name == token.type 

193 

def create_earley_parser__basic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
    """Build an Earley parser that consumes tokens from a regular (basic) lexer."""
    return earley.Parser(lexer_conf, parser_conf, _match_earley_basic, **kw)

196 

def create_earley_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options) -> earley.Parser:
    """Choose and build the Earley parser variant matching the configured lexer."""
    resolve_ambiguity = options.ambiguity == 'resolve'
    debug = options.debug if options else False
    # In 'forest' mode the raw SPPF is returned, so no tree class is used.
    tree_class = (options.tree_class or Tree) if options.ambiguity != 'forest' else None

    extra = {}
    lexer_type = lexer_conf.lexer_type
    if lexer_type == 'dynamic_complete':
        extra['complete_lex'] = True
        factory = create_earley_parser__dynamic
    elif lexer_type == 'dynamic':
        factory = create_earley_parser__dynamic
    else:
        factory = create_earley_parser__basic

    return factory(lexer_conf, parser_conf, resolve_ambiguity=resolve_ambiguity,
                   debug=debug, tree_class=tree_class, **extra)

212 

213 

214 

class CYK_FrontEnd:
    """Parsing frontend for the CYK parser.

    After parsing, the raw CYK tree is rewritten by applying the grammar's
    rule callbacks bottom-up over every subtree.
    """

    def __init__(self, lexer_conf, parser_conf, options=None):
        self.parser = cyk.Parser(parser_conf.rules)
        self.callbacks = parser_conf.callbacks

    def parse(self, lexer_thread, start):
        """Tokenize via ``lexer_thread``, run CYK, and transform the result."""
        tokens = list(lexer_thread.lex(None))
        raw_tree = self.parser.parse(tokens, start)
        return self._transform(raw_tree)

    def _transform(self, tree):
        # Replace each Tree child with its callback result before finally
        # applying the callback to the root itself.
        for subtree in list(tree.iter_subtrees()):
            subtree.children = [
                self._apply_callback(child) if isinstance(child, Tree) else child
                for child in subtree.children
            ]
        return self._apply_callback(tree)

    def _apply_callback(self, tree):
        # Look up the callback registered for this tree's grammar rule.
        return self.callbacks[tree.rule](tree.children)

236 

237 

# These registrations live outside the ###{standalone section, so the Earley
# and CYK frontends are unavailable in generated standalone parsers.
_parser_creators['earley'] = create_earley_parser
_parser_creators['cyk'] = CYK_FrontEnd

240 

241 

def _construct_parsing_frontend(
        parser_type: _ParserArgType,
        lexer_type: _LexerArgType,
        lexer_conf,
        parser_conf,
        options
):
    """Record the chosen parser/lexer types on their configs, then build the frontend."""
    assert isinstance(lexer_conf, LexerConf)
    assert isinstance(parser_conf, ParserConf)
    # ParsingFrontend reads these fields back to pick its parser and lexer.
    parser_conf.parser_type = parser_type
    lexer_conf.lexer_type = lexer_type
    return ParsingFrontend(lexer_conf, parser_conf, options)