# lark/parser_frontends.py
from typing import Any, Callable, Dict, Optional, Collection

from .exceptions import ConfigurationError, GrammarError, assert_config
from .utils import get_regexp_width, Serialize
from .parsers.grammar_analysis import GrammarAnalyzer
from .lexer import LexerThread, BasicLexer, ContextualLexer, Lexer
from .parsers import earley, xearley, cyk
from .parsers.lalr_parser import LALR_Parser
from .tree import Tree
from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType

###{standalone

def _wrap_lexer(lexer_class):
    """Return a lexer class compatible with the internal lexer interface.

    Classes that declare ``__future_interface__ = True`` already accept
    ``(lexer_state, parser_state)`` in ``lex()`` and are returned as-is;
    legacy lexers that only take the raw text are wrapped.
    """
    future_interface = getattr(lexer_class, '__future_interface__', False)
    if future_interface:
        return lexer_class
    else:
        class CustomLexerWrapper(Lexer):
            def __init__(self, lexer_conf):
                self.lexer = lexer_class(lexer_conf)

            def lex(self, lexer_state, parser_state):
                return self.lexer.lex(lexer_state.text)

        return CustomLexerWrapper
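
# A minimal sketch of the two custom-lexer styles _wrap_lexer distinguishes.
# Illustrative only; `MyLegacyLexer` and `MyLexer` are hypothetical names:
#
#     class MyLegacyLexer:                    # old interface: lex(text)
#         def __init__(self, lexer_conf): ...
#         def lex(self, text):
#             ...  # yield Tokens from the raw text
#
#     class MyLexer(Lexer):                   # current interface
#         __future_interface__ = True
#         def __init__(self, lexer_conf): ...
#         def lex(self, lexer_state, parser_state):
#             ...  # yield Tokens, with access to lexer/parser state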

def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
    parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
    cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
    parser = cls.deserialize(data['parser'], memo, callbacks, options.debug)
    parser_conf.callbacks = callbacks
    return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
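
# Context note (an assumption based on the serialization API, not stated here):
# this path is taken when a pre-built parser is loaded, e.g. via Lark's cache
# option or a standalone-generated module, so only LALR state is reconstructed.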

_parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {}

class ParsingFrontend(Serialize):
    __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser'

    lexer_conf: LexerConf
    parser_conf: ParserConf
    options: Any

    def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None):
        self.parser_conf = parser_conf
        self.lexer_conf = lexer_conf
        self.options = options

        # Set up the parser
        if parser:  # From cache
            self.parser = parser
        else:
            create_parser = _parser_creators.get(parser_conf.parser_type)
            assert create_parser is not None, "{} is not supported in standalone mode".format(
                parser_conf.parser_type
            )
            self.parser = create_parser(lexer_conf, parser_conf, options)

        # Set up the lexer
        lexer_type = lexer_conf.lexer_type
        self.skip_lexer = False
        if lexer_type in ('dynamic', 'dynamic_complete'):
            # The dynamic lexers run inside the Earley parser; no separate lexer is built
            assert lexer_conf.postlex is None
            self.skip_lexer = True
            return

        if isinstance(lexer_type, type):
            assert issubclass(lexer_type, Lexer)
            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
        elif isinstance(lexer_type, str):
            create_lexer = {
                'basic': create_basic_lexer,
                'contextual': create_contextual_lexer,
            }[lexer_type]
            self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options)
        else:
            raise TypeError(f"Bad value for lexer_type: {lexer_type}")

        if lexer_conf.postlex:
            self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)

    def _verify_start(self, start=None):
        if start is None:
            start_decls = self.parser_conf.start
            if len(start_decls) > 1:
                raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
            start, = start_decls
        elif start not in self.parser_conf.start:
            raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
        return start

    def _make_lexer_thread(self, text: str):
        cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
        return text if self.skip_lexer else cls.from_text(self.lexer, text)

    def parse(self, text: str, start=None, on_error=None):
        chosen_start = self._verify_start(start)
        kw = {} if on_error is None else {'on_error': on_error}
        stream = self._make_lexer_thread(text)
        return self.parser.parse(stream, chosen_start, **kw)

    def parse_interactive(self, text: Optional[str]=None, start=None):
        # TODO BREAK - Change text from Optional[str] to text: str = ''.
        # Would break behavior of exhaust_lexer(), which currently raises TypeError,
        # and after the change would just return []
        chosen_start = self._verify_start(start)
        if self.parser_conf.parser_type != 'lalr':
            raise ConfigurationError("parse_interactive() currently only works with parser='lalr'")
        stream = self._make_lexer_thread(text)  # type: ignore[arg-type]
        return self.parser.parse_interactive(stream, chosen_start)
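
# Usage sketch (illustrative; normally reached through the public Lark API
# rather than by calling the frontend directly). Token-by-token control looks
# roughly like:
#
#     parser = Lark(grammar, parser='lalr')
#     ip = parser.parse_interactive("1 + 2")
#     for token in ip.iter_parse():    # feed and observe one token at a time
#         ...
#     result = ip.resume_parse()       # finish parsing from the current state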

def _validate_frontend_args(parser, lexer) -> None:
    assert_config(parser, ('lalr', 'earley', 'cyk'))
    if not isinstance(lexer, type):     # not a custom lexer?
        expected = {
            'lalr': ('basic', 'contextual'),
            'earley': ('basic', 'dynamic', 'dynamic_complete'),
            'cyk': ('basic', ),
        }[parser]
        assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser)

def _get_lexer_callbacks(transformer, terminals):
    result = {}
    for terminal in terminals:
        callback = getattr(transformer, terminal.name, None)
        if callback is not None:
            result[terminal.name] = callback
    return result
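
# Illustrative sketch (hypothetical transformer, not part of this module):
# a transformer method named after a terminal is collected as a lexer callback,
# letting it rewrite matching tokens as they are produced:
#
#     class MyTransformer(Transformer):
#         def INT(self, token):            # called for every INT token
#             return token.update(value=int(token))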

class PostLexConnector:
    def __init__(self, lexer, postlexer):
        self.lexer = lexer
        self.postlexer = postlexer

    def lex(self, lexer_state, parser_state):
        i = self.lexer.lex(lexer_state, parser_state)
        return self.postlexer.process(i)
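
# Sketch of the post-lexer contract this connector expects (lark's Indenter is
# the stock example); `MyPostLex` is a hypothetical name:
#
#     class MyPostLex(PostLex):
#         always_accept = ('_NEWLINE',)    # terminals the contextual lexer must always allow
#         def process(self, stream):
#             for token in stream:         # filter or inject tokens here
#                 yield token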

def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer:
    cls = (options and options._plugins.get('BasicLexer')) or BasicLexer
    return cls(lexer_conf)


def create_contextual_lexer(lexer_conf: LexerConf, parser, postlex, options) -> ContextualLexer:
    cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer
    states: Dict[str, Collection[str]] = {idx: list(t.keys()) for idx, t in parser._parse_table.states.items()}
    always_accept: Collection[str] = postlex.always_accept if postlex else ()
    return cls(lexer_conf, states, always_accept=always_accept)

def create_lalr_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options=None) -> LALR_Parser:
    debug = options.debug if options else False
    strict = options.strict if options else False
    cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
    return cls(parser_conf, debug=debug, strict=strict)


_parser_creators['lalr'] = create_lalr_parser
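
# The recurring `(options and options._plugins.get(...)) or Default` pattern
# lets plugins substitute their own implementations, keyed by class name.
# A minimal sketch, assuming a hypothetical drop-in replacement `FastLALR`
# (note `_plugins` is an internal Lark option):
#
#     Lark(grammar, parser='lalr', _plugins={'LALR_Parser': FastLALR})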

###}


class EarleyRegexpMatcher:
    def __init__(self, lexer_conf):
        self.regexps = {}
        for t in lexer_conf.terminals:
            regexp = t.pattern.to_regexp()
            try:
                width = get_regexp_width(regexp)[0]
            except ValueError:
                raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
            else:
                if width == 0:
                    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
            if lexer_conf.use_bytes:
                regexp = regexp.encode('utf-8')

            self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags)

    def match(self, term, text, index=0):
        return self.regexps[term.name].match(text, index)
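
# Note: in the 'dynamic' lexer modes there is no separate tokenization pass.
# The Earley parser calls match() to try a terminal's regexp directly at a
# given text index, which is why zero-width terminals are rejected above.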

def create_earley_parser__dynamic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
    if lexer_conf.callbacks:
        raise GrammarError("Earley's dynamic lexer doesn't support lexer_callbacks.")

    earley_matcher = EarleyRegexpMatcher(lexer_conf)
    return xearley.Parser(lexer_conf, parser_conf, earley_matcher.match, **kw)


def _match_earley_basic(term, token):
    return term.name == token.type


def create_earley_parser__basic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
    return earley.Parser(lexer_conf, parser_conf, _match_earley_basic, **kw)

def create_earley_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options) -> earley.Parser:
    resolve_ambiguity = options.ambiguity == 'resolve'
    debug = options.debug if options else False
    tree_class = (options.tree_class or Tree) if options.ambiguity != 'forest' else None

    extra = {}
    if lexer_conf.lexer_type == 'dynamic':
        f = create_earley_parser__dynamic
    elif lexer_conf.lexer_type == 'dynamic_complete':
        extra['complete_lex'] = True
        f = create_earley_parser__dynamic
    else:
        f = create_earley_parser__basic

    return f(lexer_conf, parser_conf, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra)
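
# Mode summary: 'basic' matches pre-lexed tokens by terminal name;
# 'dynamic' matches terminal regexps against the text during parsing;
# 'dynamic_complete' does the same but, via complete_lex, tries every match
# length rather than only the longest.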

class CYK_FrontEnd:
    def __init__(self, lexer_conf, parser_conf, options=None):
        # self._analysis = GrammarAnalyzer(parser_conf)
        self.parser = cyk.Parser(parser_conf.rules)

        self.callbacks = parser_conf.callbacks

    def parse(self, lexer_thread, start):
        tokens = list(lexer_thread.lex(None))
        tree = self.parser.parse(tokens, start)
        return self._transform(tree)

    def _transform(self, tree):
        subtrees = list(tree.iter_subtrees())
        for subtree in subtrees:
            subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]

        return self._apply_callback(tree)

    def _apply_callback(self, tree):
        return self.callbacks[tree.rule](tree.children)


_parser_creators['earley'] = create_earley_parser
_parser_creators['cyk'] = CYK_FrontEnd

def _construct_parsing_frontend(
        parser_type: _ParserArgType,
        lexer_type: _LexerArgType,
        lexer_conf,
        parser_conf,
        options
):
    assert isinstance(lexer_conf, LexerConf)
    assert isinstance(parser_conf, ParserConf)
    parser_conf.parser_type = parser_type
    lexer_conf.lexer_type = lexer_type
    return ParsingFrontend(lexer_conf, parser_conf, options)
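
# End-to-end sketch (an assumption about the call sites, which live in
# lark/lark.py): Lark's constructor builds a LexerConf and ParserConf from the
# grammar and options, then calls e.g.
#
#     _construct_parsing_frontend('lalr', 'contextual', lexer_conf, parser_conf, options)
#
# and Lark.parse() delegates to the resulting ParsingFrontend.parse().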