Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/lark/load_grammar.py: 84%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Parses and compiles Lark grammars into an internal representation.
2"""
4import hashlib
5import os.path
6import sys
7from collections import namedtuple
8from copy import copy, deepcopy
9import pkgutil
10from ast import literal_eval
11from contextlib import suppress
12from typing import List, Tuple, Union, Callable, Dict, Optional, Sequence, Generator
14from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors, OrderedSet, Serialize
15from .lexer import Token, TerminalDef, PatternStr, PatternRE, Pattern
17from .parse_tree_builder import ParseTreeBuilder
18from .parser_frontends import ParsingFrontend
19from .common import LexerConf, ParserConf
20from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, TOKEN_DEFAULT_PRIORITY
21from .utils import classify, dedup_list
22from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput
24from .tree import Tree, SlottedTree as ST
25from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive
26inline_args = v_args(inline=True)
28IMPORT_PATHS = ['grammars']
30EXT = '.lark'
32_RE_FLAGS = 'imslux'
34_EMPTY = Symbol('__empty__')
36_TERMINAL_NAMES = {
37 '.' : 'DOT',
38 ',' : 'COMMA',
39 ':' : 'COLON',
40 ';' : 'SEMICOLON',
41 '+' : 'PLUS',
42 '-' : 'MINUS',
43 '*' : 'STAR',
44 '/' : 'SLASH',
45 '\\' : 'BACKSLASH',
46 '|' : 'VBAR',
47 '?' : 'QMARK',
48 '!' : 'BANG',
49 '@' : 'AT',
50 '#' : 'HASH',
51 '$' : 'DOLLAR',
52 '%' : 'PERCENT',
53 '^' : 'CIRCUMFLEX',
54 '&' : 'AMPERSAND',
55 '_' : 'UNDERSCORE',
56 '<' : 'LESSTHAN',
57 '>' : 'MORETHAN',
58 '=' : 'EQUAL',
59 '"' : 'DBLQUOTE',
60 '\'' : 'QUOTE',
61 '`' : 'BACKQUOTE',
62 '~' : 'TILDE',
63 '(' : 'LPAR',
64 ')' : 'RPAR',
65 '{' : 'LBRACE',
66 '}' : 'RBRACE',
67 '[' : 'LSQB',
68 ']' : 'RSQB',
69 '\n' : 'NEWLINE',
70 '\r\n' : 'CRLF',
71 '\t' : 'TAB',
72 ' ' : 'SPACE',
73}
75# Grammar Parser
76TERMINALS = {
77 '_LPAR': r'\(',
78 '_RPAR': r'\)',
79 '_LBRA': r'\[',
80 '_RBRA': r'\]',
81 '_LBRACE': r'\{',
82 '_RBRACE': r'\}',
83 'OP': '[+*]|[?](?![a-z_])',
84 '_COLON': ':',
85 '_COMMA': ',',
86 '_OR': r'\|',
87 '_DOT': r'\.(?!\.)',
88 '_DOTDOT': r'\.\.',
89 'TILDE': '~',
90 'RULE_MODIFIERS': '(!|![?]?|[?]!?)(?=[_a-z])',
91 'RULE': '_?[a-z][_a-z0-9]*',
92 'TERMINAL': '_?[A-Z][_A-Z0-9]*',
93 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
94 'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS,
95 '_NL': r'(\r?\n)+\s*',
96 '_NL_OR': r'(\r?\n)+\s*\|',
97 'WS': r'[ \t]+',
98 'COMMENT': r'\s*//[^\n]*|\s*#[^\n]*',
99 'BACKSLASH': r'\\[ ]*\n',
100 '_TO': '->',
101 '_IGNORE': r'%ignore',
102 '_OVERRIDE': r'%override',
103 '_DECLARE': r'%declare',
104 '_EXTEND': r'%extend',
105 '_IMPORT': r'%import',
106 'NUMBER': r'[+-]?\d+',
107}
109RULES = {
110 'start': ['_list'],
111 '_list': ['_item', '_list _item'],
112 '_item': ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'],
114 'rule': ['rule_modifiers RULE template_params priority _COLON expansions _NL'],
115 'rule_modifiers': ['RULE_MODIFIERS',
116 ''],
117 'priority': ['_DOT NUMBER',
118 ''],
119 'template_params': ['_LBRACE _template_params _RBRACE',
120 ''],
121 '_template_params': ['RULE',
122 '_template_params _COMMA RULE'],
123 'expansions': ['_expansions'],
124 '_expansions': ['alias',
125 '_expansions _OR alias',
126 '_expansions _NL_OR alias'],
128 '?alias': ['expansion _TO nonterminal', 'expansion'],
129 'expansion': ['_expansion'],
131 '_expansion': ['', '_expansion expr'],
133 '?expr': ['atom',
134 'atom OP',
135 'atom TILDE NUMBER',
136 'atom TILDE NUMBER _DOTDOT NUMBER',
137 ],
139 '?atom': ['_LPAR expansions _RPAR',
140 'maybe',
141 'value'],
143 'value': ['terminal',
144 'nonterminal',
145 'literal',
146 'range',
147 'template_usage'],
149 'terminal': ['TERMINAL'],
150 'nonterminal': ['RULE'],
152 '?name': ['RULE', 'TERMINAL'],
153 '?symbol': ['terminal', 'nonterminal'],
155 'maybe': ['_LBRA expansions _RBRA'],
156 'range': ['STRING _DOTDOT STRING'],
158 'template_usage': ['nonterminal _LBRACE _template_args _RBRACE'],
159 '_template_args': ['value',
160 '_template_args _COMMA value'],
162 'term': ['TERMINAL _COLON expansions _NL',
163 'TERMINAL _DOT NUMBER _COLON expansions _NL'],
164 'override': ['_OVERRIDE rule',
165 '_OVERRIDE term'],
166 'extend': ['_EXTEND rule',
167 '_EXTEND term'],
168 'ignore': ['_IGNORE expansions _NL'],
169 'declare': ['_DECLARE _declare_args _NL'],
170 'import': ['_IMPORT _import_path _NL',
171 '_IMPORT _import_path _LPAR name_list _RPAR _NL',
172 '_IMPORT _import_path _TO name _NL'],
174 '_import_path': ['import_lib', 'import_rel'],
175 'import_lib': ['_import_args'],
176 'import_rel': ['_DOT _import_args'],
177 '_import_args': ['name', '_import_args _DOT name'],
179 'name_list': ['_name_list'],
180 '_name_list': ['name', '_name_list _COMMA name'],
182 '_declare_args': ['symbol', '_declare_args symbol'],
183 'literal': ['REGEXP', 'STRING'],
184}
187# Value 5 keeps the number of states in the lalr parser somewhat minimal
188# It isn't optimal, but close to it. See PR #949
189SMALL_FACTOR_THRESHOLD = 5
190# The Threshold whether repeat via ~ are split up into different rules
191# 50 is chosen since it keeps the number of states low and therefore lalr analysis time low,
192# while not being to overaggressive and unnecessarily creating rules that might create shift/reduce conflicts.
193# (See PR #949)
194REPEAT_BREAK_THRESHOLD = 50
197class FindRuleSize(Transformer):
198 def __init__(self, keep_all_tokens: bool):
199 self.keep_all_tokens = keep_all_tokens
201 def _will_not_get_removed(self, sym: Symbol) -> bool:
202 if isinstance(sym, NonTerminal):
203 return not sym.name.startswith('_')
204 if isinstance(sym, Terminal):
205 return self.keep_all_tokens or not sym.filter_out
206 if sym is _EMPTY:
207 return False
208 assert False, sym
210 def _args_as_int(self, args: List[Union[int, Symbol]]) -> Generator[int, None, None]:
211 for a in args:
212 if isinstance(a, int):
213 yield a
214 elif isinstance(a, Symbol):
215 yield 1 if self._will_not_get_removed(a) else 0
216 else:
217 assert False
219 def expansion(self, args) -> int:
220 return sum(self._args_as_int(args))
222 def expansions(self, args) -> int:
223 return max(self._args_as_int(args))
226@inline_args
227class EBNF_to_BNF(Transformer_InPlace):
228 def __init__(self):
229 self.new_rules = []
230 self.rules_cache = {}
231 self.prefix = 'anon'
232 self.i = 0
233 self.rule_options = None
235 def _name_rule(self, inner: str):
236 new_name = '__%s_%s_%d' % (self.prefix, inner, self.i)
237 self.i += 1
238 return new_name
240 def _add_rule(self, key, name, expansions):
241 t = NonTerminal(name)
242 self.new_rules.append((name, expansions, self.rule_options))
243 self.rules_cache[key] = t
244 return t
246 def _add_recurse_rule(self, type_: str, expr: Tree):
247 try:
248 return self.rules_cache[expr]
249 except KeyError:
250 new_name = self._name_rule(type_)
251 t = NonTerminal(new_name)
252 tree = ST('expansions', [
253 ST('expansion', [expr]),
254 ST('expansion', [t, expr])
255 ])
256 return self._add_rule(expr, new_name, tree)
258 def _add_repeat_rule(self, a, b, target, atom):
259 """Generate a rule that repeats target ``a`` times, and repeats atom ``b`` times.
261 When called recursively (into target), it repeats atom for x(n) times, where:
262 x(0) = 1
263 x(n) = a(n) * x(n-1) + b
265 Example rule when a=3, b=4:
267 new_rule: target target target atom atom atom atom
269 """
270 key = (a, b, target, atom)
271 try:
272 return self.rules_cache[key]
273 except KeyError:
274 new_name = self._name_rule('repeat_a%d_b%d' % (a, b))
275 tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
276 return self._add_rule(key, new_name, tree)
278 def _add_repeat_opt_rule(self, a, b, target, target_opt, atom):
279 """Creates a rule that matches atom 0 to (a*n+b)-1 times.
281 When target matches n times atom, and target_opt 0 to n-1 times target_opt,
283 First we generate target * i followed by target_opt, for i from 0 to a-1
284 These match 0 to n*a - 1 times atom
286 Then we generate target * a followed by atom * i, for i from 0 to b-1
287 These match n*a to n*a + b-1 times atom
289 The created rule will not have any shift/reduce conflicts so that it can be used with lalr
291 Example rule when a=3, b=4:
293 new_rule: target_opt
294 | target target_opt
295 | target target target_opt
297 | target target target
298 | target target target atom
299 | target target target atom atom
300 | target target target atom atom atom
302 """
303 key = (a, b, target, atom, "opt")
304 try:
305 return self.rules_cache[key]
306 except KeyError:
307 new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b))
308 tree = ST('expansions', [
309 ST('expansion', [target]*i + [target_opt]) for i in range(a)
310 ] + [
311 ST('expansion', [target]*a + [atom]*i) for i in range(b)
312 ])
313 return self._add_rule(key, new_name, tree)
315 def _generate_repeats(self, rule: Tree, mn: int, mx: int):
316 """Generates a rule tree that repeats ``rule`` exactly between ``mn`` to ``mx`` times.
317 """
318 # For a small number of repeats, we can take the naive approach
319 if mx < REPEAT_BREAK_THRESHOLD:
320 return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])
322 # For large repeat values, we break the repetition into sub-rules.
323 # We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``.
324 # We then use small_factors to split up mn and diff up into values [(a, b), ...]
325 # This values are used with the help of _add_repeat_rule and _add_repeat_rule_opt
326 # to generate a complete rule/expression that matches the corresponding number of repeats
327 mn_target = rule
328 for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD):
329 mn_target = self._add_repeat_rule(a, b, mn_target, rule)
330 if mx == mn:
331 return mn_target
333 diff = mx - mn + 1 # We add one because _add_repeat_opt_rule generates rules that match one less
334 diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD)
335 diff_target = rule # Match rule 1 times
336 diff_opt_target = ST('expansion', []) # match rule 0 times (e.g. up to 1 -1 times)
337 for a, b in diff_factors[:-1]:
338 diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
339 diff_target = self._add_repeat_rule(a, b, diff_target, rule)
341 a, b = diff_factors[-1]
342 diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
344 return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])])
346 def expr(self, rule: Tree, op: Token, *args):
347 if op.value == '?':
348 empty = ST('expansion', [])
349 return ST('expansions', [rule, empty])
350 elif op.value == '+':
351 # a : b c+ d
352 # -->
353 # a : b _c d
354 # _c : _c c | c;
355 return self._add_recurse_rule('plus', rule)
356 elif op.value == '*':
357 # a : b c* d
358 # -->
359 # a : b _c? d
360 # _c : _c c | c;
361 new_name = self._add_recurse_rule('star', rule)
362 return ST('expansions', [new_name, ST('expansion', [])])
363 elif op.value == '~':
364 if len(args) == 1:
365 mn = mx = int(args[0])
366 else:
367 mn, mx = map(int, args)
368 if mx < mn or mn < 0:
369 raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))
371 return self._generate_repeats(rule, mn, mx)
373 assert False, op
375 def maybe(self, rule: Tree):
376 keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
377 rule_size = FindRuleSize(keep_all_tokens).transform(rule)
378 empty = ST('expansion', [_EMPTY] * rule_size)
379 return ST('expansions', [rule, empty])
382class SimplifyRule_Visitor(Visitor):
384 @staticmethod
385 def _flatten(tree: Tree):
386 while tree.expand_kids_by_data(tree.data):
387 pass
389 def expansion(self, tree: Tree):
390 # rules_list unpacking
391 # a : b (c|d) e
392 # -->
393 # a : b c e | b d e
394 #
395 # In AST terms:
396 # expansion(b, expansions(c, d), e)
397 # -->
398 # expansions( expansion(b, c, e), expansion(b, d, e) )
400 self._flatten(tree)
402 for i, child in enumerate(tree.children):
403 if isinstance(child, Tree) and child.data == 'expansions':
404 tree.data = 'expansions'
405 tree.children = [self.visit(ST('expansion', [option if i == j else other
406 for j, other in enumerate(tree.children)]))
407 for option in dedup_list(child.children)]
408 self._flatten(tree)
409 break
411 def alias(self, tree):
412 rule, alias_name = tree.children
413 if rule.data == 'expansions':
414 aliases = []
415 for child in tree.children[0].children:
416 aliases.append(ST('alias', [child, alias_name]))
417 tree.data = 'expansions'
418 tree.children = aliases
420 def expansions(self, tree: Tree):
421 self._flatten(tree)
422 # Ensure all children are unique
423 if len(set(tree.children)) != len(tree.children):
424 tree.children = dedup_list(tree.children) # dedup is expensive, so try to minimize its use
427class RuleTreeToText(Transformer):
428 def expansions(self, x):
429 return x
431 def expansion(self, symbols):
432 return symbols, None
434 def alias(self, x):
435 (expansion, _alias), alias = x
436 assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed
437 return expansion, alias.name
440class PrepareAnonTerminals(Transformer_InPlace):
441 """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""
443 def __init__(self, terminals):
444 self.terminals = terminals
445 self.term_set = {td.name for td in self.terminals}
446 self.term_reverse = {td.pattern: td for td in terminals}
447 self.i = 0
448 self.rule_options = None
450 @inline_args
451 def pattern(self, p):
452 value = p.value
453 if p in self.term_reverse and p.flags != self.term_reverse[p].pattern.flags:
454 raise GrammarError(u'Conflicting flags for the same terminal: %s' % p)
456 term_name = None
458 if isinstance(p, PatternStr):
459 try:
460 # If already defined, use the user-defined terminal name
461 term_name = self.term_reverse[p].name
462 except KeyError:
463 # Try to assign an indicative anon-terminal name
464 try:
465 term_name = _TERMINAL_NAMES[value]
466 except KeyError:
467 if value and is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set:
468 term_name = value.upper()
470 if term_name in self.term_set:
471 term_name = None
473 elif isinstance(p, PatternRE):
474 if p in self.term_reverse: # Kind of a weird placement.name
475 term_name = self.term_reverse[p].name
476 else:
477 assert False, p
479 if term_name is None:
480 term_name = '__ANON_%d' % self.i
481 self.i += 1
483 if term_name not in self.term_set:
484 assert p not in self.term_reverse
485 self.term_set.add(term_name)
486 termdef = TerminalDef(term_name, p)
487 self.term_reverse[p] = termdef
488 self.terminals.append(termdef)
490 filter_out = False if self.rule_options and self.rule_options.keep_all_tokens else isinstance(p, PatternStr)
492 return Terminal(term_name, filter_out=filter_out)
495class _ReplaceSymbols(Transformer_InPlace):
496 """Helper for ApplyTemplates"""
498 def __init__(self):
499 self.names = {}
501 def value(self, c):
502 if len(c) == 1 and isinstance(c[0], Symbol) and c[0].name in self.names:
503 return self.names[c[0].name]
504 return self.__default__('value', c, None)
506 def template_usage(self, c):
507 name = c[0].name
508 if name in self.names:
509 return self.__default__('template_usage', [self.names[name]] + c[1:], None)
510 return self.__default__('template_usage', c, None)
513class ApplyTemplates(Transformer_InPlace):
514 """Apply the templates, creating new rules that represent the used templates"""
516 def __init__(self, rule_defs):
517 self.rule_defs = rule_defs
518 self.replacer = _ReplaceSymbols()
519 self.created_templates = set()
521 def template_usage(self, c):
522 name = c[0].name
523 args = c[1:]
524 result_name = "%s{%s}" % (name, ",".join(a.name for a in args))
525 if result_name not in self.created_templates:
526 self.created_templates.add(result_name)
527 (_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name)
528 assert len(params) == len(args), args
529 result_tree = deepcopy(tree)
530 self.replacer.names = dict(zip(params, args))
531 self.replacer.transform(result_tree)
532 self.rule_defs.append((result_name, [], result_tree, deepcopy(options)))
533 return NonTerminal(result_name)
536def _rfind(s, choices):
537 return max(s.rfind(c) for c in choices)
540def eval_escaping(s):
541 w = ''
542 i = iter(s)
543 for n in i:
544 w += n
545 if n == '\\':
546 try:
547 n2 = next(i)
548 except StopIteration:
549 raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
550 if n2 == '\\':
551 w += '\\\\'
552 elif n2 not in 'Uuxnftr':
553 w += '\\'
554 w += n2
555 w = w.replace('\\"', '"').replace("'", "\\'")
557 to_eval = "u'''%s'''" % w
558 try:
559 s = literal_eval(to_eval)
560 except SyntaxError as e:
561 raise GrammarError(s, e)
563 return s
566def _literal_to_pattern(literal):
567 assert isinstance(literal, Token)
568 v = literal.value
569 flag_start = _rfind(v, '/"')+1
570 assert flag_start > 0
571 flags = v[flag_start:]
572 assert all(f in _RE_FLAGS for f in flags), flags
574 if literal.type == 'STRING' and '\n' in v:
575 raise GrammarError('You cannot put newlines in string literals')
577 if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags:
578 raise GrammarError('You can only use newlines in regular expressions '
579 'with the `x` (verbose) flag')
581 v = v[:flag_start]
582 assert v[0] == v[-1] and v[0] in '"/'
583 x = v[1:-1]
585 s = eval_escaping(x)
587 if s == "":
588 raise GrammarError("Empty terminals are not allowed (%s)" % literal)
590 if literal.type == 'STRING':
591 s = s.replace('\\\\', '\\')
592 return PatternStr(s, flags, raw=literal.value)
593 elif literal.type == 'REGEXP':
594 return PatternRE(s, flags, raw=literal.value)
595 else:
596 assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'
599@inline_args
600class PrepareLiterals(Transformer_InPlace):
601 def literal(self, literal):
602 return ST('pattern', [_literal_to_pattern(literal)])
604 def range(self, start, end):
605 assert start.type == end.type == 'STRING'
606 start = start.value[1:-1]
607 end = end.value[1:-1]
608 assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
609 regexp = '[%s-%s]' % (start, end)
610 return ST('pattern', [PatternRE(regexp)])
613def _make_joined_pattern(regexp, flags_set) -> PatternRE:
614 return PatternRE(regexp, ())
616class TerminalTreeToPattern(Transformer_NonRecursive):
617 def pattern(self, ps):
618 p ,= ps
619 return p
621 def expansion(self, items: List[Pattern]) -> Pattern:
622 if not items:
623 return PatternStr('')
625 if len(items) == 1:
626 return items[0]
628 pattern = ''.join(i.to_regexp() for i in items)
629 return _make_joined_pattern(pattern, {i.flags for i in items})
631 def expansions(self, exps: List[Pattern]) -> Pattern:
632 if len(exps) == 1:
633 return exps[0]
635 # Do a bit of sorting to make sure that the longest option is returned
636 # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match)
637 exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value)))
639 pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
640 return _make_joined_pattern(pattern, {i.flags for i in exps})
642 def expr(self, args) -> Pattern:
643 inner: Pattern
644 inner, op = args[:2]
645 if op == '~':
646 if len(args) == 3:
647 op = "{%d}" % int(args[2])
648 else:
649 mn, mx = map(int, args[2:])
650 if mx < mn:
651 raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
652 op = "{%d,%d}" % (mn, mx)
653 else:
654 assert len(args) == 2
655 return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)
657 def maybe(self, expr):
658 return self.expr(expr + ['?'])
660 def alias(self, t):
661 raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")
663 def value(self, v):
664 return v[0]
667class ValidateSymbols(Transformer_InPlace):
668 def value(self, v):
669 v ,= v
670 assert isinstance(v, (Tree, Symbol))
671 return v
674def nr_deepcopy_tree(t):
675 """Deepcopy tree `t` without recursion"""
676 return Transformer_NonRecursive(False).transform(t)
679class Grammar(Serialize):
681 term_defs: List[Tuple[str, Tuple[Tree, int]]]
682 rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
683 ignore: List[str]
685 def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None:
686 self.term_defs = term_defs
687 self.rule_defs = rule_defs
688 self.ignore = ignore
690 __serialize_fields__ = 'term_defs', 'rule_defs', 'ignore'
692 def compile(self, start, terminals_to_keep) -> Tuple[List[TerminalDef], List[Rule], List[str]]:
693 # We change the trees in-place (to support huge grammars)
694 # So deepcopy allows calling compile more than once.
695 term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs]
696 rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs]
698 # ===================
699 # Compile Terminals
700 # ===================
702 # Convert terminal-trees to strings/regexps
704 for name, (term_tree, priority) in term_defs:
705 if term_tree is None: # Terminal added through %declare
706 continue
707 if next(term_tree.find_data('template_usage'), None) is not None:
708 raise GrammarError("Templates cannot be used inside terminals (%s)" % name)
709 expansions = list(term_tree.find_data('expansion'))
710 if len(expansions) == 1 and not expansions[0].children:
711 raise GrammarError("Terminals cannot be empty (%s)" % name)
713 transformer = PrepareLiterals() * TerminalTreeToPattern()
714 terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
715 for name, (term_tree, priority) in term_defs if term_tree]
717 # =================
718 # Compile Rules
719 # =================
721 # 1. Pre-process terminals
722 anon_tokens_transf = PrepareAnonTerminals(terminals)
723 transformer = PrepareLiterals() * ValidateSymbols() * anon_tokens_transf # Adds to terminals
725 # 2. Inline Templates
727 transformer *= ApplyTemplates(rule_defs)
729 # 3. Convert EBNF to BNF (and apply step 1 & 2)
730 ebnf_to_bnf = EBNF_to_BNF()
731 rules = []
732 i = 0
733 while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
734 name, params, rule_tree, options = rule_defs[i]
735 i += 1
736 if len(params) != 0: # Dont transform templates
737 continue
738 rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
739 ebnf_to_bnf.rule_options = rule_options
740 ebnf_to_bnf.prefix = name
741 anon_tokens_transf.rule_options = rule_options
742 tree = transformer.transform(rule_tree)
743 res: Tree = ebnf_to_bnf.transform(tree)
744 rules.append((name, res, options))
745 rules += ebnf_to_bnf.new_rules
747 assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"
749 # 4. Compile tree to Rule objects
750 rule_tree_to_text = RuleTreeToText()
752 simplify_rule = SimplifyRule_Visitor()
753 compiled_rules: List[Rule] = []
754 for rule_content in rules:
755 name, tree, options = rule_content
756 simplify_rule.visit(tree)
757 expansions = rule_tree_to_text.transform(tree)
759 for i, (expansion, alias) in enumerate(expansions):
760 if alias and name.startswith('_'):
761 raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias))
763 empty_indices = tuple(x==_EMPTY for x in expansion)
764 if any(empty_indices):
765 exp_options = copy(options) or RuleOptions()
766 exp_options.empty_indices = empty_indices
767 expansion = [x for x in expansion if x!=_EMPTY]
768 else:
769 exp_options = options
771 for sym in expansion:
772 assert isinstance(sym, Symbol)
773 if sym.is_term and exp_options and exp_options.keep_all_tokens:
774 assert isinstance(sym, Terminal)
775 sym.filter_out = False
776 rule = Rule(NonTerminal(name), expansion, i, alias, exp_options)
777 compiled_rules.append(rule)
779 # Remove duplicates of empty rules, throw error for non-empty duplicates
780 if len(set(compiled_rules)) != len(compiled_rules):
781 duplicates = classify(compiled_rules, lambda x: x)
782 for dups in duplicates.values():
783 if len(dups) > 1:
784 if dups[0].expansion:
785 raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)"
786 % ''.join('\n * %s' % i for i in dups))
788 # Empty rule; assert all other attributes are equal
789 assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)
791 # Remove duplicates
792 compiled_rules = list(OrderedSet(compiled_rules))
794 # Filter out unused rules
795 while True:
796 c = len(compiled_rules)
797 used_rules = {s for r in compiled_rules
798 for s in r.expansion
799 if isinstance(s, NonTerminal)
800 and s != r.origin}
801 used_rules |= {NonTerminal(s) for s in start}
802 compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
803 for r in unused:
804 logger.debug("Unused rule: %s", r)
805 if len(compiled_rules) == c:
806 break
808 # Filter out unused terminals
809 if terminals_to_keep != '*':
810 used_terms = {t.name for r in compiled_rules
811 for t in r.expansion
812 if isinstance(t, Terminal)}
813 terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
814 if unused:
815 logger.debug("Unused terminals: %s", [t.name for t in unused])
817 return terminals, compiled_rules, self.ignore
820PackageResource = namedtuple('PackageResource', 'pkg_name path')
823class FromPackageLoader:
824 """
825 Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.
826 This allows them to be compatible even from within zip files.
828 Relative imports are handled, so you can just freely use them.
830 pkg_name: The name of the package. You can probably provide `__name__` most of the time
831 search_paths: All the path that will be search on absolute imports.
832 """
834 pkg_name: str
835 search_paths: Sequence[str]
837 def __init__(self, pkg_name: str, search_paths: Sequence[str]=("", )) -> None:
838 self.pkg_name = pkg_name
839 self.search_paths = search_paths
841 def __repr__(self):
842 return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)
844 def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]:
845 if base_path is None:
846 to_try = self.search_paths
847 else:
848 # Check whether or not the importing grammar was loaded by this module.
849 if not isinstance(base_path, PackageResource) or base_path.pkg_name != self.pkg_name:
850 # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
851 raise IOError()
852 to_try = [base_path.path]
854 err = None
855 for path in to_try:
856 full_path = os.path.join(path, grammar_path)
857 try:
858 text: Optional[bytes] = pkgutil.get_data(self.pkg_name, full_path)
859 except IOError as e:
860 err = e
861 continue
862 else:
863 return PackageResource(self.pkg_name, full_path), (text.decode() if text else '')
865 raise IOError('Cannot find grammar in given paths') from err
868stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
872def resolve_term_references(term_dict):
873 # TODO Solve with transitive closure (maybe)
875 while True:
876 changed = False
877 for name, token_tree in term_dict.items():
878 if token_tree is None: # Terminal added through %declare
879 continue
880 for exp in token_tree.find_data('value'):
881 item ,= exp.children
882 if isinstance(item, NonTerminal):
883 raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name))
884 elif isinstance(item, Terminal):
885 try:
886 term_value = term_dict[item.name]
887 except KeyError:
888 raise GrammarError("Terminal used but not defined: %s" % item.name)
889 assert term_value is not None
890 exp.children[0] = term_value
891 changed = True
892 else:
893 assert isinstance(item, Tree)
894 if not changed:
895 break
897 for name, term in term_dict.items():
898 if term: # Not just declared
899 for child in term.children:
900 ids = [id(x) for x in child.iter_subtrees()]
901 if id(term) in ids:
902 raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name)
906def symbol_from_strcase(s):
907 assert isinstance(s, str)
908 return Terminal(s, filter_out=s.startswith('_')) if s.isupper() else NonTerminal(s)
910@inline_args
911class PrepareGrammar(Transformer_InPlace):
912 def terminal(self, name):
913 return Terminal(str(name), filter_out=name.startswith('_'))
915 def nonterminal(self, name):
916 return NonTerminal(name.value)
919def _find_used_symbols(tree):
920 assert tree.data == 'expansions'
921 return {t.name for x in tree.find_data('expansion')
922 for t in x.scan_values(lambda t: isinstance(t, Symbol))}
925def _get_parser():
926 try:
927 return _get_parser.cache
928 except AttributeError:
929 terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
931 rules = [(name.lstrip('?'), x, RuleOptions(expand1=name.startswith('?')))
932 for name, x in RULES.items()]
933 rules = [Rule(NonTerminal(r), [symbol_from_strcase(s) for s in x.split()], i, None, o)
934 for r, xs, o in rules for i, x in enumerate(xs)]
936 callback = ParseTreeBuilder(rules, ST).create_callback()
937 import re
938 lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT', 'BACKSLASH'])
939 parser_conf = ParserConf(rules, callback, ['start'])
940 lexer_conf.lexer_type = 'basic'
941 parser_conf.parser_type = 'lalr'
942 _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None)
943 return _get_parser.cache
945GRAMMAR_ERRORS = [
946 ('Incorrect type of value', ['a: 1\n']),
947 ('Unclosed parenthesis', ['a: (\n']),
948 ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
949 ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
950 ('Illegal name for rules or terminals', ['Aa:\n']),
951 ('Alias expects lowercase name', ['a: -> "a"\n']),
952 ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']),
953 ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']),
954 ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']),
955 ('Terminal names cannot contain dots', ['A.B\n']),
956 ('Expecting rule or terminal definition', ['"a"\n']),
957 ('%import expects a name', ['%import "a"\n']),
958 ('%ignore expects a value', ['%ignore %import\n']),
959 ]
961def _translate_parser_exception(parse, e):
962 error = e.match_examples(parse, GRAMMAR_ERRORS, use_accepts=True)
963 if error:
964 return error
965 elif 'STRING' in e.expected:
966 return "Expecting a value"
968def _parse_grammar(text, name, start='start'):
969 try:
970 tree = _get_parser().parse(text + '\n', start)
971 except UnexpectedCharacters as e:
972 context = e.get_context(text)
973 raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
974 (e.line, e.column, name, context))
975 except UnexpectedToken as e:
976 context = e.get_context(text)
977 error = _translate_parser_exception(_get_parser().parse, e)
978 if error:
979 raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
980 raise
982 return PrepareGrammar().transform(tree)
984def _error_repr(error):
985 if isinstance(error, UnexpectedToken):
986 error2 = _translate_parser_exception(_get_parser().parse, error)
987 if error2:
988 return error2
989 expected = ', '.join(error.accepts or error.expected)
990 return "Unexpected token %r. Expected one of: {%s}" % (str(error.token), expected)
991 else:
992 return str(error)
994def _search_interactive_parser(interactive_parser, predicate):
995 def expand(node):
996 path, p = node
997 for choice in p.choices():
998 t = Token(choice, '')
999 try:
1000 new_p = p.feed_token(t)
1001 except ParseError: # Illegal
1002 pass
1003 else:
1004 yield path + (choice,), new_p
1006 for path, p in bfs_all_unique([((), interactive_parser)], expand):
1007 if predicate(p):
1008 return path, p
1010def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]:
1011 errors = []
1012 def on_error(e):
1013 errors.append((e, _error_repr(e)))
1015 # recover to a new line
1016 token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices())
1017 for token_type in token_path:
1018 e.interactive_parser.feed_token(Token(token_type, ''))
1019 e.interactive_parser.feed_token(Token('_NL', '\n'))
1020 return True
1022 _tree = _get_parser().parse(text + '\n', start, on_error=on_error)
1024 errors_by_line = classify(errors, lambda e: e[0].line)
1025 errors = [el[0] for el in errors_by_line.values()] # already sorted
1027 for e in errors:
1028 e[0].interactive_parser = None
1029 return errors
1032def _get_mangle(prefix, aliases, base_mangle=None):
1033 def mangle(s):
1034 if s in aliases:
1035 s = aliases[s]
1036 else:
1037 if s[0] == '_':
1038 s = '_%s__%s' % (prefix, s[1:])
1039 else:
1040 s = '%s__%s' % (prefix, s)
1041 if base_mangle is not None:
1042 s = base_mangle(s)
1043 return s
1044 return mangle
1046def _mangle_definition_tree(exp, mangle):
1047 if mangle is None:
1048 return exp
1049 exp = deepcopy(exp) # TODO: is this needed?
1050 for t in exp.iter_subtrees():
1051 for i, c in enumerate(t.children):
1052 if isinstance(c, Symbol):
1053 t.children[i] = c.renamed(mangle)
1055 return exp
1057def _make_rule_tuple(modifiers_tree, name, params, priority_tree, expansions):
1058 if modifiers_tree.children:
1059 m ,= modifiers_tree.children
1060 expand1 = '?' in m
1061 if expand1 and name.startswith('_'):
1062 raise GrammarError("Inlined rules (_rule) cannot use the ?rule modifier.")
1063 keep_all_tokens = '!' in m
1064 else:
1065 keep_all_tokens = False
1066 expand1 = False
1068 if priority_tree.children:
1069 p ,= priority_tree.children
1070 priority = int(p)
1071 else:
1072 priority = None
1074 if params is not None:
1075 params = [t.value for t in params.children] # For the grammar parser
1077 return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority,
1078 template_source=(name if params else None))
1081class Definition:
1082 def __init__(self, is_term, tree, params=(), options=None):
1083 self.is_term = is_term
1084 self.tree = tree
1085 self.params = tuple(params)
1086 self.options = options
1088class GrammarBuilder:
1090 global_keep_all_tokens: bool
1091 import_paths: List[Union[str, Callable]]
1092 used_files: Dict[str, str]
1094 _definitions: Dict[str, Definition]
1095 _ignore_names: List[str]
1097 def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None:
1098 self.global_keep_all_tokens = global_keep_all_tokens
1099 self.import_paths = import_paths or []
1100 self.used_files = used_files or {}
1102 self._definitions: Dict[str, Definition] = {}
1103 self._ignore_names: List[str] = []
1105 def _grammar_error(self, is_term, msg, *names):
1106 args = {}
1107 for i, name in enumerate(names, start=1):
1108 postfix = '' if i == 1 else str(i)
1109 args['name' + postfix] = name
1110 args['type' + postfix] = lowercase_type = ("rule", "terminal")[is_term]
1111 args['Type' + postfix] = lowercase_type.title()
1112 raise GrammarError(msg.format(**args))
1114 def _check_options(self, is_term, options):
1115 if is_term:
1116 if options is None:
1117 options = 1
1118 elif not isinstance(options, int):
1119 raise GrammarError("Terminal require a single int as 'options' (e.g. priority), got %s" % (type(options),))
1120 else:
1121 if options is None:
1122 options = RuleOptions()
1123 elif not isinstance(options, RuleOptions):
1124 raise GrammarError("Rules require a RuleOptions instance as 'options'")
1125 if self.global_keep_all_tokens:
1126 options.keep_all_tokens = True
1127 return options
1130 def _define(self, name, is_term, exp, params=(), options=None, *, override=False):
1131 if name in self._definitions:
1132 if not override:
1133 self._grammar_error(is_term, "{Type} '{name}' defined more than once", name)
1134 elif override:
1135 self._grammar_error(is_term, "Cannot override a nonexisting {type} {name}", name)
1137 if name.startswith('__'):
1138 self._grammar_error(is_term, 'Names starting with double-underscore are reserved (Error at {name})', name)
1140 self._definitions[name] = Definition(is_term, exp, params, self._check_options(is_term, options))
1142 def _extend(self, name, is_term, exp, params=(), options=None):
1143 if name not in self._definitions:
1144 self._grammar_error(is_term, "Can't extend {type} {name} as it wasn't defined before", name)
1146 d = self._definitions[name]
1148 if is_term != d.is_term:
1149 self._grammar_error(is_term, "Cannot extend {type} {name} - one is a terminal, while the other is not.", name)
1150 if tuple(params) != d.params:
1151 self._grammar_error(is_term, "Cannot extend {type} with different parameters: {name}", name)
1153 if d.tree is None:
1154 self._grammar_error(is_term, "Can't extend {type} {name} - it is abstract.", name)
1156 # TODO: think about what to do with 'options'
1157 base = d.tree
1159 assert isinstance(base, Tree) and base.data == 'expansions'
1160 base.children.insert(0, exp)
1162 def _ignore(self, exp_or_name):
1163 if isinstance(exp_or_name, str):
1164 self._ignore_names.append(exp_or_name)
1165 else:
1166 assert isinstance(exp_or_name, Tree)
1167 t = exp_or_name
1168 if t.data == 'expansions' and len(t.children) == 1:
1169 t2 ,= t.children
1170 if t2.data=='expansion' and len(t2.children) == 1:
1171 item ,= t2.children
1172 if item.data == 'value':
1173 item ,= item.children
1174 if isinstance(item, Terminal):
1175 # Keep terminal name, no need to create a new definition
1176 self._ignore_names.append(item.name)
1177 return
1179 name = '__IGNORE_%d'% len(self._ignore_names)
1180 self._ignore_names.append(name)
1181 self._definitions[name] = Definition(True, t, options=TOKEN_DEFAULT_PRIORITY)
1183 def _unpack_import(self, stmt, grammar_name):
1184 if len(stmt.children) > 1:
1185 path_node, arg1 = stmt.children
1186 else:
1187 path_node, = stmt.children
1188 arg1 = None
1190 if isinstance(arg1, Tree): # Multi import
1191 dotted_path = tuple(path_node.children)
1192 names = arg1.children
1193 aliases = dict(zip(names, names)) # Can't have aliased multi import, so all aliases will be the same as names
1194 else: # Single import
1195 dotted_path = tuple(path_node.children[:-1])
1196 if not dotted_path:
1197 name ,= path_node.children
1198 raise GrammarError("Nothing was imported from grammar `%s`" % name)
1199 name = path_node.children[-1] # Get name from dotted path
1200 aliases = {name.value: (arg1 or name).value} # Aliases if exist
1202 if path_node.data == 'import_lib': # Import from library
1203 base_path = None
1204 else: # Relative import
1205 if grammar_name == '<string>': # Import relative to script file path if grammar is coded in script
1206 try:
1207 base_file = os.path.abspath(sys.modules['__main__'].__file__)
1208 except AttributeError:
1209 base_file = None
1210 else:
1211 base_file = grammar_name # Import relative to grammar file path if external grammar file
1212 if base_file:
1213 if isinstance(base_file, PackageResource):
1214 base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
1215 else:
1216 base_path = os.path.split(base_file)[0]
1217 else:
1218 base_path = os.path.abspath(os.path.curdir)
1220 return dotted_path, base_path, aliases
1222 def _unpack_definition(self, tree, mangle):
1224 if tree.data == 'rule':
1225 name, params, exp, opts = _make_rule_tuple(*tree.children)
1226 is_term = False
1227 else:
1228 name = tree.children[0].value
1229 params = () # TODO terminal templates
1230 opts = int(tree.children[1]) if len(tree.children) == 3 else TOKEN_DEFAULT_PRIORITY # priority
1231 exp = tree.children[-1]
1232 is_term = True
1234 if mangle is not None:
1235 params = tuple(mangle(p) for p in params)
1236 name = mangle(name)
1238 exp = _mangle_definition_tree(exp, mangle)
1239 return name, is_term, exp, params, opts
1242 def load_grammar(self, grammar_text: str, grammar_name: str="<?>", mangle: Optional[Callable[[str], str]]=None) -> None:
1243 tree = _parse_grammar(grammar_text, grammar_name)
1245 imports: Dict[Tuple[str, ...], Tuple[Optional[str], Dict[str, str]]] = {}
1247 for stmt in tree.children:
1248 if stmt.data == 'import':
1249 dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
1250 try:
1251 import_base_path, import_aliases = imports[dotted_path]
1252 assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
1253 import_aliases.update(aliases)
1254 except KeyError:
1255 imports[dotted_path] = base_path, aliases
1257 for dotted_path, (base_path, aliases) in imports.items():
1258 self.do_import(dotted_path, base_path, aliases, mangle)
1260 for stmt in tree.children:
1261 if stmt.data in ('term', 'rule'):
1262 self._define(*self._unpack_definition(stmt, mangle))
1263 elif stmt.data == 'override':
1264 r ,= stmt.children
1265 self._define(*self._unpack_definition(r, mangle), override=True)
1266 elif stmt.data == 'extend':
1267 r ,= stmt.children
1268 self._extend(*self._unpack_definition(r, mangle))
1269 elif stmt.data == 'ignore':
1270 # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
1271 if mangle is None:
1272 self._ignore(*stmt.children)
1273 elif stmt.data == 'declare':
1274 for symbol in stmt.children:
1275 assert isinstance(symbol, Symbol), symbol
1276 is_term = isinstance(symbol, Terminal)
1277 if mangle is None:
1278 name = symbol.name
1279 else:
1280 name = mangle(symbol.name)
1281 self._define(name, is_term, None)
1282 elif stmt.data == 'import':
1283 pass
1284 else:
1285 assert False, stmt
1288 term_defs = { name: d.tree
1289 for name, d in self._definitions.items()
1290 if d.is_term
1291 }
1292 resolve_term_references(term_defs)
1295 def _remove_unused(self, used):
1296 def rule_dependencies(symbol):
1297 try:
1298 d = self._definitions[symbol]
1299 except KeyError:
1300 return []
1301 if d.is_term:
1302 return []
1303 return _find_used_symbols(d.tree) - set(d.params)
1305 _used = set(bfs(used, rule_dependencies))
1306 self._definitions = {k: v for k, v in self._definitions.items() if k in _used}
1309 def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None:
1310 assert dotted_path
1311 mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
1312 grammar_path = os.path.join(*dotted_path) + EXT
1313 to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
1314 for source in to_try:
1315 try:
1316 if callable(source):
1317 joined_path, text = source(base_path, grammar_path)
1318 else:
1319 joined_path = os.path.join(source, grammar_path)
1320 with open(joined_path, encoding='utf8') as f:
1321 text = f.read()
1322 except IOError:
1323 continue
1324 else:
1325 h = sha256_digest(text)
1326 if self.used_files.get(joined_path, h) != h:
1327 raise RuntimeError("Grammar file was changed during importing")
1328 self.used_files[joined_path] = h
1330 gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files)
1331 gb.load_grammar(text, joined_path, mangle)
1332 gb._remove_unused(map(mangle, aliases))
1333 for name in gb._definitions:
1334 if name in self._definitions:
1335 raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path))
1337 self._definitions.update(**gb._definitions)
1338 break
1339 else:
1340 # Search failed. Make Python throw a nice error.
1341 open(grammar_path, encoding='utf8')
1342 assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)
1345 def validate(self) -> None:
1346 for name, d in self._definitions.items():
1347 params = d.params
1348 exp = d.tree
1350 for i, p in enumerate(params):
1351 if p in self._definitions:
1352 raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
1353 if p in params[:i]:
1354 raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))
1356 if exp is None: # Remaining checks don't apply to abstract rules/terminals (created with %declare)
1357 continue
1359 for temp in exp.find_data('template_usage'):
1360 sym = temp.children[0].name
1361 args = temp.children[1:]
1362 if sym not in params:
1363 if sym not in self._definitions:
1364 self._grammar_error(d.is_term, "Template '%s' used but not defined (in {type} {name})" % sym, name)
1365 if len(args) != len(self._definitions[sym].params):
1366 expected, actual = len(self._definitions[sym].params), len(args)
1367 self._grammar_error(d.is_term, "Wrong number of template arguments used for {name} "
1368 "(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name)
1370 for sym in _find_used_symbols(exp):
1371 if sym not in self._definitions and sym not in params:
1372 self._grammar_error(d.is_term, "{Type} '{name}' used but not defined (in {type2} {name2})", sym, name)
1374 if not set(self._definitions).issuperset(self._ignore_names):
1375 raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))
1377 def build(self) -> Grammar:
1378 self.validate()
1379 rule_defs = []
1380 term_defs = []
1381 for name, d in self._definitions.items():
1382 (params, exp, options) = d.params, d.tree, d.options
1383 if d.is_term:
1384 assert len(params) == 0
1385 term_defs.append((name, (exp, options)))
1386 else:
1387 rule_defs.append((name, params, exp, options))
1388 # resolve_term_references(term_defs)
1389 return Grammar(rule_defs, term_defs, self._ignore_names)
1392def verify_used_files(file_hashes):
1393 for path, old in file_hashes.items():
1394 text = None
1395 if isinstance(path, str) and os.path.exists(path):
1396 with open(path, encoding='utf8') as f:
1397 text = f.read()
1398 elif isinstance(path, PackageResource):
1399 with suppress(IOError):
1400 text = pkgutil.get_data(*path).decode('utf-8')
1401 if text is None: # We don't know how to load the path. ignore it.
1402 continue
1404 current = sha256_digest(text)
1405 if old != current:
1406 logger.info("File %r changed, rebuilding Parser" % path)
1407 return False
1408 return True
1410def list_grammar_imports(grammar, import_paths=[]):
1411 "Returns a list of paths to the lark grammars imported by the given grammar (recursively)"
1412 builder = GrammarBuilder(False, import_paths)
1413 builder.load_grammar(grammar, '<string>')
1414 return list(builder.used_files.keys())
1416def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
1417 builder = GrammarBuilder(global_keep_all_tokens, import_paths)
1418 builder.load_grammar(grammar, source)
1419 return builder.build(), builder.used_files
1422def sha256_digest(s: str) -> str:
1423 """Get the sha256 digest of a string
1425 Supports the `usedforsecurity` argument for Python 3.9+ to allow running on
1426 a FIPS-enabled system.
1427 """
1428 if sys.version_info >= (3, 9):
1429 return hashlib.sha256(s.encode('utf8'), usedforsecurity=False).hexdigest()
1430 else:
1431 return hashlib.sha256(s.encode('utf8')).hexdigest()