"""Parses and compiles Lark grammars into an internal representation.
"""

import hashlib
import os.path
import sys
from collections import namedtuple
from copy import copy, deepcopy
import pkgutil
from ast import literal_eval
from contextlib import suppress
from typing import List, Tuple, Union, Callable, Dict, Optional, Sequence

from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors
from .lexer import Token, TerminalDef, PatternStr, PatternRE

from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import ParsingFrontend
from .common import LexerConf, ParserConf
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, TOKEN_DEFAULT_PRIORITY
from .utils import classify, dedup_list
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput

from .tree import Tree, SlottedTree as ST
from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive

inline_args = v_args(inline=True)

IMPORT_PATHS = ['grammars']

EXT = '.lark'

_RE_FLAGS = 'imslux'

_EMPTY = Symbol('__empty__')

_TERMINAL_NAMES = {
    '.' : 'DOT',
    ',' : 'COMMA',
    ':' : 'COLON',
    ';' : 'SEMICOLON',
    '+' : 'PLUS',
    '-' : 'MINUS',
    '*' : 'STAR',
    '/' : 'SLASH',
    '\\' : 'BACKSLASH',
    '|' : 'VBAR',
    '?' : 'QMARK',
    '!' : 'BANG',
    '@' : 'AT',
    '#' : 'HASH',
    '$' : 'DOLLAR',
    '%' : 'PERCENT',
    '^' : 'CIRCUMFLEX',
    '&' : 'AMPERSAND',
    '_' : 'UNDERSCORE',
    '<' : 'LESSTHAN',
    '>' : 'MORETHAN',
    '=' : 'EQUAL',
    '"' : 'DBLQUOTE',
    '\'' : 'QUOTE',
    '`' : 'BACKQUOTE',
    '~' : 'TILDE',
    '(' : 'LPAR',
    ')' : 'RPAR',
    '{' : 'LBRACE',
    '}' : 'RBRACE',
    '[' : 'LSQB',
    ']' : 'RSQB',
    '\n' : 'NEWLINE',
    '\r\n' : 'CRLF',
    '\t' : 'TAB',
    ' ' : 'SPACE',
}

# Grammar Parser
TERMINALS = {
    '_LPAR': r'\(',
    '_RPAR': r'\)',
    '_LBRA': r'\[',
    '_RBRA': r'\]',
    '_LBRACE': r'\{',
    '_RBRACE': r'\}',
    'OP': '[+*]|[?](?![a-z_])',
    '_COLON': ':',
    '_COMMA': ',',
    '_OR': r'\|',
    '_DOT': r'\.(?!\.)',
    '_DOTDOT': r'\.\.',
    'TILDE': '~',
    'RULE_MODIFIERS': '(!|![?]?|[?]!?)(?=[_a-z])',
    'RULE': '_?[a-z][_a-z0-9]*',
    'TERMINAL': '_?[A-Z][_A-Z0-9]*',
    'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
    'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS,
    '_NL': r'(\r?\n)+\s*',
    '_NL_OR': r'(\r?\n)+\s*\|',
    'WS': r'[ \t]+',
    'COMMENT': r'\s*//[^\n]*|\s*#[^\n]*',
    'BACKSLASH': r'\\[ ]*\n',
    '_TO': '->',
    '_IGNORE': r'%ignore',
    '_OVERRIDE': r'%override',
    '_DECLARE': r'%declare',
    '_EXTEND': r'%extend',
    '_IMPORT': r'%import',
    'NUMBER': r'[+-]?\d+',
}
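
# A minimal sketch (illustration only; `_demo_meta_terminals` is a hypothetical
# helper): the meta-grammar terminals above are plain regexps, so their behavior
# can be checked directly with `re`. Note how the lookahead in OP keeps a bare
# `?` operator distinct from a `?rule` modifier:
def _demo_meta_terminals():
    import re
    assert re.fullmatch(TERMINALS['RULE'], '_ambig')      # rule names are lowercase
    assert re.fullmatch(TERMINALS['TERMINAL'], 'NUMBER')  # terminal names are uppercase
    assert re.match(TERMINALS['OP'], '?')                 # bare '?' is the maybe-operator
    assert not re.match(TERMINALS['OP'], '?e')            # '?e' starts a rule modifier instead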


RULES = {
    'start': ['_list'],
    '_list': ['_item', '_list _item'],
    '_item': ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'],

    'rule': ['rule_modifiers RULE template_params priority _COLON expansions _NL'],
    'rule_modifiers': ['RULE_MODIFIERS',
                       ''],
    'priority': ['_DOT NUMBER',
                 ''],
    'template_params': ['_LBRACE _template_params _RBRACE',
                        ''],
    '_template_params': ['RULE',
                         '_template_params _COMMA RULE'],
    'expansions': ['_expansions'],
    '_expansions': ['alias',
                    '_expansions _OR alias',
                    '_expansions _NL_OR alias'],

    '?alias': ['expansion _TO nonterminal', 'expansion'],
    'expansion': ['_expansion'],

    '_expansion': ['', '_expansion expr'],

    '?expr': ['atom',
              'atom OP',
              'atom TILDE NUMBER',
              'atom TILDE NUMBER _DOTDOT NUMBER',
              ],

    '?atom': ['_LPAR expansions _RPAR',
              'maybe',
              'value'],

    'value': ['terminal',
              'nonterminal',
              'literal',
              'range',
              'template_usage'],

    'terminal': ['TERMINAL'],
    'nonterminal': ['RULE'],

    '?name': ['RULE', 'TERMINAL'],
    '?symbol': ['terminal', 'nonterminal'],

    'maybe': ['_LBRA expansions _RBRA'],
    'range': ['STRING _DOTDOT STRING'],

    'template_usage': ['nonterminal _LBRACE _template_args _RBRACE'],
    '_template_args': ['value',
                       '_template_args _COMMA value'],

    'term': ['TERMINAL _COLON expansions _NL',
             'TERMINAL _DOT NUMBER _COLON expansions _NL'],
    'override': ['_OVERRIDE rule',
                 '_OVERRIDE term'],
    'extend': ['_EXTEND rule',
               '_EXTEND term'],
    'ignore': ['_IGNORE expansions _NL'],
    'declare': ['_DECLARE _declare_args _NL'],
    'import': ['_IMPORT _import_path _NL',
               '_IMPORT _import_path _LPAR name_list _RPAR _NL',
               '_IMPORT _import_path _TO name _NL'],

    '_import_path': ['import_lib', 'import_rel'],
    'import_lib': ['_import_args'],
    'import_rel': ['_DOT _import_args'],
    '_import_args': ['name', '_import_args _DOT name'],

    'name_list': ['_name_list'],
    '_name_list': ['name', '_name_list _COMMA name'],

    '_declare_args': ['symbol', '_declare_args symbol'],
    'literal': ['REGEXP', 'STRING'],
}


# Value 5 keeps the number of states in the lalr parser somewhat minimal.
# It isn't optimal, but close to it. See PR #949
SMALL_FACTOR_THRESHOLD = 5
# The threshold that decides whether a repeat via ~ is split up into different rules.
# 50 is chosen since it keeps the number of states low and therefore lalr analysis time low,
# while not being too aggressive and unnecessarily creating rules that might cause shift/reduce conflicts.
# (See PR #949)
REPEAT_BREAK_THRESHOLD = 50


class FindRuleSize(Transformer):
    def __init__(self, keep_all_tokens):
        self.keep_all_tokens = keep_all_tokens

    def _will_not_get_removed(self, sym):
        if isinstance(sym, NonTerminal):
            return not sym.name.startswith('_')
        if isinstance(sym, Terminal):
            return self.keep_all_tokens or not sym.filter_out
        if sym is _EMPTY:
            return False
        assert False, sym

    def _args_as_int(self, args):
        for a in args:
            if isinstance(a, int):
                yield a
            elif isinstance(a, Symbol):
                yield 1 if self._will_not_get_removed(a) else 0
            else:
                assert False

    def expansion(self, args):
        return sum(self._args_as_int(args))

    def expansions(self, args):
        return max(self._args_as_int(args))


@inline_args
class EBNF_to_BNF(Transformer_InPlace):
    def __init__(self):
        self.new_rules = []
        self.rules_cache = {}
        self.prefix = 'anon'
        self.i = 0
        self.rule_options = None

    def _name_rule(self, inner):
        new_name = '__%s_%s_%d' % (self.prefix, inner, self.i)
        self.i += 1
        return new_name

    def _add_rule(self, key, name, expansions):
        t = NonTerminal(name)
        self.new_rules.append((name, expansions, self.rule_options))
        self.rules_cache[key] = t
        return t

    def _add_recurse_rule(self, type_, expr):
        try:
            return self.rules_cache[expr]
        except KeyError:
            new_name = self._name_rule(type_)
            t = NonTerminal(new_name)
            tree = ST('expansions', [
                ST('expansion', [expr]),
                ST('expansion', [t, expr])
            ])
            return self._add_rule(expr, new_name, tree)

    def _add_repeat_rule(self, a, b, target, atom):
        """Generate a rule that repeats ``target`` ``a`` times, and repeats ``atom`` ``b`` times.

        When called recursively (into target), it repeats atom x(n) times, where:
            x(0) = 1
            x(n) = a(n) * x(n-1) + b(n)

        Example rule when a=3, b=4:

            new_rule: target target target atom atom atom atom

        """
        key = (a, b, target, atom)
        try:
            return self.rules_cache[key]
        except KeyError:
            new_name = self._name_rule('repeat_a%d_b%d' % (a, b))
            tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
            return self._add_rule(key, new_name, tree)

    def _add_repeat_opt_rule(self, a, b, target, target_opt, atom):
        """Creates a rule that matches ``atom`` 0 to (a*n+b)-1 times,
        assuming that ``target`` matches atom exactly n times, and ``target_opt``
        matches atom 0 to n-1 times.

        First we generate target * i followed by target_opt, for i from 0 to a-1.
        These match atom 0 to n*a - 1 times.

        Then we generate target * a followed by atom * i, for i from 0 to b-1.
        These match atom n*a to n*a + b-1 times.

        The created rule will not have any shift/reduce conflicts, so that it can be used with lalr.

        Example rule when a=3, b=4:

            new_rule: target_opt
                    | target target_opt
                    | target target target_opt

                    | target target target
                    | target target target atom
                    | target target target atom atom
                    | target target target atom atom atom

        """
        key = (a, b, target, atom, "opt")
        try:
            return self.rules_cache[key]
        except KeyError:
            new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b))
            tree = ST('expansions', [
                ST('expansion', [target]*i + [target_opt]) for i in range(a)
            ] + [
                ST('expansion', [target]*a + [atom]*i) for i in range(b)
            ])
            return self._add_rule(key, new_name, tree)

    def _generate_repeats(self, rule, mn, mx):
        """Generates a rule tree that repeats ``rule`` between ``mn`` and ``mx`` times.
        """
        # For a small number of repeats, we can take the naive approach
        if mx < REPEAT_BREAK_THRESHOLD:
            return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])

        # For large repeat values, we break the repetition into sub-rules.
        # We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``.
        # We then use small_factors to split mn and diff up into values [(a, b), ...].
        # These values are used with the help of _add_repeat_rule and _add_repeat_opt_rule
        # to generate a complete rule/expression that matches the corresponding number of repeats.
        mn_target = rule
        for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD):
            mn_target = self._add_repeat_rule(a, b, mn_target, rule)
        if mx == mn:
            return mn_target

        diff = mx - mn + 1  # We add one because _add_repeat_opt_rule generates rules that match one less
        diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD)
        diff_target = rule  # Match rule 1 time
        diff_opt_target = ST('expansion', [])  # Match rule 0 times (i.e. up to 1 - 1 = 0 times)
        for a, b in diff_factors[:-1]:
            diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
            diff_target = self._add_repeat_rule(a, b, diff_target, rule)

        a, b = diff_factors[-1]
        diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)

        return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])])

    def expr(self, rule, op, *args):
        if op.value == '?':
            empty = ST('expansion', [])
            return ST('expansions', [rule, empty])
        elif op.value == '+':
            # a : b c+ d
            #   -->
            # a : b _c d
            # _c : _c c | c;
            return self._add_recurse_rule('plus', rule)
        elif op.value == '*':
            # a : b c* d
            #   -->
            # a : b _c? d
            # _c : _c c | c;
            new_name = self._add_recurse_rule('star', rule)
            return ST('expansions', [new_name, ST('expansion', [])])
        elif op.value == '~':
            if len(args) == 1:
                mn = mx = int(args[0])
            else:
                mn, mx = map(int, args)
                if mx < mn or mn < 0:
                    raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))

            return self._generate_repeats(rule, mn, mx)

        assert False, op

    def maybe(self, rule):
        keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
        rule_size = FindRuleSize(keep_all_tokens).transform(rule)
        empty = ST('expansion', [_EMPTY] * rule_size)
        return ST('expansions', [rule, empty])
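
# A small self-check of the repeat machinery above (illustration only;
# `_demo_generate_repeats` is a hypothetical helper, and the exact contract of
# ``small_factors`` is inferred from the docstrings above). For small ranges
# the naive branch emits one alternative per repeat count; for large counts,
# folding x(n) = a*x(n-1) + b over the factor pairs must reproduce n:
def _demo_generate_repeats():
    ebnf = EBNF_to_BNF()
    tree = ebnf._generate_repeats(NonTerminal('rule'), 2, 4)
    # rule~2..4  -->  rule rule | rule rule rule | rule rule rule rule
    assert [len(exp.children) for exp in tree.children] == [2, 3, 4]

    x = 1
    for a, b in small_factors(100, SMALL_FACTOR_THRESHOLD):
        x = a * x + b   # each level repeats `target` a times and `atom` b times
    assert x == 100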



class SimplifyRule_Visitor(Visitor):

    @staticmethod
    def _flatten(tree):
        while tree.expand_kids_by_data(tree.data):
            pass

    def expansion(self, tree):
        # rules_list unpacking
        # a : b (c|d) e
        #  -->
        # a : b c e | b d e
        #
        # In AST terms:
        # expansion(b, expansions(c, d), e)
        #   -->
        # expansions( expansion(b, c, e), expansion(b, d, e) )

        self._flatten(tree)

        for i, child in enumerate(tree.children):
            if isinstance(child, Tree) and child.data == 'expansions':
                tree.data = 'expansions'
                tree.children = [self.visit(ST('expansion', [option if i == j else other
                                                             for j, other in enumerate(tree.children)]))
                                 for option in dedup_list(child.children)]
                self._flatten(tree)
                break

    def alias(self, tree):
        rule, alias_name = tree.children
        if rule.data == 'expansions':
            aliases = []
            for child in tree.children[0].children:
                aliases.append(ST('alias', [child, alias_name]))
            tree.data = 'expansions'
            tree.children = aliases

    def expansions(self, tree):
        self._flatten(tree)
        # Ensure all children are unique
        if len(set(tree.children)) != len(tree.children):
            tree.children = dedup_list(tree.children)   # dedup is expensive, so try to minimize its use
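
# A sketch of the rules_list unpacking described in ``expansion`` above
# (illustration only; `_demo_rules_list_unpacking` is a hypothetical helper):
# `b (c|d) e` becomes the two alternatives `b c e | b d e`.
def _demo_rules_list_unpacking():
    b, c, d, e = (NonTerminal(name) for name in 'bcde')
    tree = ST('expansion', [b, ST('expansions', [ST('expansion', [c]),
                                                 ST('expansion', [d])]), e])
    SimplifyRule_Visitor().visit(tree)
    assert tree.data == 'expansions'
    assert [exp.children for exp in tree.children] == [[b, c, e], [b, d, e]]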



class RuleTreeToText(Transformer):
    def expansions(self, x):
        return x

    def expansion(self, symbols):
        return symbols, None

    def alias(self, x):
        (expansion, _alias), alias = x
        assert _alias is None, (alias, expansion, '-', _alias)  # Double alias not allowed
        return expansion, alias.name


class PrepareAnonTerminals(Transformer_InPlace):
    """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them."""

    def __init__(self, terminals):
        self.terminals = terminals
        self.term_set = {td.name for td in self.terminals}
        self.term_reverse = {td.pattern: td for td in terminals}
        self.i = 0
        self.rule_options = None

    @inline_args
    def pattern(self, p):
        value = p.value
        if p in self.term_reverse and p.flags != self.term_reverse[p].pattern.flags:
            raise GrammarError(u'Conflicting flags for the same terminal: %s' % p)

        term_name = None

        if isinstance(p, PatternStr):
            try:
                # If already defined, use the user-defined terminal name
                term_name = self.term_reverse[p].name
            except KeyError:
                # Try to assign an indicative anon-terminal name
                try:
                    term_name = _TERMINAL_NAMES[value]
                except KeyError:
                    if value and is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set:
                        term_name = value.upper()

                if term_name in self.term_set:
                    term_name = None

        elif isinstance(p, PatternRE):
            if p in self.term_reverse:  # Kind of a weird placement
                term_name = self.term_reverse[p].name
        else:
            assert False, p

        if term_name is None:
            term_name = '__ANON_%d' % self.i
            self.i += 1

        if term_name not in self.term_set:
            assert p not in self.term_reverse
            self.term_set.add(term_name)
            termdef = TerminalDef(term_name, p)
            self.term_reverse[p] = termdef
            self.terminals.append(termdef)

        filter_out = False if self.rule_options and self.rule_options.keep_all_tokens else isinstance(p, PatternStr)

        return Terminal(term_name, filter_out=filter_out)


class _ReplaceSymbols(Transformer_InPlace):
    """Helper for ApplyTemplates"""

    def __init__(self):
        self.names = {}

    def value(self, c):
        if len(c) == 1 and isinstance(c[0], Symbol) and c[0].name in self.names:
            return self.names[c[0].name]
        return self.__default__('value', c, None)

    def template_usage(self, c):
        name = c[0].name
        if name in self.names:
            return self.__default__('template_usage', [self.names[name]] + c[1:], None)
        return self.__default__('template_usage', c, None)


class ApplyTemplates(Transformer_InPlace):
    """Apply the templates, creating new rules that represent the used templates"""

    def __init__(self, rule_defs):
        self.rule_defs = rule_defs
        self.replacer = _ReplaceSymbols()
        self.created_templates = set()

    def template_usage(self, c):
        name = c[0].name
        args = c[1:]
        result_name = "%s{%s}" % (name, ",".join(a.name for a in args))
        if result_name not in self.created_templates:
            self.created_templates.add(result_name)
            (_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name)
            assert len(params) == len(args), args
            result_tree = deepcopy(tree)
            self.replacer.names = dict(zip(params, args))
            self.replacer.transform(result_tree)
            self.rule_defs.append((result_name, [], result_tree, deepcopy(options)))
        return NonTerminal(result_name)


def _rfind(s, choices):
    return max(s.rfind(c) for c in choices)


def eval_escaping(s):
    w = ''
    i = iter(s)
    for n in i:
        w += n
        if n == '\\':
            try:
                n2 = next(i)
            except StopIteration:
                raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
            if n2 == '\\':
                w += '\\\\'
            elif n2 not in 'Uuxnftr':
                w += '\\'
            w += n2
    w = w.replace('\\"', '"').replace("'", "\\'")

    to_eval = "u'''%s'''" % w
    try:
        s = literal_eval(to_eval)
    except SyntaxError as e:
        raise GrammarError(s, e)

    return s
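
# A few sanity checks on eval_escaping (illustration only; `_demo_eval_escaping`
# is a hypothetical helper). Recognized escapes are interpreted, unknown ones
# keep their backslash literally:
def _demo_eval_escaping():
    assert eval_escaping(r'a\nb') == 'a\nb'   # \n becomes a newline
    assert eval_escaping(r'\x41') == 'A'      # hex escapes are supported
    assert eval_escaping(r'\d') == '\\d'      # unknown escapes are preserved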



def _literal_to_pattern(literal):
    assert isinstance(literal, Token)
    v = literal.value
    flag_start = _rfind(v, '/"')+1
    assert flag_start > 0
    flags = v[flag_start:]
    assert all(f in _RE_FLAGS for f in flags), flags

    if literal.type == 'STRING' and '\n' in v:
        raise GrammarError('You cannot put newlines in string literals')

    if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags:
        raise GrammarError('You can only use newlines in regular expressions '
                           'with the `x` (verbose) flag')

    v = v[:flag_start]
    assert v[0] == v[-1] and v[0] in '"/'
    x = v[1:-1]

    s = eval_escaping(x)

    if s == "":
        raise GrammarError("Empty terminals are not allowed (%s)" % literal)

    if literal.type == 'STRING':
        s = s.replace('\\\\', '\\')
        return PatternStr(s, flags, raw=literal.value)
    elif literal.type == 'REGEXP':
        return PatternRE(s, flags, raw=literal.value)
    else:
        assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'
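
# A sketch of the two literal forms (illustration only; `_demo_literal_to_pattern`
# is a hypothetical helper). String literals become PatternStr, regexp literals
# become PatternRE with their trailing flags split off:
def _demo_literal_to_pattern():
    p1 = _literal_to_pattern(Token('STRING', '"hello"'))
    assert isinstance(p1, PatternStr) and p1.value == 'hello'
    p2 = _literal_to_pattern(Token('REGEXP', '/a+/i'))
    assert isinstance(p2, PatternRE) and p2.value == 'a+' and 'i' in p2.flags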



@inline_args
class PrepareLiterals(Transformer_InPlace):
    def literal(self, literal):
        return ST('pattern', [_literal_to_pattern(literal)])

    def range(self, start, end):
        assert start.type == end.type == 'STRING'
        start = start.value[1:-1]
        end = end.value[1:-1]
        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
        regexp = '[%s-%s]' % (start, end)
        return ST('pattern', [PatternRE(regexp)])


def _make_joined_pattern(regexp, flags_set):
    return PatternRE(regexp, ())

class TerminalTreeToPattern(Transformer_NonRecursive):
    def pattern(self, ps):
        p ,= ps
        return p

    def expansion(self, items):
        assert items
        if len(items) == 1:
            return items[0]

        pattern = ''.join(i.to_regexp() for i in items)
        return _make_joined_pattern(pattern, {i.flags for i in items})

    def expansions(self, exps):
        if len(exps) == 1:
            return exps[0]

        # Do a bit of sorting to make sure that the longest option is returned
        # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match)
        exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value)))

        pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
        return _make_joined_pattern(pattern, {i.flags for i in exps})
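
    # For reference (illustration only): Python's re prefers the leftmost
    # alternative that matches, regardless of length. For example,
    #     re.match('l|ll', 'll').group() == 'l'
    #     re.match('ll|l', 'll').group() == 'll'
    # hence the sort above puts wider patterns first.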


    def expr(self, args):
        inner, op = args[:2]
        if op == '~':
            if len(args) == 3:
                op = "{%d}" % int(args[2])
            else:
                mn, mx = map(int, args[2:])
                if mx < mn:
                    raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
                op = "{%d,%d}" % (mn, mx)
        else:
            assert len(args) == 2
        return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)

    def maybe(self, expr):
        return self.expr(expr + ['?'])

    def alias(self, t):
        raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")

    def value(self, v):
        return v[0]


class ValidateSymbols(Transformer_InPlace):
    def value(self, v):
        v ,= v
        assert isinstance(v, (Tree, Symbol))
        return v


def nr_deepcopy_tree(t):
    """Deepcopy tree `t` without recursion"""
    return Transformer_NonRecursive(False).transform(t)


class Grammar:

    term_defs: List[Tuple[str, Tuple[Tree, int]]]
    rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
    ignore: List[str]

    def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None:
        self.term_defs = term_defs
        self.rule_defs = rule_defs
        self.ignore = ignore

    def compile(self, start, terminals_to_keep):
        # We change the trees in-place (to support huge grammars)
        # So deepcopy allows calling compile more than once.
        term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs]
        rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs]

        # ===================
        #  Compile Terminals
        # ===================

        # Convert terminal-trees to strings/regexps

        for name, (term_tree, priority) in term_defs:
            if term_tree is None:  # Terminal added through %declare
                continue
            expansions = list(term_tree.find_data('expansion'))
            if len(expansions) == 1 and not expansions[0].children:
                raise GrammarError("Terminals cannot be empty (%s)" % name)

        transformer = PrepareLiterals() * TerminalTreeToPattern()
        terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
                     for name, (term_tree, priority) in term_defs if term_tree]

        # =================
        #  Compile Rules
        # =================

        # 1. Pre-process terminals
        anon_tokens_transf = PrepareAnonTerminals(terminals)
        transformer = PrepareLiterals() * ValidateSymbols() * anon_tokens_transf  # Adds to terminals

        # 2. Inline Templates

        transformer *= ApplyTemplates(rule_defs)

        # 3. Convert EBNF to BNF (and apply steps 1 & 2)
        ebnf_to_bnf = EBNF_to_BNF()
        rules = []
        i = 0
        while i < len(rule_defs):  # We have to do it like this because rule_defs might grow due to templates
            name, params, rule_tree, options = rule_defs[i]
            i += 1
            if len(params) != 0:  # Don't transform templates
                continue
            rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
            ebnf_to_bnf.rule_options = rule_options
            ebnf_to_bnf.prefix = name
            anon_tokens_transf.rule_options = rule_options
            tree = transformer.transform(rule_tree)
            res = ebnf_to_bnf.transform(tree)
            rules.append((name, res, options))
        rules += ebnf_to_bnf.new_rules

        assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"

        # 4. Compile tree to Rule objects
        rule_tree_to_text = RuleTreeToText()

        simplify_rule = SimplifyRule_Visitor()
        compiled_rules = []
        for rule_content in rules:
            name, tree, options = rule_content
            simplify_rule.visit(tree)
            expansions = rule_tree_to_text.transform(tree)

            for i, (expansion, alias) in enumerate(expansions):
                if alias and name.startswith('_'):
                    raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

                empty_indices = [x==_EMPTY for x in expansion]
                if any(empty_indices):
                    exp_options = copy(options) or RuleOptions()
                    exp_options.empty_indices = empty_indices
                    expansion = [x for x in expansion if x!=_EMPTY]
                else:
                    exp_options = options

                for sym in expansion:
                    assert isinstance(sym, Symbol)
                    if sym.is_term and exp_options and exp_options.keep_all_tokens:
                        sym.filter_out = False
                rule = Rule(NonTerminal(name), expansion, i, alias, exp_options)
                compiled_rules.append(rule)

        # Remove duplicates of empty rules, throw error for non-empty duplicates
        if len(set(compiled_rules)) != len(compiled_rules):
            duplicates = classify(compiled_rules, lambda x: x)
            for dups in duplicates.values():
                if len(dups) > 1:
                    if dups[0].expansion:
                        raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)"
                                           % ''.join('\n * %s' % i for i in dups))

                    # Empty rule; assert all other attributes are equal
                    assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)

            # Remove duplicates
            compiled_rules = list(set(compiled_rules))

        # Filter out unused rules
        while True:
            c = len(compiled_rules)
            used_rules = {s for r in compiled_rules
                          for s in r.expansion
                          if isinstance(s, NonTerminal)
                          and s != r.origin}
            used_rules |= {NonTerminal(s) for s in start}
            compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
            for r in unused:
                logger.debug("Unused rule: %s", r)
            if len(compiled_rules) == c:
                break

        # Filter out unused terminals
        if terminals_to_keep != '*':
            used_terms = {t.name for r in compiled_rules
                          for t in r.expansion
                          if isinstance(t, Terminal)}
            terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
            if unused:
                logger.debug("Unused terminals: %s", [t.name for t in unused])

        return terminals, compiled_rules, self.ignore


PackageResource = namedtuple('PackageResource', 'pkg_name path')


class FromPackageLoader:
    """
    Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using ``open``.
    This allows them to be compatible even from within zip files.

    Relative imports are handled, so you can just freely use them.

    pkg_name: The name of the package. You can probably provide ``__name__`` most of the time.
    search_paths: All the paths that will be searched on absolute imports.
    """

    pkg_name: str
    search_paths: Sequence[str]

    def __init__(self, pkg_name: str, search_paths: Sequence[str]=("", )) -> None:
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]:
        if base_path is None:
            to_try = self.search_paths
        else:
            # Check whether or not the importing grammar was loaded by this module.
            if not isinstance(base_path, PackageResource) or base_path.pkg_name != self.pkg_name:
                # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
                raise IOError()
            to_try = [base_path.path]

        err = None
        for path in to_try:
            full_path = os.path.join(path, grammar_path)
            try:
                text: Optional[bytes] = pkgutil.get_data(self.pkg_name, full_path)
            except IOError as e:
                err = e
                continue
            else:
                return PackageResource(self.pkg_name, full_path), (text.decode() if text else '')

        raise IOError('Cannot find grammar in given paths') from err


stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
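
# A hypothetical usage sketch (illustration only; assumes a package `my_package`
# that ships *.lark files under `my_package/grammars/`). Such a loader can be
# passed to ``Lark(..., import_paths=[loader])`` so that %import statements
# resolve inside the package, even when it is zipped:
def _demo_package_loader():
    loader = FromPackageLoader('my_package', ('grammars', ))
    resource, text = loader(None, 'my_grammar.lark')
    assert isinstance(resource, PackageResource)
    return text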



def resolve_term_references(term_dict):
    # TODO Solve with transitive closure (maybe)

    while True:
        changed = False
        for name, token_tree in term_dict.items():
            if token_tree is None:  # Terminal added through %declare
                continue
            for exp in token_tree.find_data('value'):
                item ,= exp.children
                if isinstance(item, NonTerminal):
                    raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name))
                elif isinstance(item, Terminal):
                    try:
                        term_value = term_dict[item.name]
                    except KeyError:
                        raise GrammarError("Terminal used but not defined: %s" % item.name)
                    assert term_value is not None
                    exp.children[0] = term_value
                    changed = True
                else:
                    assert isinstance(item, Tree)
        if not changed:
            break

    for name, term in term_dict.items():
        if term:    # Not just declared
            for child in term.children:
                ids = [id(x) for x in child.iter_subtrees()]
                if id(term) in ids:
                    raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name)


def symbol_from_strcase(s):
    assert isinstance(s, str)
    return Terminal(s, filter_out=s.startswith('_')) if s.isupper() else NonTerminal(s)


@inline_args
class PrepareGrammar(Transformer_InPlace):
    def terminal(self, name):
        return Terminal(str(name), filter_out=name.startswith('_'))

    def nonterminal(self, name):
        return NonTerminal(name.value)


def _find_used_symbols(tree):
    assert tree.data == 'expansions'
    return {t.name for x in tree.find_data('expansion')
            for t in x.scan_values(lambda t: isinstance(t, Symbol))}


def _get_parser():
    try:
        return _get_parser.cache
    except AttributeError:
        terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

        rules = [(name.lstrip('?'), x, RuleOptions(expand1=name.startswith('?')))
                 for name, x in RULES.items()]
        rules = [Rule(NonTerminal(r), [symbol_from_strcase(s) for s in x.split()], i, None, o)
                 for r, xs, o in rules for i, x in enumerate(xs)]

        callback = ParseTreeBuilder(rules, ST).create_callback()
        import re
        lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT', 'BACKSLASH'])
        parser_conf = ParserConf(rules, callback, ['start'])
        lexer_conf.lexer_type = 'basic'
        parser_conf.parser_type = 'lalr'
        _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None)
        return _get_parser.cache


GRAMMAR_ERRORS = [
    ('Incorrect type of value', ['a: 1\n']),
    ('Unclosed parenthesis', ['a: (\n']),
    ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
    ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
    ('Illegal name for rules or terminals', ['Aa:\n']),
    ('Alias expects lowercase name', ['a: -> "a"\n']),
    ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']),
    ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']),
    ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']),
    ('Terminal names cannot contain dots', ['A.B\n']),
    ('Expecting rule or terminal definition', ['"a"\n']),
    ('%import expects a name', ['%import "a"\n']),
    ('%ignore expects a value', ['%ignore %import\n']),
]


def _translate_parser_exception(parse, e):
    error = e.match_examples(parse, GRAMMAR_ERRORS, use_accepts=True)
    if error:
        return error
    elif 'STRING' in e.expected:
        return "Expecting a value"


def _parse_grammar(text, name, start='start'):
    try:
        tree = _get_parser().parse(text + '\n', start)
    except UnexpectedCharacters as e:
        context = e.get_context(text)
        raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
                           (e.line, e.column, name, context))
    except UnexpectedToken as e:
        context = e.get_context(text)
        error = _translate_parser_exception(_get_parser().parse, e)
        if error:
            raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
        raise

    return PrepareGrammar().transform(tree)


def _error_repr(error):
    if isinstance(error, UnexpectedToken):
        error2 = _translate_parser_exception(_get_parser().parse, error)
        if error2:
            return error2
        expected = ', '.join(error.accepts or error.expected)
        return "Unexpected token %r. Expected one of: {%s}" % (str(error.token), expected)
    else:
        return str(error)


def _search_interactive_parser(interactive_parser, predicate):
    def expand(node):
        path, p = node
        for choice in p.choices():
            t = Token(choice, '')
            try:
                new_p = p.feed_token(t)
            except ParseError:    # Illegal
                pass
            else:
                yield path + (choice,), new_p

    for path, p in bfs_all_unique([((), interactive_parser)], expand):
        if predicate(p):
            return path, p


def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]:
    errors = []
    def on_error(e):
        errors.append((e, _error_repr(e)))

        # recover to a new line
        token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices())
        for token_type in token_path:
            e.interactive_parser.feed_token(Token(token_type, ''))
        e.interactive_parser.feed_token(Token('_NL', '\n'))
        return True

    _tree = _get_parser().parse(text + '\n', start, on_error=on_error)

    errors_by_line = classify(errors, lambda e: e[0].line)
    errors = [el[0] for el in errors_by_line.values()]  # already sorted

    for e in errors:
        e[0].interactive_parser = None
    return errors
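
# A usage sketch (illustration only; `_demo_find_grammar_errors` is a
# hypothetical helper). Because on_error recovers to the next newline, several
# errors can be collected from a single malformed grammar in one pass:
def _demo_find_grammar_errors():
    for err, message in find_grammar_errors('rule1: rule2\n%import "a"\n'):
        print(err.line, message)   # e.g. `2 %import expects a name`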



def _get_mangle(prefix, aliases, base_mangle=None):
    def mangle(s):
        if s in aliases:
            s = aliases[s]
        else:
            if s[0] == '_':
                s = '_%s__%s' % (prefix, s[1:])
            else:
                s = '%s__%s' % (prefix, s)
        if base_mangle is not None:
            s = base_mangle(s)
        return s
    return mangle
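
# A quick illustration (`_demo_mangle` is a hypothetical helper): importing
# from grammar `common` prefixes non-aliased names with `common__`, keeping a
# leading underscore in front so that inlined rules stay inlined:
def _demo_mangle():
    mangle = _get_mangle('common', {'NUMBER': 'NUM'})
    assert mangle('NUMBER') == 'NUM'                            # aliased: renamed directly
    assert mangle('signed_number') == 'common__signed_number'
    assert mangle('_ambig') == '_common__ambig'                 # underscore stays in front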



def _mangle_definition_tree(exp, mangle):
    if mangle is None:
        return exp
    exp = deepcopy(exp)  # TODO: is this needed?
    for t in exp.iter_subtrees():
        for i, c in enumerate(t.children):
            if isinstance(c, Symbol):
                t.children[i] = c.renamed(mangle)

    return exp


def _make_rule_tuple(modifiers_tree, name, params, priority_tree, expansions):
    if modifiers_tree.children:
        m ,= modifiers_tree.children
        expand1 = '?' in m
        if expand1 and name.startswith('_'):
            raise GrammarError("Inlined rules (_rule) cannot use the ?rule modifier.")
        keep_all_tokens = '!' in m
    else:
        keep_all_tokens = False
        expand1 = False

    if priority_tree.children:
        p ,= priority_tree.children
        priority = int(p)
    else:
        priority = None

    if params is not None:
        params = [t.value for t in params.children]  # For the grammar parser

    return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority,
                                                 template_source=(name if params else None))


class Definition:
    def __init__(self, is_term, tree, params=(), options=None):
        self.is_term = is_term
        self.tree = tree
        self.params = tuple(params)
        self.options = options


class GrammarBuilder:

    global_keep_all_tokens: bool
    import_paths: List[Union[str, Callable]]
    used_files: Dict[str, str]

    _definitions: Dict[str, Definition]
    _ignore_names: List[str]

    def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None:
        self.global_keep_all_tokens = global_keep_all_tokens
        self.import_paths = import_paths or []
        self.used_files = used_files or {}

        self._definitions: Dict[str, Definition] = {}
        self._ignore_names: List[str] = []

    def _grammar_error(self, is_term, msg, *names):
        args = {}
        for i, name in enumerate(names, start=1):
            postfix = '' if i == 1 else str(i)
            args['name' + postfix] = name
            args['type' + postfix] = lowercase_type = ("rule", "terminal")[is_term]
            args['Type' + postfix] = lowercase_type.title()
        raise GrammarError(msg.format(**args))

    def _check_options(self, is_term, options):
        if is_term:
            if options is None:
                options = 1
            elif not isinstance(options, int):
                raise GrammarError("Terminals require a single int as 'options' (e.g. priority), got %s" % (type(options),))
        else:
            if options is None:
                options = RuleOptions()
            elif not isinstance(options, RuleOptions):
                raise GrammarError("Rules require a RuleOptions instance as 'options'")
            if self.global_keep_all_tokens:
                options.keep_all_tokens = True
        return options

    def _define(self, name, is_term, exp, params=(), options=None, *, override=False):
        if name in self._definitions:
            if not override:
                self._grammar_error(is_term, "{Type} '{name}' defined more than once", name)
        elif override:
            self._grammar_error(is_term, "Cannot override a nonexisting {type} {name}", name)

        if name.startswith('__'):
            self._grammar_error(is_term, 'Names starting with double-underscore are reserved (Error at {name})', name)

        self._definitions[name] = Definition(is_term, exp, params, self._check_options(is_term, options))

    def _extend(self, name, is_term, exp, params=(), options=None):
        if name not in self._definitions:
            self._grammar_error(is_term, "Can't extend {type} {name} as it wasn't defined before", name)

        d = self._definitions[name]

        if is_term != d.is_term:
            self._grammar_error(is_term, "Cannot extend {type} {name} - one is a terminal, while the other is not.", name)
        if tuple(params) != d.params:
            self._grammar_error(is_term, "Cannot extend {type} with different parameters: {name}", name)

        if d.tree is None:
            self._grammar_error(is_term, "Can't extend {type} {name} - it is abstract.", name)

        # TODO: think about what to do with 'options'
        base = d.tree

        assert isinstance(base, Tree) and base.data == 'expansions'
        base.children.insert(0, exp)

    def _ignore(self, exp_or_name):
        if isinstance(exp_or_name, str):
            self._ignore_names.append(exp_or_name)
        else:
            assert isinstance(exp_or_name, Tree)
            t = exp_or_name
            if t.data == 'expansions' and len(t.children) == 1:
                t2 ,= t.children
                if t2.data=='expansion' and len(t2.children) == 1:
                    item ,= t2.children
                    if item.data == 'value':
                        item ,= item.children
                        if isinstance(item, Terminal):
                            # Keep terminal name, no need to create a new definition
                            self._ignore_names.append(item.name)
                            return

            name = '__IGNORE_%d'% len(self._ignore_names)
            self._ignore_names.append(name)
            self._definitions[name] = Definition(True, t, options=TOKEN_DEFAULT_PRIORITY)

    def _unpack_import(self, stmt, grammar_name):
        if len(stmt.children) > 1:
            path_node, arg1 = stmt.children
        else:
            path_node, = stmt.children
            arg1 = None

        if isinstance(arg1, Tree):  # Multi import
            dotted_path = tuple(path_node.children)
            names = arg1.children
            aliases = dict(zip(names, names))  # Can't have aliased multi import, so all aliases will be the same as names
        else:  # Single import
            dotted_path = tuple(path_node.children[:-1])
            if not dotted_path:
                name ,= path_node.children
                raise GrammarError("Nothing was imported from grammar `%s`" % name)
            name = path_node.children[-1]  # Get name from dotted path
            aliases = {name.value: (arg1 or name).value}  # Aliases if exist

        if path_node.data == 'import_lib':  # Import from library
            base_path = None
        else:  # Relative import
            if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
                try:
                    base_file = os.path.abspath(sys.modules['__main__'].__file__)
                except AttributeError:
                    base_file = None
            else:
                base_file = grammar_name  # Import relative to grammar file path if external grammar file
            if base_file:
                if isinstance(base_file, PackageResource):
                    base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
                else:
                    base_path = os.path.split(base_file)[0]
            else:
                base_path = os.path.abspath(os.path.curdir)

        return dotted_path, base_path, aliases
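
    # Rough examples of what _unpack_import returns (illustration only; the
    # shapes are inferred from the code above):
    #
    #   %import common.NUMBER          ->  ('common',), None, {'NUMBER': 'NUMBER'}
    #   %import common.NUMBER -> NUM   ->  ('common',), None, {'NUMBER': 'NUM'}
    #   %import .mygrammar (a, B)      ->  ('mygrammar',), <dir of importing grammar>, {'a': 'a', 'B': 'B'}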


    def _unpack_definition(self, tree, mangle):
        if tree.data == 'rule':
            name, params, exp, opts = _make_rule_tuple(*tree.children)
            is_term = False
        else:
            name = tree.children[0].value
            params = ()     # TODO terminal templates
            opts = int(tree.children[1]) if len(tree.children) == 3 else TOKEN_DEFAULT_PRIORITY  # priority
            exp = tree.children[-1]
            is_term = True

        if mangle is not None:
            params = tuple(mangle(p) for p in params)
            name = mangle(name)

        exp = _mangle_definition_tree(exp, mangle)
        return name, is_term, exp, params, opts

    def load_grammar(self, grammar_text: str, grammar_name: str="<?>", mangle: Optional[Callable[[str], str]]=None) -> None:
        tree = _parse_grammar(grammar_text, grammar_name)

        imports: Dict[Tuple[str, ...], Tuple[Optional[str], Dict[str, str]]] = {}

        for stmt in tree.children:
            if stmt.data == 'import':
                dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
                try:
                    import_base_path, import_aliases = imports[dotted_path]
                    assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
                    import_aliases.update(aliases)
                except KeyError:
                    imports[dotted_path] = base_path, aliases

        for dotted_path, (base_path, aliases) in imports.items():
            self.do_import(dotted_path, base_path, aliases, mangle)

        for stmt in tree.children:
            if stmt.data in ('term', 'rule'):
                self._define(*self._unpack_definition(stmt, mangle))
            elif stmt.data == 'override':
                r ,= stmt.children
                self._define(*self._unpack_definition(r, mangle), override=True)
            elif stmt.data == 'extend':
                r ,= stmt.children
                self._extend(*self._unpack_definition(r, mangle))
            elif stmt.data == 'ignore':
                # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
                if mangle is None:
                    self._ignore(*stmt.children)
            elif stmt.data == 'declare':
                for symbol in stmt.children:
                    assert isinstance(symbol, Symbol), symbol
                    is_term = isinstance(symbol, Terminal)
                    if mangle is None:
                        name = symbol.name
                    else:
                        name = mangle(symbol.name)
                    self._define(name, is_term, None)
            elif stmt.data == 'import':
                pass
            else:
                assert False, stmt

        term_defs = { name: d.tree
                      for name, d in self._definitions.items()
                      if d.is_term
                    }
        resolve_term_references(term_defs)

    def _remove_unused(self, used):
        def rule_dependencies(symbol):
            try:
                d = self._definitions[symbol]
            except KeyError:
                return []
            if d.is_term:
                return []
            return _find_used_symbols(d.tree) - set(d.params)

        _used = set(bfs(used, rule_dependencies))
        self._definitions = {k: v for k, v in self._definitions.items() if k in _used}

    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None:
        assert dotted_path
        mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
        grammar_path = os.path.join(*dotted_path) + EXT
        to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
        for source in to_try:
            try:
                if callable(source):
                    joined_path, text = source(base_path, grammar_path)
                else:
                    joined_path = os.path.join(source, grammar_path)
                    with open(joined_path, encoding='utf8') as f:
                        text = f.read()
            except IOError:
                continue
            else:
                h = sha256_digest(text)
                if self.used_files.get(joined_path, h) != h:
                    raise RuntimeError("Grammar file was changed during importing")
                self.used_files[joined_path] = h

                gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files)
                gb.load_grammar(text, joined_path, mangle)
                gb._remove_unused(map(mangle, aliases))
                for name in gb._definitions:
                    if name in self._definitions:
                        raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path))

                self._definitions.update(**gb._definitions)
                break
        else:
            # Search failed. Make Python throw a nice error.
            open(grammar_path, encoding='utf8')
            assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)

    def validate(self) -> None:
        for name, d in self._definitions.items():
            params = d.params
            exp = d.tree

            for i, p in enumerate(params):
                if p in self._definitions:
                    raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
                if p in params[:i]:
                    raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))

            if exp is None:  # Remaining checks don't apply to abstract rules/terminals (created with %declare)
                continue

            for temp in exp.find_data('template_usage'):
                sym = temp.children[0].name
                args = temp.children[1:]
                if sym not in params:
                    if sym not in self._definitions:
                        self._grammar_error(d.is_term, "Template '%s' used but not defined (in {type} {name})" % sym, name)
                    if len(args) != len(self._definitions[sym].params):
                        expected, actual = len(self._definitions[sym].params), len(args)
                        self._grammar_error(d.is_term, "Wrong number of template arguments used for {name} "
                                            "(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name)

            for sym in _find_used_symbols(exp):
                if sym not in self._definitions and sym not in params:
                    self._grammar_error(d.is_term, "{Type} '{name}' used but not defined (in {type2} {name2})", sym, name)

        if not set(self._definitions).issuperset(self._ignore_names):
            raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))

    def build(self) -> Grammar:
        self.validate()
        rule_defs = []
        term_defs = []
        for name, d in self._definitions.items():
            (params, exp, options) = d.params, d.tree, d.options
            if d.is_term:
                assert len(params) == 0
                term_defs.append((name, (exp, options)))
            else:
                rule_defs.append((name, params, exp, options))
        # resolve_term_references(term_defs)
        return Grammar(rule_defs, term_defs, self._ignore_names)


def verify_used_files(file_hashes):
    for path, old in file_hashes.items():
        text = None
        if isinstance(path, str) and os.path.exists(path):
            with open(path, encoding='utf8') as f:
                text = f.read()
        elif isinstance(path, PackageResource):
            with suppress(IOError):
                text = pkgutil.get_data(*path).decode('utf-8')
        if text is None:  # We don't know how to load the path. ignore it.
            continue

        current = sha256_digest(text)
        if old != current:
            logger.info("File %r changed, rebuilding Parser" % path)
            return False
    return True


def list_grammar_imports(grammar, import_paths=[]):
    "Returns a list of paths to the lark grammars imported by the given grammar (recursively)"
    builder = GrammarBuilder(False, import_paths)
    builder.load_grammar(grammar, '<string>')
    return list(builder.used_files.keys())


def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
    builder = GrammarBuilder(global_keep_all_tokens, import_paths)
    builder.load_grammar(grammar, source)
    return builder.build(), builder.used_files


def sha256_digest(s: str) -> str:
    """Get the sha256 digest of a string

    Supports the `usedforsecurity` argument for Python 3.9+ to allow running on
    a FIPS-enabled system.
    """
    if sys.version_info >= (3, 9):
        return hashlib.sha256(s.encode('utf8'), usedforsecurity=False).hexdigest()
    else:
        return hashlib.sha256(s.encode('utf8')).hexdigest()
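
# A quick check of the digest helper (illustration only; `_demo_sha256_digest`
# is a hypothetical helper). The result is a 64-character hex string, and is
# what `used_files` and `verify_used_files` compare against:
def _demo_sha256_digest():
    h = sha256_digest('start: "a"\n')
    assert len(h) == 64 and all(c in '0123456789abcdef' for c in h)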