"""Parses and compiles Lark grammars into an internal representation.
"""

import hashlib
import os.path
import sys
from collections import namedtuple
from copy import copy, deepcopy
import pkgutil
from ast import literal_eval
from contextlib import suppress
from typing import List, Tuple, Union, Callable, Dict, Optional, Sequence

from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors
from .lexer import Token, TerminalDef, PatternStr, PatternRE

from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import ParsingFrontend
from .common import LexerConf, ParserConf
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, TOKEN_DEFAULT_PRIORITY
from .utils import classify, dedup_list
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput

from .tree import Tree, SlottedTree as ST
from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive

inline_args = v_args(inline=True)

IMPORT_PATHS = ['grammars']

EXT = '.lark'

_RE_FLAGS = 'imslux'

_EMPTY = Symbol('__empty__')

_TERMINAL_NAMES = {
    '.' : 'DOT',
    ',' : 'COMMA',
    ':' : 'COLON',
    ';' : 'SEMICOLON',
    '+' : 'PLUS',
    '-' : 'MINUS',
    '*' : 'STAR',
    '/' : 'SLASH',
    '\\' : 'BACKSLASH',
    '|' : 'VBAR',
    '?' : 'QMARK',
    '!' : 'BANG',
    '@' : 'AT',
    '#' : 'HASH',
    '$' : 'DOLLAR',
    '%' : 'PERCENT',
    '^' : 'CIRCUMFLEX',
    '&' : 'AMPERSAND',
    '_' : 'UNDERSCORE',
    '<' : 'LESSTHAN',
    '>' : 'MORETHAN',
    '=' : 'EQUAL',
    '"' : 'DBLQUOTE',
    '\'' : 'QUOTE',
    '`' : 'BACKQUOTE',
    '~' : 'TILDE',
    '(' : 'LPAR',
    ')' : 'RPAR',
    '{' : 'LBRACE',
    '}' : 'RBRACE',
    '[' : 'LSQB',
    ']' : 'RSQB',
    '\n' : 'NEWLINE',
    '\r\n' : 'CRLF',
    '\t' : 'TAB',
    ' ' : 'SPACE',
}

# Grammar Parser
TERMINALS = {
    '_LPAR': r'\(',
    '_RPAR': r'\)',
    '_LBRA': r'\[',
    '_RBRA': r'\]',
    '_LBRACE': r'\{',
    '_RBRACE': r'\}',
    'OP': '[+*]|[?](?![a-z_])',
    '_COLON': ':',
    '_COMMA': ',',
    '_OR': r'\|',
    '_DOT': r'\.(?!\.)',
    '_DOTDOT': r'\.\.',
    'TILDE': '~',
    'RULE_MODIFIERS': '(!|![?]?|[?]!?)(?=[_a-z])',
    'RULE': '_?[a-z][_a-z0-9]*',
    'TERMINAL': '_?[A-Z][_A-Z0-9]*',
    'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
    'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS,
    '_NL': r'(\r?\n)+\s*',
    '_NL_OR': r'(\r?\n)+\s*\|',
    'WS': r'[ \t]+',
    'COMMENT': r'\s*//[^\n]*|\s*#[^\n]*',
    'BACKSLASH': r'\\[ ]*\n',
    '_TO': '->',
    '_IGNORE': r'%ignore',
    '_OVERRIDE': r'%override',
    '_DECLARE': r'%declare',
    '_EXTEND': r'%extend',
    '_IMPORT': r'%import',
    'NUMBER': r'[+-]?\d+',
}
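
# A minimal sketch (illustration only; `_demo_meta_terminals` is a hypothetical
# helper): the meta-grammar terminals above are plain regexps, so their behavior
# can be checked directly with `re`. Note how the lookahead in OP keeps a bare
# `?` operator distinct from a `?rule` modifier:
def _demo_meta_terminals():
    import re
    assert re.fullmatch(TERMINALS['RULE'], '_ambig')      # rule names are lowercase
    assert re.fullmatch(TERMINALS['TERMINAL'], 'NUMBER')  # terminal names are uppercase
    assert re.match(TERMINALS['OP'], '?')                 # bare '?' is the maybe-operator
    assert not re.match(TERMINALS['OP'], '?e')            # '?e' starts a rule modifier instead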


RULES = {
    'start': ['_list'],
    '_list': ['_item', '_list _item'],
    '_item': ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'],

    'rule': ['rule_modifiers RULE template_params priority _COLON expansions _NL'],
    'rule_modifiers': ['RULE_MODIFIERS',
                       ''],
    'priority': ['_DOT NUMBER',
                 ''],
    'template_params': ['_LBRACE _template_params _RBRACE',
                        ''],
    '_template_params': ['RULE',
                         '_template_params _COMMA RULE'],
    'expansions': ['_expansions'],
    '_expansions': ['alias',
                    '_expansions _OR alias',
                    '_expansions _NL_OR alias'],

    '?alias': ['expansion _TO nonterminal', 'expansion'],
    'expansion': ['_expansion'],

    '_expansion': ['', '_expansion expr'],

    '?expr': ['atom',
              'atom OP',
              'atom TILDE NUMBER',
              'atom TILDE NUMBER _DOTDOT NUMBER',
              ],

    '?atom': ['_LPAR expansions _RPAR',
              'maybe',
              'value'],

    'value': ['terminal',
              'nonterminal',
              'literal',
              'range',
              'template_usage'],

    'terminal': ['TERMINAL'],
    'nonterminal': ['RULE'],

    '?name': ['RULE', 'TERMINAL'],
    '?symbol': ['terminal', 'nonterminal'],

    'maybe': ['_LBRA expansions _RBRA'],
    'range': ['STRING _DOTDOT STRING'],

    'template_usage': ['nonterminal _LBRACE _template_args _RBRACE'],
    '_template_args': ['value',
                       '_template_args _COMMA value'],

    'term': ['TERMINAL _COLON expansions _NL',
             'TERMINAL _DOT NUMBER _COLON expansions _NL'],
    'override': ['_OVERRIDE rule',
                 '_OVERRIDE term'],
    'extend': ['_EXTEND rule',
               '_EXTEND term'],
    'ignore': ['_IGNORE expansions _NL'],
    'declare': ['_DECLARE _declare_args _NL'],
    'import': ['_IMPORT _import_path _NL',
               '_IMPORT _import_path _LPAR name_list _RPAR _NL',
               '_IMPORT _import_path _TO name _NL'],

    '_import_path': ['import_lib', 'import_rel'],
    'import_lib': ['_import_args'],
    'import_rel': ['_DOT _import_args'],
    '_import_args': ['name', '_import_args _DOT name'],

    'name_list': ['_name_list'],
    '_name_list': ['name', '_name_list _COMMA name'],

    '_declare_args': ['symbol', '_declare_args symbol'],
    'literal': ['REGEXP', 'STRING'],
}


# Value 5 keeps the number of states in the lalr parser somewhat minimal.
# It isn't optimal, but close to it. See PR #949
SMALL_FACTOR_THRESHOLD = 5
# The threshold that decides whether a repeat via ~ is split up into different rules.
# 50 is chosen since it keeps the number of states low and therefore lalr analysis time low,
# while not being too aggressive and unnecessarily creating rules that might cause shift/reduce conflicts.
# (See PR #949)
REPEAT_BREAK_THRESHOLD = 50


class FindRuleSize(Transformer):
    def __init__(self, keep_all_tokens):
        self.keep_all_tokens = keep_all_tokens

    def _will_not_get_removed(self, sym):
        if isinstance(sym, NonTerminal):
            return not sym.name.startswith('_')
        if isinstance(sym, Terminal):
            return self.keep_all_tokens or not sym.filter_out
        if sym is _EMPTY:
            return False
        assert False, sym

    def _args_as_int(self, args):
        for a in args:
            if isinstance(a, int):
                yield a
            elif isinstance(a, Symbol):
                yield 1 if self._will_not_get_removed(a) else 0
            else:
                assert False

    def expansion(self, args):
        return sum(self._args_as_int(args))

    def expansions(self, args):
        return max(self._args_as_int(args))


@inline_args
class EBNF_to_BNF(Transformer_InPlace):
    def __init__(self):
        self.new_rules = []
        self.rules_cache = {}
        self.prefix = 'anon'
        self.i = 0
        self.rule_options = None

    def _name_rule(self, inner):
        new_name = '__%s_%s_%d' % (self.prefix, inner, self.i)
        self.i += 1
        return new_name

    def _add_rule(self, key, name, expansions):
        t = NonTerminal(name)
        self.new_rules.append((name, expansions, self.rule_options))
        self.rules_cache[key] = t
        return t

    def _add_recurse_rule(self, type_, expr):
        try:
            return self.rules_cache[expr]
        except KeyError:
            new_name = self._name_rule(type_)
            t = NonTerminal(new_name)
            tree = ST('expansions', [
                ST('expansion', [expr]),
                ST('expansion', [t, expr])
            ])
            return self._add_rule(expr, new_name, tree)

    def _add_repeat_rule(self, a, b, target, atom):
        """Generate a rule that repeats ``target`` ``a`` times, and repeats ``atom`` ``b`` times.

        When called recursively (into target), it repeats atom x(n) times, where:
            x(0) = 1
            x(n) = a(n) * x(n-1) + b(n)

        Example rule when a=3, b=4:

            new_rule: target target target atom atom atom atom

        """
        key = (a, b, target, atom)
        try:
            return self.rules_cache[key]
        except KeyError:
            new_name = self._name_rule('repeat_a%d_b%d' % (a, b))
            tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
            return self._add_rule(key, new_name, tree)

    def _add_repeat_opt_rule(self, a, b, target, target_opt, atom):
        """Creates a rule that matches ``atom`` 0 to (a*n+b)-1 times,
        assuming that ``target`` matches atom exactly n times, and ``target_opt``
        matches atom 0 to n-1 times.

        First we generate target * i followed by target_opt, for i from 0 to a-1.
        These match atom 0 to n*a - 1 times.

        Then we generate target * a followed by atom * i, for i from 0 to b-1.
        These match atom n*a to n*a + b-1 times.

        The created rule will not have any shift/reduce conflicts, so that it can be used with lalr.

        Example rule when a=3, b=4:

            new_rule: target_opt
                    | target target_opt
                    | target target target_opt

                    | target target target
                    | target target target atom
                    | target target target atom atom
                    | target target target atom atom atom

        """
        key = (a, b, target, atom, "opt")
        try:
            return self.rules_cache[key]
        except KeyError:
            new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b))
            tree = ST('expansions', [
                ST('expansion', [target]*i + [target_opt]) for i in range(a)
            ] + [
                ST('expansion', [target]*a + [atom]*i) for i in range(b)
            ])
            return self._add_rule(key, new_name, tree)

    def _generate_repeats(self, rule, mn, mx):
        """Generates a rule tree that repeats ``rule`` between ``mn`` and ``mx`` times.
        """
        # For a small number of repeats, we can take the naive approach
        if mx < REPEAT_BREAK_THRESHOLD:
            return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])

        # For large repeat values, we break the repetition into sub-rules.
        # We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``.
        # We then use small_factors to split mn and diff up into values [(a, b), ...].
        # These values are used with the help of _add_repeat_rule and _add_repeat_opt_rule
        # to generate a complete rule/expression that matches the corresponding number of repeats.
        mn_target = rule
        for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD):
            mn_target = self._add_repeat_rule(a, b, mn_target, rule)
        if mx == mn:
            return mn_target

        diff = mx - mn + 1  # We add one because _add_repeat_opt_rule generates rules that match one less
        diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD)
        diff_target = rule  # Match rule 1 time
        diff_opt_target = ST('expansion', [])  # Match rule 0 times (i.e. up to 1 - 1 = 0 times)
        for a, b in diff_factors[:-1]:
            diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
            diff_target = self._add_repeat_rule(a, b, diff_target, rule)

        a, b = diff_factors[-1]
        diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)

        return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])])

    def expr(self, rule, op, *args):
        if op.value == '?':
            empty = ST('expansion', [])
            return ST('expansions', [rule, empty])
        elif op.value == '+':
            # a : b c+ d
            #   -->
            # a : b _c d
            # _c : _c c | c;
            return self._add_recurse_rule('plus', rule)
        elif op.value == '*':
            # a : b c* d
            #   -->
            # a : b _c? d
            # _c : _c c | c;
            new_name = self._add_recurse_rule('star', rule)
            return ST('expansions', [new_name, ST('expansion', [])])
        elif op.value == '~':
            if len(args) == 1:
                mn = mx = int(args[0])
            else:
                mn, mx = map(int, args)
                if mx < mn or mn < 0:
                    raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))

            return self._generate_repeats(rule, mn, mx)

        assert False, op

    def maybe(self, rule):
        keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
        rule_size = FindRuleSize(keep_all_tokens).transform(rule)
        empty = ST('expansion', [_EMPTY] * rule_size)
        return ST('expansions', [rule, empty])
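
# A small self-check of the repeat machinery above (illustration only;
# `_demo_generate_repeats` is a hypothetical helper, and the exact contract of
# ``small_factors`` is inferred from the docstrings above). For small ranges
# the naive branch emits one alternative per repeat count; for large counts,
# folding x(n) = a*x(n-1) + b over the factor pairs must reproduce n:
def _demo_generate_repeats():
    ebnf = EBNF_to_BNF()
    tree = ebnf._generate_repeats(NonTerminal('rule'), 2, 4)
    # rule~2..4  -->  rule rule | rule rule rule | rule rule rule rule
    assert [len(exp.children) for exp in tree.children] == [2, 3, 4]

    x = 1
    for a, b in small_factors(100, SMALL_FACTOR_THRESHOLD):
        x = a * x + b   # each level repeats `target` a times and `atom` b times
    assert x == 100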



class SimplifyRule_Visitor(Visitor):

    @staticmethod
    def _flatten(tree):
        while tree.expand_kids_by_data(tree.data):
            pass

    def expansion(self, tree):
        # rules_list unpacking
        # a : b (c|d) e
        #  -->
        # a : b c e | b d e
        #
        # In AST terms:
        # expansion(b, expansions(c, d), e)
        #   -->
        # expansions( expansion(b, c, e), expansion(b, d, e) )

        self._flatten(tree)

        for i, child in enumerate(tree.children):
            if isinstance(child, Tree) and child.data == 'expansions':
                tree.data = 'expansions'
                tree.children = [self.visit(ST('expansion', [option if i == j else other
                                                             for j, other in enumerate(tree.children)]))
                                 for option in dedup_list(child.children)]
                self._flatten(tree)
                break

    def alias(self, tree):
        rule, alias_name = tree.children
        if rule.data == 'expansions':
            aliases = []
            for child in tree.children[0].children:
                aliases.append(ST('alias', [child, alias_name]))
            tree.data = 'expansions'
            tree.children = aliases

    def expansions(self, tree):
        self._flatten(tree)
        # Ensure all children are unique
        if len(set(tree.children)) != len(tree.children):
            tree.children = dedup_list(tree.children)   # dedup is expensive, so try to minimize its use
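
# A sketch of the rules_list unpacking described in ``expansion`` above
# (illustration only; `_demo_rules_list_unpacking` is a hypothetical helper):
# `b (c|d) e` becomes the two alternatives `b c e | b d e`.
def _demo_rules_list_unpacking():
    b, c, d, e = (NonTerminal(name) for name in 'bcde')
    tree = ST('expansion', [b, ST('expansions', [ST('expansion', [c]),
                                                 ST('expansion', [d])]), e])
    SimplifyRule_Visitor().visit(tree)
    assert tree.data == 'expansions'
    assert [exp.children for exp in tree.children] == [[b, c, e], [b, d, e]]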



class RuleTreeToText(Transformer):
    def expansions(self, x):
        return x

    def expansion(self, symbols):
        return symbols, None

    def alias(self, x):
        (expansion, _alias), alias = x
        assert _alias is None, (alias, expansion, '-', _alias)  # Double alias not allowed
        return expansion, alias.name


class PrepareAnonTerminals(Transformer_InPlace):
    """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them."""

    def __init__(self, terminals):
        self.terminals = terminals
        self.term_set = {td.name for td in self.terminals}
        self.term_reverse = {td.pattern: td for td in terminals}
        self.i = 0
        self.rule_options = None

    @inline_args
    def pattern(self, p):
        value = p.value
        if p in self.term_reverse and p.flags != self.term_reverse[p].pattern.flags:
            raise GrammarError(u'Conflicting flags for the same terminal: %s' % p)

        term_name = None

        if isinstance(p, PatternStr):
            try:
                # If already defined, use the user-defined terminal name
                term_name = self.term_reverse[p].name
            except KeyError:
                # Try to assign an indicative anon-terminal name
                try:
                    term_name = _TERMINAL_NAMES[value]
                except KeyError:
                    if value and is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set:
                        term_name = value.upper()

                if term_name in self.term_set:
                    term_name = None

        elif isinstance(p, PatternRE):
            if p in self.term_reverse:  # Kind of a weird placement
                term_name = self.term_reverse[p].name
        else:
            assert False, p

        if term_name is None:
            term_name = '__ANON_%d' % self.i
            self.i += 1

        if term_name not in self.term_set:
            assert p not in self.term_reverse
            self.term_set.add(term_name)
            termdef = TerminalDef(term_name, p)
            self.term_reverse[p] = termdef
            self.terminals.append(termdef)

        filter_out = False if self.rule_options and self.rule_options.keep_all_tokens else isinstance(p, PatternStr)

        return Terminal(term_name, filter_out=filter_out)


class _ReplaceSymbols(Transformer_InPlace):
    """Helper for ApplyTemplates"""

    def __init__(self):
        self.names = {}

    def value(self, c):
        if len(c) == 1 and isinstance(c[0], Symbol) and c[0].name in self.names:
            return self.names[c[0].name]
        return self.__default__('value', c, None)

    def template_usage(self, c):
        name = c[0].name
        if name in self.names:
            return self.__default__('template_usage', [self.names[name]] + c[1:], None)
        return self.__default__('template_usage', c, None)


class ApplyTemplates(Transformer_InPlace):
    """Apply the templates, creating new rules that represent the used templates"""

    def __init__(self, rule_defs):
        self.rule_defs = rule_defs
        self.replacer = _ReplaceSymbols()
        self.created_templates = set()

    def template_usage(self, c):
        name = c[0].name
        args = c[1:]
        result_name = "%s{%s}" % (name, ",".join(a.name for a in args))
        if result_name not in self.created_templates:
            self.created_templates.add(result_name)
            (_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name)
            assert len(params) == len(args), args
            result_tree = deepcopy(tree)
            self.replacer.names = dict(zip(params, args))
            self.replacer.transform(result_tree)
            self.rule_defs.append((result_name, [], result_tree, deepcopy(options)))
        return NonTerminal(result_name)


def _rfind(s, choices):
    return max(s.rfind(c) for c in choices)


def eval_escaping(s):
    w = ''
    i = iter(s)
    for n in i:
        w += n
        if n == '\\':
            try:
                n2 = next(i)
            except StopIteration:
                raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
            if n2 == '\\':
                w += '\\\\'
            elif n2 not in 'Uuxnftr':
                w += '\\'
            w += n2
    w = w.replace('\\"', '"').replace("'", "\\'")

    to_eval = "u'''%s'''" % w
    try:
        s = literal_eval(to_eval)
    except SyntaxError as e:
        raise GrammarError(s, e)

    return s
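
# A few sanity checks on eval_escaping (illustration only; `_demo_eval_escaping`
# is a hypothetical helper). Recognized escapes are interpreted, unknown ones
# keep their backslash literally:
def _demo_eval_escaping():
    assert eval_escaping(r'a\nb') == 'a\nb'   # \n becomes a newline
    assert eval_escaping(r'\x41') == 'A'      # hex escapes are supported
    assert eval_escaping(r'\d') == '\\d'      # unknown escapes are preserved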



def _literal_to_pattern(literal):
    assert isinstance(literal, Token)
    v = literal.value
    flag_start = _rfind(v, '/"')+1
    assert flag_start > 0
    flags = v[flag_start:]
    assert all(f in _RE_FLAGS for f in flags), flags

    if literal.type == 'STRING' and '\n' in v:
        raise GrammarError('You cannot put newlines in string literals')

    if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags:
        raise GrammarError('You can only use newlines in regular expressions '
                           'with the `x` (verbose) flag')

    v = v[:flag_start]
    assert v[0] == v[-1] and v[0] in '"/'
    x = v[1:-1]

    s = eval_escaping(x)

    if s == "":
        raise GrammarError("Empty terminals are not allowed (%s)" % literal)

    if literal.type == 'STRING':
        s = s.replace('\\\\', '\\')
        return PatternStr(s, flags, raw=literal.value)
    elif literal.type == 'REGEXP':
        return PatternRE(s, flags, raw=literal.value)
    else:
        assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'
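
# A sketch of the two literal forms (illustration only; `_demo_literal_to_pattern`
# is a hypothetical helper). String literals become PatternStr, regexp literals
# become PatternRE with their trailing flags split off:
def _demo_literal_to_pattern():
    p1 = _literal_to_pattern(Token('STRING', '"hello"'))
    assert isinstance(p1, PatternStr) and p1.value == 'hello'
    p2 = _literal_to_pattern(Token('REGEXP', '/a+/i'))
    assert isinstance(p2, PatternRE) and p2.value == 'a+' and 'i' in p2.flags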



@inline_args
class PrepareLiterals(Transformer_InPlace):
    def literal(self, literal):
        return ST('pattern', [_literal_to_pattern(literal)])

    def range(self, start, end):
        assert start.type == end.type == 'STRING'
        start = start.value[1:-1]
        end = end.value[1:-1]
        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
        regexp = '[%s-%s]' % (start, end)
        return ST('pattern', [PatternRE(regexp)])


def _make_joined_pattern(regexp, flags_set):
    return PatternRE(regexp, ())

class TerminalTreeToPattern(Transformer_NonRecursive):
    def pattern(self, ps):
        p ,= ps
        return p

    def expansion(self, items):
        assert items
        if len(items) == 1:
            return items[0]

        pattern = ''.join(i.to_regexp() for i in items)
        return _make_joined_pattern(pattern, {i.flags for i in items})

    def expansions(self, exps):
        if len(exps) == 1:
            return exps[0]

        # Do a bit of sorting to make sure that the longest option is returned
        # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match)
        exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value)))

        pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
        return _make_joined_pattern(pattern, {i.flags for i in exps})
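
    # For reference (illustration only): Python's re prefers the leftmost
    # alternative that matches, regardless of length. For example,
    #     re.match('l|ll', 'll').group() == 'l'
    #     re.match('ll|l', 'll').group() == 'll'
    # hence the sort above puts wider patterns first.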


    def expr(self, args):
        inner, op = args[:2]
        if op == '~':
            if len(args) == 3:
                op = "{%d}" % int(args[2])
            else:
                mn, mx = map(int, args[2:])
                if mx < mn:
                    raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
                op = "{%d,%d}" % (mn, mx)
        else:
            assert len(args) == 2
        return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)

    def maybe(self, expr):
        return self.expr(expr + ['?'])

    def alias(self, t):
        raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")

    def value(self, v):
        return v[0]


class ValidateSymbols(Transformer_InPlace):
    def value(self, v):
        v ,= v
        assert isinstance(v, (Tree, Symbol))
        return v


def nr_deepcopy_tree(t):
    """Deepcopy tree `t` without recursion"""
    return Transformer_NonRecursive(False).transform(t)


class Grammar:

    term_defs: List[Tuple[str, Tuple[Tree, int]]]
    rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
    ignore: List[str]

    def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None:
        self.term_defs = term_defs
        self.rule_defs = rule_defs
        self.ignore = ignore

    def compile(self, start, terminals_to_keep):
        # We change the trees in-place (to support huge grammars)
        # So deepcopy allows calling compile more than once.
        term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs]
        rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs]

        # ===================
        #  Compile Terminals
        # ===================

        # Convert terminal-trees to strings/regexps

        for name, (term_tree, priority) in term_defs:
            if term_tree is None:  # Terminal added through %declare
                continue
            expansions = list(term_tree.find_data('expansion'))
            if len(expansions) == 1 and not expansions[0].children:
                raise GrammarError("Terminals cannot be empty (%s)" % name)

        transformer = PrepareLiterals() * TerminalTreeToPattern()
        terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
                     for name, (term_tree, priority) in term_defs if term_tree]

        # =================
        #  Compile Rules
        # =================

        # 1. Pre-process terminals
        anon_tokens_transf = PrepareAnonTerminals(terminals)
        transformer = PrepareLiterals() * ValidateSymbols() * anon_tokens_transf  # Adds to terminals

        # 2. Inline Templates

        transformer *= ApplyTemplates(rule_defs)

        # 3. Convert EBNF to BNF (and apply steps 1 & 2)
        ebnf_to_bnf = EBNF_to_BNF()
        rules = []
        i = 0
        while i < len(rule_defs):  # We have to do it like this because rule_defs might grow due to templates
            name, params, rule_tree, options = rule_defs[i]
            i += 1
            if len(params) != 0:  # Don't transform templates
                continue
            rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
            ebnf_to_bnf.rule_options = rule_options
            ebnf_to_bnf.prefix = name
            anon_tokens_transf.rule_options = rule_options
            tree = transformer.transform(rule_tree)
            res = ebnf_to_bnf.transform(tree)
            rules.append((name, res, options))
        rules += ebnf_to_bnf.new_rules

        assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"

        # 4. Compile tree to Rule objects
        rule_tree_to_text = RuleTreeToText()

        simplify_rule = SimplifyRule_Visitor()
        compiled_rules = []
        for rule_content in rules:
            name, tree, options = rule_content
            simplify_rule.visit(tree)
            expansions = rule_tree_to_text.transform(tree)

            for i, (expansion, alias) in enumerate(expansions):
                if alias and name.startswith('_'):
                    raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

                empty_indices = [x==_EMPTY for x in expansion]
                if any(empty_indices):
                    exp_options = copy(options) or RuleOptions()
                    exp_options.empty_indices = empty_indices
                    expansion = [x for x in expansion if x!=_EMPTY]
                else:
                    exp_options = options

                for sym in expansion:
                    assert isinstance(sym, Symbol)
                    if sym.is_term and exp_options and exp_options.keep_all_tokens:
                        sym.filter_out = False
                rule = Rule(NonTerminal(name), expansion, i, alias, exp_options)
                compiled_rules.append(rule)

        # Remove duplicates of empty rules, throw error for non-empty duplicates
        if len(set(compiled_rules)) != len(compiled_rules):
            duplicates = classify(compiled_rules, lambda x: x)
            for dups in duplicates.values():
                if len(dups) > 1:
                    if dups[0].expansion:
                        raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)"
                                           % ''.join('\n * %s' % i for i in dups))

                    # Empty rule; assert all other attributes are equal
                    assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)

            # Remove duplicates
            compiled_rules = list(set(compiled_rules))

        # Filter out unused rules
        while True:
            c = len(compiled_rules)
            used_rules = {s for r in compiled_rules
                          for s in r.expansion
                          if isinstance(s, NonTerminal)
                          and s != r.origin}
            used_rules |= {NonTerminal(s) for s in start}
            compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
            for r in unused:
                logger.debug("Unused rule: %s", r)
            if len(compiled_rules) == c:
                break

        # Filter out unused terminals
        if terminals_to_keep != '*':
            used_terms = {t.name for r in compiled_rules
                          for t in r.expansion
                          if isinstance(t, Terminal)}
            terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
            if unused:
                logger.debug("Unused terminals: %s", [t.name for t in unused])

        return terminals, compiled_rules, self.ignore


PackageResource = namedtuple('PackageResource', 'pkg_name path')


class FromPackageLoader:
    """
    Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using ``open``.
    This allows them to be compatible even from within zip files.

    Relative imports are handled, so you can just freely use them.

    pkg_name: The name of the package. You can probably provide ``__name__`` most of the time.
    search_paths: All the paths that will be searched on absolute imports.
    """

    pkg_name: str
    search_paths: Sequence[str]

    def __init__(self, pkg_name: str, search_paths: Sequence[str]=("", )) -> None:
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]:
        if base_path is None:
            to_try = self.search_paths
        else:
            # Check whether or not the importing grammar was loaded by this module.
            if not isinstance(base_path, PackageResource) or base_path.pkg_name != self.pkg_name:
                # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
                raise IOError()
            to_try = [base_path.path]

        err = None
        for path in to_try:
            full_path = os.path.join(path, grammar_path)
            try:
                text: Optional[bytes] = pkgutil.get_data(self.pkg_name, full_path)
            except IOError as e:
                err = e
                continue
            else:
                return PackageResource(self.pkg_name, full_path), (text.decode() if text else '')

        raise IOError('Cannot find grammar in given paths') from err


stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
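
# A hypothetical usage sketch (illustration only; assumes a package `my_package`
# that ships *.lark files under `my_package/grammars/`). Such a loader can be
# passed to ``Lark(..., import_paths=[loader])`` so that %import statements
# resolve inside the package, even when it is zipped:
def _demo_package_loader():
    loader = FromPackageLoader('my_package', ('grammars', ))
    resource, text = loader(None, 'my_grammar.lark')
    assert isinstance(resource, PackageResource)
    return text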



def resolve_term_references(term_dict):
    # TODO Solve with transitive closure (maybe)

    while True:
        changed = False
        for name, token_tree in term_dict.items():
            if token_tree is None:  # Terminal added through %declare
                continue
            for exp in token_tree.find_data('value'):
                item ,= exp.children
                if isinstance(item, NonTerminal):
                    raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name))
                elif isinstance(item, Terminal):
                    try:
                        term_value = term_dict[item.name]
                    except KeyError:
                        raise GrammarError("Terminal used but not defined: %s" % item.name)
                    assert term_value is not None
                    exp.children[0] = term_value
                    changed = True
                else:
                    assert isinstance(item, Tree)
        if not changed:
            break

    for name, term in term_dict.items():
        if term:    # Not just declared
            for child in term.children:
                ids = [id(x) for x in child.iter_subtrees()]
                if id(term) in ids:
                    raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name)


def symbol_from_strcase(s):
    assert isinstance(s, str)
    return Terminal(s, filter_out=s.startswith('_')) if s.isupper() else NonTerminal(s)


@inline_args
class PrepareGrammar(Transformer_InPlace):
    def terminal(self, name):
        return Terminal(str(name), filter_out=name.startswith('_'))

    def nonterminal(self, name):
        return NonTerminal(name.value)


def _find_used_symbols(tree):
    assert tree.data == 'expansions'
    return {t.name for x in tree.find_data('expansion')
            for t in x.scan_values(lambda t: isinstance(t, Symbol))}


def _get_parser():
    try:
        return _get_parser.cache
    except AttributeError:
        terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

        rules = [(name.lstrip('?'), x, RuleOptions(expand1=name.startswith('?')))
                 for name, x in RULES.items()]
        rules = [Rule(NonTerminal(r), [symbol_from_strcase(s) for s in x.split()], i, None, o)
                 for r, xs, o in rules for i, x in enumerate(xs)]

        callback = ParseTreeBuilder(rules, ST).create_callback()
        import re
        lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT', 'BACKSLASH'])
        parser_conf = ParserConf(rules, callback, ['start'])
        lexer_conf.lexer_type = 'basic'
        parser_conf.parser_type = 'lalr'
        _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None)
        return _get_parser.cache


GRAMMAR_ERRORS = [
    ('Incorrect type of value', ['a: 1\n']),
    ('Unclosed parenthesis', ['a: (\n']),
    ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
    ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
    ('Illegal name for rules or terminals', ['Aa:\n']),
    ('Alias expects lowercase name', ['a: -> "a"\n']),
    ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']),
    ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']),
    ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']),
    ('Terminal names cannot contain dots', ['A.B\n']),
    ('Expecting rule or terminal definition', ['"a"\n']),
    ('%import expects a name', ['%import "a"\n']),
    ('%ignore expects a value', ['%ignore %import\n']),
]


def _translate_parser_exception(parse, e):
    error = e.match_examples(parse, GRAMMAR_ERRORS, use_accepts=True)
    if error:
        return error
    elif 'STRING' in e.expected:
        return "Expecting a value"


def _parse_grammar(text, name, start='start'):
    try:
        tree = _get_parser().parse(text + '\n', start)
    except UnexpectedCharacters as e:
        context = e.get_context(text)
        raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
                           (e.line, e.column, name, context))
    except UnexpectedToken as e:
        context = e.get_context(text)
        error = _translate_parser_exception(_get_parser().parse, e)
        if error:
            raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
        raise

    return PrepareGrammar().transform(tree)


def _error_repr(error):
    if isinstance(error, UnexpectedToken):
        error2 = _translate_parser_exception(_get_parser().parse, error)
        if error2:
            return error2
        expected = ', '.join(error.accepts or error.expected)
        return "Unexpected token %r. Expected one of: {%s}" % (str(error.token), expected)
    else:
        return str(error)


def _search_interactive_parser(interactive_parser, predicate):
    def expand(node):
        path, p = node
        for choice in p.choices():
            t = Token(choice, '')
            try:
                new_p = p.feed_token(t)
            except ParseError:    # Illegal
                pass
            else:
                yield path + (choice,), new_p

    for path, p in bfs_all_unique([((), interactive_parser)], expand):
        if predicate(p):
            return path, p


def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]:
    errors = []
    def on_error(e):
        errors.append((e, _error_repr(e)))

        # recover to a new line
        token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices())
        for token_type in token_path:
            e.interactive_parser.feed_token(Token(token_type, ''))
        e.interactive_parser.feed_token(Token('_NL', '\n'))
        return True

    _tree = _get_parser().parse(text + '\n', start, on_error=on_error)

    errors_by_line = classify(errors, lambda e: e[0].line)
    errors = [el[0] for el in errors_by_line.values()]  # already sorted

    for e in errors:
        e[0].interactive_parser = None
    return errors
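
# A usage sketch (illustration only; `_demo_find_grammar_errors` is a
# hypothetical helper). Because on_error recovers to the next newline, several
# errors can be collected from a single malformed grammar in one pass:
def _demo_find_grammar_errors():
    for err, message in find_grammar_errors('rule1: rule2\n%import "a"\n'):
        print(err.line, message)   # e.g. `2 %import expects a name`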



def _get_mangle(prefix, aliases, base_mangle=None):
    def mangle(s):
        if s in aliases:
            s = aliases[s]
        else:
            if s[0] == '_':
                s = '_%s__%s' % (prefix, s[1:])
            else:
                s = '%s__%s' % (prefix, s)
        if base_mangle is not None:
            s = base_mangle(s)
        return s
    return mangle
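
# A quick illustration (`_demo_mangle` is a hypothetical helper): importing
# from grammar `common` prefixes non-aliased names with `common__`, keeping a
# leading underscore in front so that inlined rules stay inlined:
def _demo_mangle():
    mangle = _get_mangle('common', {'NUMBER': 'NUM'})
    assert mangle('NUMBER') == 'NUM'                            # aliased: renamed directly
    assert mangle('signed_number') == 'common__signed_number'
    assert mangle('_ambig') == '_common__ambig'                 # underscore stays in front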



def _mangle_definition_tree(exp, mangle):
    if mangle is None:
        return exp
    exp = deepcopy(exp)  # TODO: is this needed?
    for t in exp.iter_subtrees():
        for i, c in enumerate(t.children):
            if isinstance(c, Symbol):
                t.children[i] = c.renamed(mangle)

    return exp


def _make_rule_tuple(modifiers_tree, name, params, priority_tree, expansions):
    if modifiers_tree.children:
        m ,= modifiers_tree.children
        expand1 = '?' in m
        if expand1 and name.startswith('_'):
            raise GrammarError("Inlined rules (_rule) cannot use the ?rule modifier.")
        keep_all_tokens = '!' in m
    else:
        keep_all_tokens = False
        expand1 = False

    if priority_tree.children:
        p ,= priority_tree.children
        priority = int(p)
    else:
        priority = None

    if params is not None:
        params = [t.value for t in params.children]  # For the grammar parser

    return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority,
                                                 template_source=(name if params else None))


class Definition:
    def __init__(self, is_term, tree, params=(), options=None):
        self.is_term = is_term
        self.tree = tree
        self.params = tuple(params)
        self.options = options


class GrammarBuilder:

    global_keep_all_tokens: bool
    import_paths: List[Union[str, Callable]]
    used_files: Dict[str, str]

    _definitions: Dict[str, Definition]
    _ignore_names: List[str]

    def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None:
        self.global_keep_all_tokens = global_keep_all_tokens
        self.import_paths = import_paths or []
        self.used_files = used_files or {}

        self._definitions: Dict[str, Definition] = {}
        self._ignore_names: List[str] = []

    def _grammar_error(self, is_term, msg, *names):
        args = {}
        for i, name in enumerate(names, start=1):
            postfix = '' if i == 1 else str(i)
            args['name' + postfix] = name
            args['type' + postfix] = lowercase_type = ("rule", "terminal")[is_term]
            args['Type' + postfix] = lowercase_type.title()
        raise GrammarError(msg.format(**args))

    def _check_options(self, is_term, options):
        if is_term:
            if options is None:
                options = 1
            elif not isinstance(options, int):
                raise GrammarError("Terminals require a single int as 'options' (e.g. priority), got %s" % (type(options),))
        else:
            if options is None:
                options = RuleOptions()
            elif not isinstance(options, RuleOptions):
                raise GrammarError("Rules require a RuleOptions instance as 'options'")
            if self.global_keep_all_tokens:
                options.keep_all_tokens = True
        return options

    def _define(self, name, is_term, exp, params=(), options=None, *, override=False):
        if name in self._definitions:
            if not override:
                self._grammar_error(is_term, "{Type} '{name}' defined more than once", name)
        elif override:
            self._grammar_error(is_term, "Cannot override a nonexisting {type} {name}", name)

        if name.startswith('__'):
            self._grammar_error(is_term, 'Names starting with double-underscore are reserved (Error at {name})', name)

        self._definitions[name] = Definition(is_term, exp, params, self._check_options(is_term, options))

    def _extend(self, name, is_term, exp, params=(), options=None):
        if name not in self._definitions:
            self._grammar_error(is_term, "Can't extend {type} {name} as it wasn't defined before", name)

        d = self._definitions[name]

        if is_term != d.is_term:
            self._grammar_error(is_term, "Cannot extend {type} {name} - one is a terminal, while the other is not.", name)
        if tuple(params) != d.params:
            self._grammar_error(is_term, "Cannot extend {type} with different parameters: {name}", name)

        if d.tree is None:
            self._grammar_error(is_term, "Can't extend {type} {name} - it is abstract.", name)

        # TODO: think about what to do with 'options'
        base = d.tree

        assert isinstance(base, Tree) and base.data == 'expansions'
        base.children.insert(0, exp)

    def _ignore(self, exp_or_name):
        if isinstance(exp_or_name, str):
            self._ignore_names.append(exp_or_name)
        else:
            assert isinstance(exp_or_name, Tree)
            t = exp_or_name
            if t.data == 'expansions' and len(t.children) == 1:
                t2 ,= t.children
                if t2.data=='expansion' and len(t2.children) == 1:
                    item ,= t2.children
                    if item.data == 'value':
                        item ,= item.children
                        if isinstance(item, Terminal):
                            # Keep terminal name, no need to create a new definition
                            self._ignore_names.append(item.name)
                            return

            name = '__IGNORE_%d'% len(self._ignore_names)
            self._ignore_names.append(name)
            self._definitions[name] = Definition(True, t, options=TOKEN_DEFAULT_PRIORITY)

    def _unpack_import(self, stmt, grammar_name):
        if len(stmt.children) > 1:
            path_node, arg1 = stmt.children
        else:
            path_node, = stmt.children
            arg1 = None

        if isinstance(arg1, Tree):  # Multi import
            dotted_path = tuple(path_node.children)
            names = arg1.children
            aliases = dict(zip(names, names))  # Can't have aliased multi import, so all aliases will be the same as names
        else:  # Single import
            dotted_path = tuple(path_node.children[:-1])
            if not dotted_path:
                name ,= path_node.children
                raise GrammarError("Nothing was imported from grammar `%s`" % name)
            name = path_node.children[-1]  # Get name from dotted path
            aliases = {name.value: (arg1 or name).value}  # Aliases if exist

        if path_node.data == 'import_lib':  # Import from library
            base_path = None
        else:  # Relative import
            if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
                try:
                    base_file = os.path.abspath(sys.modules['__main__'].__file__)
                except AttributeError:
                    base_file = None
            else:
                base_file = grammar_name  # Import relative to grammar file path if external grammar file
            if base_file:
                if isinstance(base_file, PackageResource):
                    base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
                else:
                    base_path = os.path.split(base_file)[0]
            else:
                base_path = os.path.abspath(os.path.curdir)

        return dotted_path, base_path, aliases
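
    # Rough examples of what _unpack_import returns (illustration only; the
    # shapes are inferred from the code above):
    #
    #   %import common.NUMBER          ->  ('common',), None, {'NUMBER': 'NUMBER'}
    #   %import common.NUMBER -> NUM   ->  ('common',), None, {'NUMBER': 'NUM'}
    #   %import .mygrammar (a, B)      ->  ('mygrammar',), <dir of importing grammar>, {'a': 'a', 'B': 'B'}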


    def _unpack_definition(self, tree, mangle):
        if tree.data == 'rule':
            name, params, exp, opts = _make_rule_tuple(*tree.children)
            is_term = False
        else:
            name = tree.children[0].value
            params = ()     # TODO terminal templates
            opts = int(tree.children[1]) if len(tree.children) == 3 else TOKEN_DEFAULT_PRIORITY  # priority
            exp = tree.children[-1]
            is_term = True

        if mangle is not None:
            params = tuple(mangle(p) for p in params)
            name = mangle(name)

        exp = _mangle_definition_tree(exp, mangle)
        return name, is_term, exp, params, opts

    def load_grammar(self, grammar_text: str, grammar_name: str="<?>", mangle: Optional[Callable[[str], str]]=None) -> None:
        tree = _parse_grammar(grammar_text, grammar_name)

        imports: Dict[Tuple[str, ...], Tuple[Optional[str], Dict[str, str]]] = {}

        for stmt in tree.children:
            if stmt.data == 'import':
                dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
                try:
                    import_base_path, import_aliases = imports[dotted_path]
                    assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
                    import_aliases.update(aliases)
                except KeyError:
                    imports[dotted_path] = base_path, aliases

        for dotted_path, (base_path, aliases) in imports.items():
            self.do_import(dotted_path, base_path, aliases, mangle)

        for stmt in tree.children:
            if stmt.data in ('term', 'rule'):
                self._define(*self._unpack_definition(stmt, mangle))
            elif stmt.data == 'override':
                r ,= stmt.children
                self._define(*self._unpack_definition(r, mangle), override=True)
            elif stmt.data == 'extend':
                r ,= stmt.children
                self._extend(*self._unpack_definition(r, mangle))
            elif stmt.data == 'ignore':
                # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
                if mangle is None:
                    self._ignore(*stmt.children)
            elif stmt.data == 'declare':
                for symbol in stmt.children:
                    assert isinstance(symbol, Symbol), symbol
                    is_term = isinstance(symbol, Terminal)
                    if mangle is None:
                        name = symbol.name
                    else:
                        name = mangle(symbol.name)
                    self._define(name, is_term, None)
            elif stmt.data == 'import':
                pass
            else:
                assert False, stmt

        term_defs = { name: d.tree
                      for name, d in self._definitions.items()
                      if d.is_term
                    }
        resolve_term_references(term_defs)

    def _remove_unused(self, used):
        def rule_dependencies(symbol):
            try:
                d = self._definitions[symbol]
            except KeyError:
                return []
            if d.is_term:
                return []
            return _find_used_symbols(d.tree) - set(d.params)

        _used = set(bfs(used, rule_dependencies))
        self._definitions = {k: v for k, v in self._definitions.items() if k in _used}

    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None:
        assert dotted_path
        mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
        grammar_path = os.path.join(*dotted_path) + EXT
        to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
        for source in to_try:
            try:
                if callable(source):
                    joined_path, text = source(base_path, grammar_path)
                else:
                    joined_path = os.path.join(source, grammar_path)
                    with open(joined_path, encoding='utf8') as f:
                        text = f.read()
            except IOError:
                continue
            else:
                h = sha256_digest(text)
                if self.used_files.get(joined_path, h) != h:
                    raise RuntimeError("Grammar file was changed during importing")
                self.used_files[joined_path] = h

                gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files)
                gb.load_grammar(text, joined_path, mangle)
                gb._remove_unused(map(mangle, aliases))
                for name in gb._definitions:
                    if name in self._definitions:
                        raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path))

                self._definitions.update(**gb._definitions)
                break
        else:
            # Search failed. Make Python throw a nice error.
            open(grammar_path, encoding='utf8')
            assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)

    def validate(self) -> None:
        for name, d in self._definitions.items():
            params = d.params
            exp = d.tree

            for i, p in enumerate(params):
                if p in self._definitions:
                    raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
                if p in params[:i]:
                    raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))

            if exp is None:  # Remaining checks don't apply to abstract rules/terminals (created with %declare)
                continue

            for temp in exp.find_data('template_usage'):
                sym = temp.children[0].name
                args = temp.children[1:]
                if sym not in params:
                    if sym not in self._definitions:
                        self._grammar_error(d.is_term, "Template '%s' used but not defined (in {type} {name})" % sym, name)
                    if len(args) != len(self._definitions[sym].params):
                        expected, actual = len(self._definitions[sym].params), len(args)
                        self._grammar_error(d.is_term, "Wrong number of template arguments used for {name} "
                                            "(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name)

            for sym in _find_used_symbols(exp):
                if sym not in self._definitions and sym not in params:
                    self._grammar_error(d.is_term, "{Type} '{name}' used but not defined (in {type2} {name2})", sym, name)

        if not set(self._definitions).issuperset(self._ignore_names):
            raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))

    def build(self) -> Grammar:
        self.validate()
        rule_defs = []
        term_defs = []
        for name, d in self._definitions.items():
            (params, exp, options) = d.params, d.tree, d.options
            if d.is_term:
                assert len(params) == 0
                term_defs.append((name, (exp, options)))
            else:
                rule_defs.append((name, params, exp, options))
        # resolve_term_references(term_defs)
        return Grammar(rule_defs, term_defs, self._ignore_names)


def verify_used_files(file_hashes):
    for path, old in file_hashes.items():
        text = None
        if isinstance(path, str) and os.path.exists(path):
            with open(path, encoding='utf8') as f:
                text = f.read()
        elif isinstance(path, PackageResource):
            with suppress(IOError):
                text = pkgutil.get_data(*path).decode('utf-8')
        if text is None:  # We don't know how to load the path. ignore it.
            continue

        current = sha256_digest(text)
        if old != current:
            logger.info("File %r changed, rebuilding Parser" % path)
            return False
    return True


def list_grammar_imports(grammar, import_paths=[]):
    "Returns a list of paths to the lark grammars imported by the given grammar (recursively)"
    builder = GrammarBuilder(False, import_paths)
    builder.load_grammar(grammar, '<string>')
    return list(builder.used_files.keys())


def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
    builder = GrammarBuilder(global_keep_all_tokens, import_paths)
    builder.load_grammar(grammar, source)
    return builder.build(), builder.used_files


def sha256_digest(s: str) -> str:
    """Get the sha256 digest of a string

    Supports the `usedforsecurity` argument for Python 3.9+ to allow running on
    a FIPS-enabled system.
    """
    if sys.version_info >= (3, 9):
        return hashlib.sha256(s.encode('utf8'), usedforsecurity=False).hexdigest()
    else:
        return hashlib.sha256(s.encode('utf8')).hexdigest()
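
# A quick check of the digest helper (illustration only; `_demo_sha256_digest`
# is a hypothetical helper). The result is a 64-character hex string, and is
# what `used_files` and `verify_used_files` compare against:
def _demo_sha256_digest():
    h = sha256_digest('start: "a"\n')
    assert len(h) == 64 and all(c in '0123456789abcdef' for c in h)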