1"""Parses and compiles Lark grammars into an internal representation.
2"""
4import hashlib
5import os.path
6import sys
7from collections import namedtuple
8from copy import copy, deepcopy
9import pkgutil
10from ast import literal_eval
11from contextlib import suppress
12from typing import List, Tuple, Union, Callable, Dict, Optional, Sequence, Generator
14from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors, OrderedSet
15from .lexer import Token, TerminalDef, PatternStr, PatternRE, Pattern
17from .parse_tree_builder import ParseTreeBuilder
18from .parser_frontends import ParsingFrontend
19from .common import LexerConf, ParserConf
20from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, TOKEN_DEFAULT_PRIORITY
21from .utils import classify, dedup_list
22from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput
24from .tree import Tree, SlottedTree as ST
25from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive
26inline_args = v_args(inline=True)
28IMPORT_PATHS = ['grammars']
30EXT = '.lark'
32_RE_FLAGS = 'imslux'
34_EMPTY = Symbol('__empty__')
36_TERMINAL_NAMES = {
37 '.' : 'DOT',
38 ',' : 'COMMA',
39 ':' : 'COLON',
40 ';' : 'SEMICOLON',
41 '+' : 'PLUS',
42 '-' : 'MINUS',
43 '*' : 'STAR',
44 '/' : 'SLASH',
45 '\\' : 'BACKSLASH',
46 '|' : 'VBAR',
47 '?' : 'QMARK',
48 '!' : 'BANG',
49 '@' : 'AT',
50 '#' : 'HASH',
51 '$' : 'DOLLAR',
52 '%' : 'PERCENT',
53 '^' : 'CIRCUMFLEX',
54 '&' : 'AMPERSAND',
55 '_' : 'UNDERSCORE',
56 '<' : 'LESSTHAN',
57 '>' : 'MORETHAN',
58 '=' : 'EQUAL',
59 '"' : 'DBLQUOTE',
60 '\'' : 'QUOTE',
61 '`' : 'BACKQUOTE',
62 '~' : 'TILDE',
63 '(' : 'LPAR',
64 ')' : 'RPAR',
65 '{' : 'LBRACE',
66 '}' : 'RBRACE',
67 '[' : 'LSQB',
68 ']' : 'RSQB',
69 '\n' : 'NEWLINE',
70 '\r\n' : 'CRLF',
71 '\t' : 'TAB',
72 ' ' : 'SPACE',
73}
75# Grammar Parser
76TERMINALS = {
77 '_LPAR': r'\(',
78 '_RPAR': r'\)',
79 '_LBRA': r'\[',
80 '_RBRA': r'\]',
81 '_LBRACE': r'\{',
82 '_RBRACE': r'\}',
83 'OP': '[+*]|[?](?![a-z_])',
84 '_COLON': ':',
85 '_COMMA': ',',
86 '_OR': r'\|',
87 '_DOT': r'\.(?!\.)',
88 '_DOTDOT': r'\.\.',
89 'TILDE': '~',
90 'RULE_MODIFIERS': '(!|![?]?|[?]!?)(?=[_a-z])',
91 'RULE': '_?[a-z][_a-z0-9]*',
92 'TERMINAL': '_?[A-Z][_A-Z0-9]*',
93 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
94 'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS,
95 '_NL': r'(\r?\n)+\s*',
96 '_NL_OR': r'(\r?\n)+\s*\|',
97 'WS': r'[ \t]+',
98 'COMMENT': r'\s*//[^\n]*|\s*#[^\n]*',
99 'BACKSLASH': r'\\[ ]*\n',
100 '_TO': '->',
101 '_IGNORE': r'%ignore',
102 '_OVERRIDE': r'%override',
103 '_DECLARE': r'%declare',
104 '_EXTEND': r'%extend',
105 '_IMPORT': r'%import',
106 'NUMBER': r'[+-]?\d+',
107}
109RULES = {
110 'start': ['_list'],
111 '_list': ['_item', '_list _item'],
112 '_item': ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'],
114 'rule': ['rule_modifiers RULE template_params priority _COLON expansions _NL'],
115 'rule_modifiers': ['RULE_MODIFIERS',
116 ''],
117 'priority': ['_DOT NUMBER',
118 ''],
119 'template_params': ['_LBRACE _template_params _RBRACE',
120 ''],
121 '_template_params': ['RULE',
122 '_template_params _COMMA RULE'],
123 'expansions': ['_expansions'],
124 '_expansions': ['alias',
125 '_expansions _OR alias',
126 '_expansions _NL_OR alias'],
128 '?alias': ['expansion _TO nonterminal', 'expansion'],
129 'expansion': ['_expansion'],
131 '_expansion': ['', '_expansion expr'],
133 '?expr': ['atom',
134 'atom OP',
135 'atom TILDE NUMBER',
136 'atom TILDE NUMBER _DOTDOT NUMBER',
137 ],
139 '?atom': ['_LPAR expansions _RPAR',
140 'maybe',
141 'value'],
143 'value': ['terminal',
144 'nonterminal',
145 'literal',
146 'range',
147 'template_usage'],
149 'terminal': ['TERMINAL'],
150 'nonterminal': ['RULE'],
152 '?name': ['RULE', 'TERMINAL'],
153 '?symbol': ['terminal', 'nonterminal'],
155 'maybe': ['_LBRA expansions _RBRA'],
156 'range': ['STRING _DOTDOT STRING'],
158 'template_usage': ['nonterminal _LBRACE _template_args _RBRACE'],
159 '_template_args': ['value',
160 '_template_args _COMMA value'],
162 'term': ['TERMINAL _COLON expansions _NL',
163 'TERMINAL _DOT NUMBER _COLON expansions _NL'],
164 'override': ['_OVERRIDE rule',
165 '_OVERRIDE term'],
166 'extend': ['_EXTEND rule',
167 '_EXTEND term'],
168 'ignore': ['_IGNORE expansions _NL'],
169 'declare': ['_DECLARE _declare_args _NL'],
170 'import': ['_IMPORT _import_path _NL',
171 '_IMPORT _import_path _LPAR name_list _RPAR _NL',
172 '_IMPORT _import_path _TO name _NL'],
174 '_import_path': ['import_lib', 'import_rel'],
175 'import_lib': ['_import_args'],
176 'import_rel': ['_DOT _import_args'],
177 '_import_args': ['name', '_import_args _DOT name'],
179 'name_list': ['_name_list'],
180 '_name_list': ['name', '_name_list _COMMA name'],
182 '_declare_args': ['symbol', '_declare_args symbol'],
183 'literal': ['REGEXP', 'STRING'],
184}
187# Value 5 keeps the number of states in the lalr parser somewhat minimal
188# It isn't optimal, but close to it. See PR #949
189SMALL_FACTOR_THRESHOLD = 5
190# The Threshold whether repeat via ~ are split up into different rules
191# 50 is chosen since it keeps the number of states low and therefore lalr analysis time low,
192# while not being to overaggressive and unnecessarily creating rules that might create shift/reduce conflicts.
193# (See PR #949)
194REPEAT_BREAK_THRESHOLD = 50
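
# Illustrative sketch (a comment, not executed; it assumes the documented
# contract of lark.utils.small_factors): small_factors(n, SMALL_FACTOR_THRESHOLD)
# decomposes n into pairs [(a, b), ...] such that folding x -> x*a + b, starting
# from x = 1, reproduces n, with each a and b kept small.
# EBNF_to_BNF._generate_repeats below composes _add_repeat_rule in exactly this shape:
#
#     x = 1
#     for a, b in small_factors(n, SMALL_FACTOR_THRESHOLD):
#         x = x * a + b
#     assert x == n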


class FindRuleSize(Transformer):
    def __init__(self, keep_all_tokens: bool):
        self.keep_all_tokens = keep_all_tokens

    def _will_not_get_removed(self, sym: Symbol) -> bool:
        if isinstance(sym, NonTerminal):
            return not sym.name.startswith('_')
        if isinstance(sym, Terminal):
            return self.keep_all_tokens or not sym.filter_out
        if sym is _EMPTY:
            return False
        assert False, sym

    def _args_as_int(self, args: List[Union[int, Symbol]]) -> Generator[int, None, None]:
        for a in args:
            if isinstance(a, int):
                yield a
            elif isinstance(a, Symbol):
                yield 1 if self._will_not_get_removed(a) else 0
            else:
                assert False

    def expansion(self, args) -> int:
        return sum(self._args_as_int(args))

    def expansions(self, args) -> int:
        return max(self._args_as_int(args))


@inline_args
class EBNF_to_BNF(Transformer_InPlace):
    def __init__(self):
        self.new_rules = []
        self.rules_cache = {}
        self.prefix = 'anon'
        self.i = 0
        self.rule_options = None

    def _name_rule(self, inner: str):
        new_name = '__%s_%s_%d' % (self.prefix, inner, self.i)
        self.i += 1
        return new_name

    def _add_rule(self, key, name, expansions):
        t = NonTerminal(name)
        self.new_rules.append((name, expansions, self.rule_options))
        self.rules_cache[key] = t
        return t

    def _add_recurse_rule(self, type_: str, expr: Tree):
        try:
            return self.rules_cache[expr]
        except KeyError:
            new_name = self._name_rule(type_)
            t = NonTerminal(new_name)
            tree = ST('expansions', [
                ST('expansion', [expr]),
                ST('expansion', [t, expr])
            ])
            return self._add_rule(expr, new_name, tree)

    def _add_repeat_rule(self, a, b, target, atom):
        """Generate a rule that repeats target ``a`` times, and repeats atom ``b`` times.

        When called recursively (into target), it repeats atom x(n) times, where:
            x(0) = 1
            x(n) = a(n) * x(n-1) + b

        Example rule when a=3, b=4:

            new_rule: target target target atom atom atom atom

        """
        key = (a, b, target, atom)
        try:
            return self.rules_cache[key]
        except KeyError:
            new_name = self._name_rule('repeat_a%d_b%d' % (a, b))
            tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
            return self._add_rule(key, new_name, tree)

    def _add_repeat_opt_rule(self, a, b, target, target_opt, atom):
        """Creates a rule that matches atom 0 to (a*n+b)-1 times.

        Given that target matches atom exactly n times, and target_opt matches atom 0 to n-1 times:

        First we generate target * i followed by target_opt, for i from 0 to a-1.
        These match atom 0 to n*a - 1 times.

        Then we generate target * a followed by atom * i, for i from 0 to b-1.
        These match atom n*a to n*a + b-1 times.

        The created rule will not have any shift/reduce conflicts, so it can be used with LALR.

        Example rule when a=3, b=4:

            new_rule: target_opt
                    | target target_opt
                    | target target target_opt

                    | target target target
                    | target target target atom
                    | target target target atom atom
                    | target target target atom atom atom

        """
        key = (a, b, target, atom, "opt")
        try:
            return self.rules_cache[key]
        except KeyError:
            new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b))
            tree = ST('expansions', [
                ST('expansion', [target]*i + [target_opt]) for i in range(a)
            ] + [
                ST('expansion', [target]*a + [atom]*i) for i in range(b)
            ])
            return self._add_rule(key, new_name, tree)

    def _generate_repeats(self, rule: Tree, mn: int, mx: int):
        """Generates a rule tree that repeats ``rule`` between ``mn`` and ``mx`` times.
        """
        # For a small number of repeats, we can take the naive approach
        if mx < REPEAT_BREAK_THRESHOLD:
            return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])

        # For large repeat values, we break the repetition into sub-rules.
        # We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``.
        # We then use small_factors to split mn and diff up into values [(a, b), ...].
        # These values are used, with the help of _add_repeat_rule and _add_repeat_opt_rule,
        # to generate a complete rule/expression that matches the corresponding number of repeats.
        mn_target = rule
        for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD):
            mn_target = self._add_repeat_rule(a, b, mn_target, rule)
        if mx == mn:
            return mn_target

        diff = mx - mn + 1  # We add one because _add_repeat_opt_rule generates rules that match one less
        diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD)
        diff_target = rule  # Match rule exactly once
        diff_opt_target = ST('expansion', [])  # Match rule 0 times (i.e. up to 1 - 1 = 0 times)
        for a, b in diff_factors[:-1]:
            diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
            diff_target = self._add_repeat_rule(a, b, diff_target, rule)

        a, b = diff_factors[-1]
        diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)

        return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])])
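
    # Illustrative sketch (hypothetical grammar, as a comment): with
    # REPEAT_BREAK_THRESHOLD = 50, a repeat like ``x ~ 2..3`` stays on the naive
    # path and expands to the alternatives ``x x | x x x``, while ``x ~ 100`` is
    # decomposed through _add_repeat_rule/_add_repeat_opt_rule into a short chain
    # of helper rules instead of 100 inlined copies of ``x``.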

    def expr(self, rule: Tree, op: Token, *args):
        if op.value == '?':
            empty = ST('expansion', [])
            return ST('expansions', [rule, empty])
        elif op.value == '+':
            # a : b c+ d
            #   -->
            # a : b _c d
            # _c : _c c | c;
            return self._add_recurse_rule('plus', rule)
        elif op.value == '*':
            # a : b c* d
            #   -->
            # a : b _c? d
            # _c : _c c | c;
            new_name = self._add_recurse_rule('star', rule)
            return ST('expansions', [new_name, ST('expansion', [])])
        elif op.value == '~':
            if len(args) == 1:
                mn = mx = int(args[0])
            else:
                mn, mx = map(int, args)
                if mx < mn or mn < 0:
                    raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))

            return self._generate_repeats(rule, mn, mx)

        assert False, op

    def maybe(self, rule: Tree):
        keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
        rule_size = FindRuleSize(keep_all_tokens).transform(rule)
        empty = ST('expansion', [_EMPTY] * rule_size)
        return ST('expansions', [rule, empty])
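
    # Illustrative sketch (an assumption about intent, as a comment): for a rule
    # like ``a: b [c] d``, ``maybe`` rewrites ``[c]`` into ``c | <empty>``, where
    # the empty alternative is padded with _EMPTY placeholders. Grammar.compile()
    # later strips those placeholders and records their positions in
    # RuleOptions.empty_indices, so the parse-tree shape stays stable whether or
    # not the optional part matched.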


class SimplifyRule_Visitor(Visitor):

    @staticmethod
    def _flatten(tree: Tree):
        while tree.expand_kids_by_data(tree.data):
            pass

    def expansion(self, tree: Tree):
        # rules_list unpacking
        # a : b (c|d) e
        #   -->
        # a : b c e | b d e
        #
        # In AST terms:
        # expansion(b, expansions(c, d), e)
        #   -->
        # expansions( expansion(b, c, e), expansion(b, d, e) )

        self._flatten(tree)

        for i, child in enumerate(tree.children):
            if isinstance(child, Tree) and child.data == 'expansions':
                tree.data = 'expansions'
                tree.children = [self.visit(ST('expansion', [option if i == j else other
                                                             for j, other in enumerate(tree.children)]))
                                 for option in dedup_list(child.children)]
                self._flatten(tree)
                break

    def alias(self, tree):
        rule, alias_name = tree.children
        if rule.data == 'expansions':
            aliases = []
            for child in tree.children[0].children:
                aliases.append(ST('alias', [child, alias_name]))
            tree.data = 'expansions'
            tree.children = aliases

    def expansions(self, tree: Tree):
        self._flatten(tree)
        # Ensure all children are unique
        if len(set(tree.children)) != len(tree.children):
            tree.children = dedup_list(tree.children)  # dedup is expensive, so try to minimize its use


class RuleTreeToText(Transformer):
    def expansions(self, x):
        return x

    def expansion(self, symbols):
        return symbols, None

    def alias(self, x):
        (expansion, _alias), alias = x
        assert _alias is None, (alias, expansion, '-', _alias)  # Double alias not allowed
        return expansion, alias.name


class PrepareAnonTerminals(Transformer_InPlace):
    """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""

    def __init__(self, terminals):
        self.terminals = terminals
        self.term_set = {td.name for td in self.terminals}
        self.term_reverse = {td.pattern: td for td in terminals}
        self.i = 0
        self.rule_options = None

    @inline_args
    def pattern(self, p):
        value = p.value
        if p in self.term_reverse and p.flags != self.term_reverse[p].pattern.flags:
            raise GrammarError('Conflicting flags for the same terminal: %s' % p)

        term_name = None

        if isinstance(p, PatternStr):
            try:
                # If already defined, use the user-defined terminal name
                term_name = self.term_reverse[p].name
            except KeyError:
                # Try to assign an indicative anon-terminal name
                try:
                    term_name = _TERMINAL_NAMES[value]
                except KeyError:
                    if value and is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set:
                        term_name = value.upper()

                if term_name in self.term_set:
                    term_name = None

        elif isinstance(p, PatternRE):
            if p in self.term_reverse:  # Kind of a weird placement
                term_name = self.term_reverse[p].name
        else:
            assert False, p

        if term_name is None:
            term_name = '__ANON_%d' % self.i
            self.i += 1

        if term_name not in self.term_set:
            assert p not in self.term_reverse
            self.term_set.add(term_name)
            termdef = TerminalDef(term_name, p)
            self.term_reverse[p] = termdef
            self.terminals.append(termdef)

        filter_out = False if self.rule_options and self.rule_options.keep_all_tokens else isinstance(p, PatternStr)

        return Terminal(term_name, filter_out=filter_out)


class _ReplaceSymbols(Transformer_InPlace):
    """Helper for ApplyTemplates"""

    def __init__(self):
        self.names = {}

    def value(self, c):
        if len(c) == 1 and isinstance(c[0], Symbol) and c[0].name in self.names:
            return self.names[c[0].name]
        return self.__default__('value', c, None)

    def template_usage(self, c):
        name = c[0].name
        if name in self.names:
            return self.__default__('template_usage', [self.names[name]] + c[1:], None)
        return self.__default__('template_usage', c, None)


class ApplyTemplates(Transformer_InPlace):
    """Apply the templates, creating new rules that represent the used templates"""

    def __init__(self, rule_defs):
        self.rule_defs = rule_defs
        self.replacer = _ReplaceSymbols()
        self.created_templates = set()

    def template_usage(self, c):
        name = c[0].name
        args = c[1:]
        result_name = "%s{%s}" % (name, ",".join(a.name for a in args))
        if result_name not in self.created_templates:
            self.created_templates.add(result_name)
            (_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name)
            assert len(params) == len(args), args
            result_tree = deepcopy(tree)
            self.replacer.names = dict(zip(params, args))
            self.replacer.transform(result_tree)
            self.rule_defs.append((result_name, [], result_tree, deepcopy(options)))
        return NonTerminal(result_name)
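
# Illustrative sketch (hypothetical grammar, shown as a comment): given
#
#     _separated{x, sep}: x (sep x)*
#     start: _separated{NUMBER, ","}
#
# ApplyTemplates.template_usage replaces the usage with the nonterminal
# "_separated{NUMBER,COMMA}" and appends a freshly instantiated copy of the
# template's tree (with x -> NUMBER, sep -> the anonymous COMMA terminal)
# to rule_defs, so it gets compiled like any ordinary rule.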


def _rfind(s, choices):
    return max(s.rfind(c) for c in choices)


def eval_escaping(s):
    w = ''
    i = iter(s)
    for n in i:
        w += n
        if n == '\\':
            try:
                n2 = next(i)
            except StopIteration:
                raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
            if n2 == '\\':
                w += '\\\\'
            elif n2 not in 'Uuxnftr':
                w += '\\'
            w += n2
    w = w.replace('\\"', '"').replace("'", "\\'")

    to_eval = "u'''%s'''" % w
    try:
        s = literal_eval(to_eval)
    except SyntaxError as e:
        raise GrammarError(s, e)

    return s
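
# Illustrative sketch (doctest-style, describing the behavior above): escape
# sequences that Python understands are passed through for literal_eval to
# decode, while unrecognized escapes keep their backslash:
#
#     >>> eval_escaping(r'\n')
#     '\n'
#     >>> eval_escaping(r'a\.b')    # '.' is not in 'Uuxnftr', so the '\' survives
#     'a\\.b'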


def _literal_to_pattern(literal):
    assert isinstance(literal, Token)
    v = literal.value
    flag_start = _rfind(v, '/"') + 1
    assert flag_start > 0
    flags = v[flag_start:]
    assert all(f in _RE_FLAGS for f in flags), flags

    if literal.type == 'STRING' and '\n' in v:
        raise GrammarError('You cannot put newlines in string literals')

    if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags:
        raise GrammarError('You can only use newlines in regular expressions '
                           'with the `x` (verbose) flag')

    v = v[:flag_start]
    assert v[0] == v[-1] and v[0] in '"/'
    x = v[1:-1]

    s = eval_escaping(x)

    if s == "":
        raise GrammarError("Empty terminals are not allowed (%s)" % literal)

    if literal.type == 'STRING':
        s = s.replace('\\\\', '\\')
        return PatternStr(s, flags, raw=literal.value)
    elif literal.type == 'REGEXP':
        return PatternRE(s, flags, raw=literal.value)
    else:
        assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'


@inline_args
class PrepareLiterals(Transformer_InPlace):
    def literal(self, literal):
        return ST('pattern', [_literal_to_pattern(literal)])

    def range(self, start, end):
        assert start.type == end.type == 'STRING'
        start = start.value[1:-1]
        end = end.value[1:-1]
        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
        regexp = '[%s-%s]' % (start, end)
        return ST('pattern', [PatternRE(regexp)])
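
    # Illustrative sketch (an assumption about intent, as a comment): a grammar
    # range such as ``"a".."z"`` arrives as two STRING tokens; stripping the
    # quotes and substituting into '[%s-%s]' yields the regexp ``[a-z]``, which
    # is then wrapped in a PatternRE like any other regexp terminal.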


def _make_joined_pattern(regexp, flags_set) -> PatternRE:
    return PatternRE(regexp, ())

class TerminalTreeToPattern(Transformer_NonRecursive):
    def pattern(self, ps):
        p ,= ps
        return p

    def expansion(self, items: List[Pattern]) -> Pattern:
        if not items:
            return PatternStr('')

        if len(items) == 1:
            return items[0]

        pattern = ''.join(i.to_regexp() for i in items)
        return _make_joined_pattern(pattern, {i.flags for i in items})

    def expansions(self, exps: List[Pattern]) -> Pattern:
        if len(exps) == 1:
            return exps[0]

        # Do a bit of sorting to make sure that the longest option is returned
        # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match)
        exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value)))

        pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
        return _make_joined_pattern(pattern, {i.flags for i in exps})

    def expr(self, args) -> Pattern:
        inner: Pattern
        inner, op = args[:2]
        if op == '~':
            if len(args) == 3:
                op = "{%d}" % int(args[2])
            else:
                mn, mx = map(int, args[2:])
                if mx < mn:
                    raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
                op = "{%d,%d}" % (mn, mx)
        else:
            assert len(args) == 2
        return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)

    def maybe(self, expr):
        return self.expr(expr + ['?'])

    def alias(self, t):
        raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")

    def value(self, v):
        return v[0]


class ValidateSymbols(Transformer_InPlace):
    def value(self, v):
        v ,= v
        assert isinstance(v, (Tree, Symbol))
        return v


def nr_deepcopy_tree(t):
    """Deepcopy tree `t` without recursion"""
    return Transformer_NonRecursive(False).transform(t)


class Grammar:

    term_defs: List[Tuple[str, Tuple[Tree, int]]]
    rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
    ignore: List[str]

    def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None:
        self.term_defs = term_defs
        self.rule_defs = rule_defs
        self.ignore = ignore

    def compile(self, start, terminals_to_keep) -> Tuple[List[TerminalDef], List[Rule], List[str]]:
        # We change the trees in-place (to support huge grammars),
        # so deepcopying here allows calling compile more than once.
        term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs]
        rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs]

        # ===================
        #  Compile Terminals
        # ===================

        # Convert terminal-trees to strings/regexps

        for name, (term_tree, priority) in term_defs:
            if term_tree is None:  # Terminal added through %declare
                continue
            expansions = list(term_tree.find_data('expansion'))
            if len(expansions) == 1 and not expansions[0].children:
                raise GrammarError("Terminals cannot be empty (%s)" % name)

        transformer = PrepareLiterals() * TerminalTreeToPattern()
        terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
                     for name, (term_tree, priority) in term_defs if term_tree]

        # =================
        #  Compile Rules
        # =================

        # 1. Pre-process terminals
        anon_tokens_transf = PrepareAnonTerminals(terminals)
        transformer = PrepareLiterals() * ValidateSymbols() * anon_tokens_transf  # Adds to terminals

        # 2. Inline Templates

        transformer *= ApplyTemplates(rule_defs)

        # 3. Convert EBNF to BNF (and apply steps 1 & 2)
        ebnf_to_bnf = EBNF_to_BNF()
        rules = []
        i = 0
        while i < len(rule_defs):  # We have to do it like this because rule_defs might grow due to templates
            name, params, rule_tree, options = rule_defs[i]
            i += 1
            if len(params) != 0:  # Don't transform templates
                continue
            rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
            ebnf_to_bnf.rule_options = rule_options
            ebnf_to_bnf.prefix = name
            anon_tokens_transf.rule_options = rule_options
            tree = transformer.transform(rule_tree)
            res: Tree = ebnf_to_bnf.transform(tree)
            rules.append((name, res, options))
        rules += ebnf_to_bnf.new_rules

        assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"

        # 4. Compile tree to Rule objects
        rule_tree_to_text = RuleTreeToText()

        simplify_rule = SimplifyRule_Visitor()
        compiled_rules: List[Rule] = []
        for rule_content in rules:
            name, tree, options = rule_content
            simplify_rule.visit(tree)
            expansions = rule_tree_to_text.transform(tree)

            for i, (expansion, alias) in enumerate(expansions):
                if alias and name.startswith('_'):
                    raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

                empty_indices = tuple(x == _EMPTY for x in expansion)
                if any(empty_indices):
                    exp_options = copy(options) or RuleOptions()
                    exp_options.empty_indices = empty_indices
                    expansion = [x for x in expansion if x != _EMPTY]
                else:
                    exp_options = options

                for sym in expansion:
                    assert isinstance(sym, Symbol)
                    if sym.is_term and exp_options and exp_options.keep_all_tokens:
                        assert isinstance(sym, Terminal)
                        sym.filter_out = False
                rule = Rule(NonTerminal(name), expansion, i, alias, exp_options)
                compiled_rules.append(rule)

        # Remove duplicates of empty rules, throw error for non-empty duplicates
        if len(set(compiled_rules)) != len(compiled_rules):
            duplicates = classify(compiled_rules, lambda x: x)
            for dups in duplicates.values():
                if len(dups) > 1:
                    if dups[0].expansion:
                        raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)"
                                           % ''.join('\n  * %s' % i for i in dups))

                    # Empty rule; assert all other attributes are equal
                    assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)

            # Remove duplicates
            compiled_rules = list(OrderedSet(compiled_rules))

        # Filter out unused rules
        while True:
            c = len(compiled_rules)
            used_rules = {s for r in compiled_rules
                          for s in r.expansion
                          if isinstance(s, NonTerminal)
                          and s != r.origin}
            used_rules |= {NonTerminal(s) for s in start}
            compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
            for r in unused:
                logger.debug("Unused rule: %s", r)
            if len(compiled_rules) == c:
                break

        # Filter out unused terminals
        if terminals_to_keep != '*':
            used_terms = {t.name for r in compiled_rules
                          for t in r.expansion
                          if isinstance(t, Terminal)}
            terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
            if unused:
                logger.debug("Unused terminals: %s", [t.name for t in unused])

        return terminals, compiled_rules, self.ignore


PackageResource = namedtuple('PackageResource', 'pkg_name path')


class FromPackageLoader:
    """
    Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using ``open``.
    This allows them to be compatible even from within zip files.

    Relative imports are handled, so you can use them freely.

    pkg_name: The name of the package. You can probably provide `__name__` most of the time.
    search_paths: All the paths that will be searched on absolute imports.
    """

    pkg_name: str
    search_paths: Sequence[str]

    def __init__(self, pkg_name: str, search_paths: Sequence[str]=("", )) -> None:
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]:
        if base_path is None:
            to_try = self.search_paths
        else:
            # Check whether or not the importing grammar was loaded by this module.
            if not isinstance(base_path, PackageResource) or base_path.pkg_name != self.pkg_name:
                # Technically false, but FileNotFoundError doesn't exist in Python 2.7, and this message should never reach the end user anyway
                raise IOError()
            to_try = [base_path.path]

        err = None
        for path in to_try:
            full_path = os.path.join(path, grammar_path)
            try:
                text: Optional[bytes] = pkgutil.get_data(self.pkg_name, full_path)
            except IOError as e:
                err = e
                continue
            else:
                return PackageResource(self.pkg_name, full_path), (text.decode() if text else '')

        raise IOError('Cannot find grammar in given paths') from err


stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
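
# Illustrative sketch (hedged usage example; "my_package" is hypothetical, but
# passing loaders via Lark's ``import_paths`` parameter is the intended use):
#
#     from lark import Lark
#     from lark.load_grammar import FromPackageLoader
#
#     loader = FromPackageLoader('my_package', ('grammars',))
#     parser = Lark(grammar_text, import_paths=[loader])
#     # %import statements now also search my_package/grammars/, even inside a zip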


def resolve_term_references(term_dict):
    # TODO Solve with transitive closure (maybe)

    while True:
        changed = False
        for name, token_tree in term_dict.items():
            if token_tree is None:  # Terminal added through %declare
                continue
            for exp in token_tree.find_data('value'):
                item ,= exp.children
                if isinstance(item, NonTerminal):
                    raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name))
                elif isinstance(item, Terminal):
                    try:
                        term_value = term_dict[item.name]
                    except KeyError:
                        raise GrammarError("Terminal used but not defined: %s" % item.name)
                    assert term_value is not None
                    exp.children[0] = term_value
                    changed = True
                else:
                    assert isinstance(item, Tree)
        if not changed:
            break

    for name, term in term_dict.items():
        if term:  # Not just declared
            for child in term.children:
                ids = [id(x) for x in child.iter_subtrees()]
                if id(term) in ids:
                    raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name)


def symbol_from_strcase(s):
    assert isinstance(s, str)
    return Terminal(s, filter_out=s.startswith('_')) if s.isupper() else NonTerminal(s)


@inline_args
class PrepareGrammar(Transformer_InPlace):
    def terminal(self, name):
        return Terminal(str(name), filter_out=name.startswith('_'))

    def nonterminal(self, name):
        return NonTerminal(name.value)


def _find_used_symbols(tree):
    assert tree.data == 'expansions'
    return {t.name for x in tree.find_data('expansion')
            for t in x.scan_values(lambda t: isinstance(t, Symbol))}


def _get_parser():
    try:
        return _get_parser.cache
    except AttributeError:
        terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

        rules = [(name.lstrip('?'), x, RuleOptions(expand1=name.startswith('?')))
                 for name, x in RULES.items()]
        rules = [Rule(NonTerminal(r), [symbol_from_strcase(s) for s in x.split()], i, None, o)
                 for r, xs, o in rules for i, x in enumerate(xs)]

        callback = ParseTreeBuilder(rules, ST).create_callback()
        import re
        lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT', 'BACKSLASH'])
        parser_conf = ParserConf(rules, callback, ['start'])
        lexer_conf.lexer_type = 'basic'
        parser_conf.parser_type = 'lalr'
        _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None)
        return _get_parser.cache

GRAMMAR_ERRORS = [
    ('Incorrect type of value', ['a: 1\n']),
    ('Unclosed parenthesis', ['a: (\n']),
    ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
    ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
    ('Illegal name for rules or terminals', ['Aa:\n']),
    ('Alias expects lowercase name', ['a: -> "a"\n']),
    ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']),
    ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']),
    ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']),
    ('Terminal names cannot contain dots', ['A.B\n']),
    ('Expecting rule or terminal definition', ['"a"\n']),
    ('%import expects a name', ['%import "a"\n']),
    ('%ignore expects a value', ['%ignore %import\n']),
]

def _translate_parser_exception(parse, e):
    error = e.match_examples(parse, GRAMMAR_ERRORS, use_accepts=True)
    if error:
        return error
    elif 'STRING' in e.expected:
        return "Expecting a value"

def _parse_grammar(text, name, start='start'):
    try:
        tree = _get_parser().parse(text + '\n', start)
    except UnexpectedCharacters as e:
        context = e.get_context(text)
        raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
                           (e.line, e.column, name, context))
    except UnexpectedToken as e:
        context = e.get_context(text)
        error = _translate_parser_exception(_get_parser().parse, e)
        if error:
            raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
        raise

    return PrepareGrammar().transform(tree)


def _error_repr(error):
    if isinstance(error, UnexpectedToken):
        error2 = _translate_parser_exception(_get_parser().parse, error)
        if error2:
            return error2
        expected = ', '.join(error.accepts or error.expected)
        return "Unexpected token %r. Expected one of: {%s}" % (str(error.token), expected)
    else:
        return str(error)

def _search_interactive_parser(interactive_parser, predicate):
    def expand(node):
        path, p = node
        for choice in p.choices():
            t = Token(choice, '')
            try:
                new_p = p.feed_token(t)
            except ParseError:  # Illegal
                pass
            else:
                yield path + (choice,), new_p

    for path, p in bfs_all_unique([((), interactive_parser)], expand):
        if predicate(p):
            return path, p

def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]:
    errors = []
    def on_error(e):
        errors.append((e, _error_repr(e)))

        # Recover to a new line
        token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices())
        for token_type in token_path:
            e.interactive_parser.feed_token(Token(token_type, ''))
        e.interactive_parser.feed_token(Token('_NL', '\n'))
        return True

    _tree = _get_parser().parse(text + '\n', start, on_error=on_error)

    errors_by_line = classify(errors, lambda e: e[0].line)
    errors = [el[0] for el in errors_by_line.values()]  # already sorted

    for e in errors:
        e[0].interactive_parser = None
    return errors
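
# Illustrative sketch (hedged; the exact messages depend on GRAMMAR_ERRORS above):
# find_grammar_errors() parses with error recovery and reports one
# (exception, message) pair per offending line, e.g.
#
#     for err, msg in find_grammar_errors('start: (\nfoo: "a"\n'):
#         print(err.line, msg)    # something like: 1 Unclosed parenthesis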


def _get_mangle(prefix, aliases, base_mangle=None):
    def mangle(s):
        if s in aliases:
            s = aliases[s]
        else:
            if s[0] == '_':
                s = '_%s__%s' % (prefix, s[1:])
            else:
                s = '%s__%s' % (prefix, s)
        if base_mangle is not None:
            s = base_mangle(s)
        return s
    return mangle

def _mangle_definition_tree(exp, mangle):
    if mangle is None:
        return exp
    exp = deepcopy(exp)  # TODO: is this needed?
    for t in exp.iter_subtrees():
        for i, c in enumerate(t.children):
            if isinstance(c, Symbol):
                t.children[i] = c.renamed(mangle)

    return exp

def _make_rule_tuple(modifiers_tree, name, params, priority_tree, expansions):
    if modifiers_tree.children:
        m ,= modifiers_tree.children
        expand1 = '?' in m
        if expand1 and name.startswith('_'):
            raise GrammarError("Inlined rules (_rule) cannot use the ?rule modifier.")
        keep_all_tokens = '!' in m
    else:
        keep_all_tokens = False
        expand1 = False

    if priority_tree.children:
        p ,= priority_tree.children
        priority = int(p)
    else:
        priority = None

    if params is not None:
        params = [t.value for t in params.children]  # For the grammar parser

    return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority,
                                                 template_source=(name if params else None))


class Definition:
    def __init__(self, is_term, tree, params=(), options=None):
        self.is_term = is_term
        self.tree = tree
        self.params = tuple(params)
        self.options = options

class GrammarBuilder:

    global_keep_all_tokens: bool
    import_paths: List[Union[str, Callable]]
    used_files: Dict[str, str]

    _definitions: Dict[str, Definition]
    _ignore_names: List[str]

    def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None:
        self.global_keep_all_tokens = global_keep_all_tokens
        self.import_paths = import_paths or []
        self.used_files = used_files or {}

        self._definitions: Dict[str, Definition] = {}
        self._ignore_names: List[str] = []

    def _grammar_error(self, is_term, msg, *names):
        args = {}
        for i, name in enumerate(names, start=1):
            postfix = '' if i == 1 else str(i)
            args['name' + postfix] = name
            args['type' + postfix] = lowercase_type = ("rule", "terminal")[is_term]
            args['Type' + postfix] = lowercase_type.title()
        raise GrammarError(msg.format(**args))

    def _check_options(self, is_term, options):
        if is_term:
            if options is None:
                options = 1
            elif not isinstance(options, int):
                raise GrammarError("Terminals require a single int as 'options' (e.g. priority), got %s" % (type(options),))
        else:
            if options is None:
                options = RuleOptions()
            elif not isinstance(options, RuleOptions):
                raise GrammarError("Rules require a RuleOptions instance as 'options'")
            if self.global_keep_all_tokens:
                options.keep_all_tokens = True
        return options


    def _define(self, name, is_term, exp, params=(), options=None, *, override=False):
        if name in self._definitions:
            if not override:
                self._grammar_error(is_term, "{Type} '{name}' defined more than once", name)
        elif override:
            self._grammar_error(is_term, "Cannot override a nonexistent {type} {name}", name)

        if name.startswith('__'):
            self._grammar_error(is_term, 'Names starting with double-underscore are reserved (Error at {name})', name)

        self._definitions[name] = Definition(is_term, exp, params, self._check_options(is_term, options))

    def _extend(self, name, is_term, exp, params=(), options=None):
        if name not in self._definitions:
            self._grammar_error(is_term, "Can't extend {type} {name} as it wasn't defined before", name)

        d = self._definitions[name]

        if is_term != d.is_term:
            self._grammar_error(is_term, "Cannot extend {type} {name} - one is a terminal, while the other is not.", name)
        if tuple(params) != d.params:
            self._grammar_error(is_term, "Cannot extend {type} with different parameters: {name}", name)

        if d.tree is None:
            self._grammar_error(is_term, "Can't extend {type} {name} - it is abstract.", name)

        # TODO: think about what to do with 'options'
        base = d.tree

        assert isinstance(base, Tree) and base.data == 'expansions'
        base.children.insert(0, exp)

    def _ignore(self, exp_or_name):
        if isinstance(exp_or_name, str):
            self._ignore_names.append(exp_or_name)
        else:
            assert isinstance(exp_or_name, Tree)
            t = exp_or_name
            if t.data == 'expansions' and len(t.children) == 1:
                t2 ,= t.children
                if t2.data == 'expansion' and len(t2.children) == 1:
                    item ,= t2.children
                    if item.data == 'value':
                        item ,= item.children
                        if isinstance(item, Terminal):
                            # Keep terminal name, no need to create a new definition
                            self._ignore_names.append(item.name)
                            return

            name = '__IGNORE_%d' % len(self._ignore_names)
            self._ignore_names.append(name)
            self._definitions[name] = Definition(True, t, options=TOKEN_DEFAULT_PRIORITY)

    def _unpack_import(self, stmt, grammar_name):
        if len(stmt.children) > 1:
            path_node, arg1 = stmt.children
        else:
            path_node, = stmt.children
            arg1 = None

        if isinstance(arg1, Tree):  # Multi import
            dotted_path = tuple(path_node.children)
            names = arg1.children
            aliases = dict(zip(names, names))  # Can't have an aliased multi import, so all aliases will be the same as the names
        else:  # Single import
            dotted_path = tuple(path_node.children[:-1])
            if not dotted_path:
                name ,= path_node.children
                raise GrammarError("Nothing was imported from grammar `%s`" % name)
            name = path_node.children[-1]  # Get name from dotted path
            aliases = {name.value: (arg1 or name).value}  # Aliases if they exist

        if path_node.data == 'import_lib':  # Import from library
            base_path = None
        else:  # Relative import
            if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
                try:
                    base_file = os.path.abspath(sys.modules['__main__'].__file__)
                except AttributeError:
                    base_file = None
            else:
                base_file = grammar_name  # Import relative to grammar file path if external grammar file
            if base_file:
                if isinstance(base_file, PackageResource):
                    base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
                else:
                    base_path = os.path.split(base_file)[0]
            else:
                base_path = os.path.abspath(os.path.curdir)

        return dotted_path, base_path, aliases

    def _unpack_definition(self, tree, mangle):

        if tree.data == 'rule':
            name, params, exp, opts = _make_rule_tuple(*tree.children)
            is_term = False
        else:
            name = tree.children[0].value
            params = ()  # TODO terminal templates
            opts = int(tree.children[1]) if len(tree.children) == 3 else TOKEN_DEFAULT_PRIORITY  # priority
            exp = tree.children[-1]
            is_term = True

        if mangle is not None:
            params = tuple(mangle(p) for p in params)
            name = mangle(name)

        exp = _mangle_definition_tree(exp, mangle)
        return name, is_term, exp, params, opts


    def load_grammar(self, grammar_text: str, grammar_name: str="<?>", mangle: Optional[Callable[[str], str]]=None) -> None:
        tree = _parse_grammar(grammar_text, grammar_name)

        imports: Dict[Tuple[str, ...], Tuple[Optional[str], Dict[str, str]]] = {}

        for stmt in tree.children:
            if stmt.data == 'import':
                dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
                try:
                    import_base_path, import_aliases = imports[dotted_path]
                    assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
                    import_aliases.update(aliases)
                except KeyError:
                    imports[dotted_path] = base_path, aliases

        for dotted_path, (base_path, aliases) in imports.items():
            self.do_import(dotted_path, base_path, aliases, mangle)

        for stmt in tree.children:
            if stmt.data in ('term', 'rule'):
                self._define(*self._unpack_definition(stmt, mangle))
            elif stmt.data == 'override':
                r ,= stmt.children
                self._define(*self._unpack_definition(r, mangle), override=True)
            elif stmt.data == 'extend':
                r ,= stmt.children
                self._extend(*self._unpack_definition(r, mangle))
            elif stmt.data == 'ignore':
                # If mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
                if mangle is None:
                    self._ignore(*stmt.children)
            elif stmt.data == 'declare':
                for symbol in stmt.children:
                    assert isinstance(symbol, Symbol), symbol
                    is_term = isinstance(symbol, Terminal)
                    if mangle is None:
                        name = symbol.name
                    else:
                        name = mangle(symbol.name)
                    self._define(name, is_term, None)
            elif stmt.data == 'import':
                pass
            else:
                assert False, stmt

        term_defs = {name: d.tree
                     for name, d in self._definitions.items()
                     if d.is_term}
        resolve_term_references(term_defs)


    def _remove_unused(self, used):
        def rule_dependencies(symbol):
            try:
                d = self._definitions[symbol]
            except KeyError:
                return []
            if d.is_term:
                return []
            return _find_used_symbols(d.tree) - set(d.params)

        _used = set(bfs(used, rule_dependencies))
        self._definitions = {k: v for k, v in self._definitions.items() if k in _used}


    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None:
        assert dotted_path
        mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
        grammar_path = os.path.join(*dotted_path) + EXT
        to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
        for source in to_try:
            try:
                if callable(source):
                    joined_path, text = source(base_path, grammar_path)
                else:
                    joined_path = os.path.join(source, grammar_path)
                    with open(joined_path, encoding='utf8') as f:
                        text = f.read()
            except IOError:
                continue
            else:
                h = sha256_digest(text)
                if self.used_files.get(joined_path, h) != h:
                    raise RuntimeError("Grammar file was changed during importing")
                self.used_files[joined_path] = h

                gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files)
                gb.load_grammar(text, joined_path, mangle)
                gb._remove_unused(map(mangle, aliases))
                for name in gb._definitions:
                    if name in self._definitions:
                        raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path))

                self._definitions.update(**gb._definitions)
                break
        else:
            # Search failed. Make Python throw a nice error.
            open(grammar_path, encoding='utf8')
            assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)


    def validate(self) -> None:
        for name, d in self._definitions.items():
            params = d.params
            exp = d.tree

            for i, p in enumerate(params):
                if p in self._definitions:
                    raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
                if p in params[:i]:
                    raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))

            if exp is None:  # Remaining checks don't apply to abstract rules/terminals (created with %declare)
                continue

            for temp in exp.find_data('template_usage'):
                sym = temp.children[0].name
                args = temp.children[1:]
                if sym not in params:
                    if sym not in self._definitions:
                        self._grammar_error(d.is_term, "Template '%s' used but not defined (in {type} {name})" % sym, name)
                    if len(args) != len(self._definitions[sym].params):
                        expected, actual = len(self._definitions[sym].params), len(args)
                        self._grammar_error(d.is_term, "Wrong number of template arguments used for {name} "
                                            "(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name)

            for sym in _find_used_symbols(exp):
                if sym not in self._definitions and sym not in params:
                    self._grammar_error(d.is_term, "{Type} '{name}' used but not defined (in {type2} {name2})", sym, name)

        if not set(self._definitions).issuperset(self._ignore_names):
            raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))

    def build(self) -> Grammar:
        self.validate()
        rule_defs = []
        term_defs = []
        for name, d in self._definitions.items():
            (params, exp, options) = d.params, d.tree, d.options
            if d.is_term:
                assert len(params) == 0
                term_defs.append((name, (exp, options)))
            else:
                rule_defs.append((name, params, exp, options))
        # resolve_term_references(term_defs)
        return Grammar(rule_defs, term_defs, self._ignore_names)


def verify_used_files(file_hashes):
    for path, old in file_hashes.items():
        text = None
        if isinstance(path, str) and os.path.exists(path):
            with open(path, encoding='utf8') as f:
                text = f.read()
        elif isinstance(path, PackageResource):
            with suppress(IOError):
                text = pkgutil.get_data(*path).decode('utf-8')
        if text is None:  # We don't know how to load the path. Ignore it.
            continue

        current = sha256_digest(text)
        if old != current:
            logger.info("File %r changed, rebuilding parser", path)
            return False
    return True

def list_grammar_imports(grammar, import_paths=[]):
    "Returns a list of paths to the lark grammars imported by the given grammar (recursively)"
    builder = GrammarBuilder(False, import_paths)
    builder.load_grammar(grammar, '<string>')
    return list(builder.used_files.keys())

def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
    builder = GrammarBuilder(global_keep_all_tokens, import_paths)
    builder.load_grammar(grammar, source)
    return builder.build(), builder.used_files
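
# Illustrative sketch (hedged; this load_grammar() is the internal entry point
# that lark.Lark uses, shown with an inline grammar and no extra import paths):
#
#     grammar, used_files = load_grammar('start: "a"+', '<string>', [], False)
#     terminals, rules, ignore_names = grammar.compile(['start'], '*')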


def sha256_digest(s: str) -> str:
    """Get the sha256 digest of a string

    Supports the `usedforsecurity` argument for Python 3.9+ to allow running on
    a FIPS-enabled system.
    """
    if sys.version_info >= (3, 9):
        return hashlib.sha256(s.encode('utf8'), usedforsecurity=False).hexdigest()
    else:
        return hashlib.sha256(s.encode('utf8')).hexdigest()