Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/lark/parsers/earley_forest.py: 72%

463 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-14 06:19 +0000

1""""This module implements an SPPF implementation 

2 

3This is used as the primary output mechanism for the Earley parser 

4in order to store complex ambiguities. 

5 

6Full reference and more details is here: 

7https://web.archive.org/web/20190616123959/http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/ 

8""" 

9 

10from typing import Type, AbstractSet 

11from random import randint 

12from collections import deque 

13from operator import attrgetter 

14from importlib import import_module 

15from functools import partial 

16 

17from ..parse_tree_builder import AmbiguousIntermediateExpander 

18from ..visitors import Discard 

19from ..utils import logger, OrderedSet 

20from ..tree import Tree 

21 

22class ForestNode: 

23 pass 

24 

25class SymbolNode(ForestNode): 

26 """ 

27 A Symbol Node represents a symbol (or Intermediate LR0). 

28 

29 Symbol nodes are keyed by the symbol (s). For intermediate nodes 

30 s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol 

31 nodes, s will be a string representing the non-terminal origin (i.e. 

32 the left hand side of the rule). 

33 

34 The children of a Symbol or Intermediate Node will always be Packed Nodes; 

35 with each Packed Node child representing a single derivation of a production. 

36 

37 Hence a Symbol Node with a single child is unambiguous. 

38 

39 Parameters: 

40 s: A Symbol, or a tuple of (rule, ptr) for an intermediate node. 

41 start: The index of the start of the substring matched by this symbol (inclusive). 

42 end: The index of the end of the substring matched by this symbol (exclusive). 

43 

44 Properties: 

45 is_intermediate: True if this node is an intermediate node. 

46 priority: The priority of the node's symbol. 

47 """ 

48 Set: Type[AbstractSet] = set # Overridden by StableSymbolNode 

49 __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate', '_hash') 

50 def __init__(self, s, start, end): 

51 self.s = s 

52 self.start = start 

53 self.end = end 

54 self._children = self.Set() 

55 self.paths = self.Set() 

56 self.paths_loaded = False 

57 

58 ### We use inf here as it can be safely negated without resorting to conditionals, 

59 # unlike None or float('NaN'), and sorts appropriately. 

60 self.priority = float('-inf') 

61 self.is_intermediate = isinstance(s, tuple) 

62 self._hash = hash((self.s, self.start, self.end)) 

63 

64 def add_family(self, lr0, rule, start, left, right): 

65 self._children.add(PackedNode(self, lr0, rule, start, left, right)) 

66 

67 def add_path(self, transitive, node): 

68 self.paths.add((transitive, node)) 

69 

70 def load_paths(self): 

71 for transitive, node in self.paths: 

72 if transitive.next_titem is not None: 

73 vn = type(self)(transitive.next_titem.s, transitive.next_titem.start, self.end) 

74 vn.add_path(transitive.next_titem, node) 

75 self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn) 

76 else: 

77 self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node) 

78 self.paths_loaded = True 

79 

80 @property 

81 def is_ambiguous(self): 

82 """Returns True if this node is ambiguous.""" 

83 return len(self.children) > 1 

84 

85 @property 

86 def children(self): 

87 """Returns a list of this node's children sorted from greatest to 

88 least priority.""" 

89 if not self.paths_loaded: 

90 self.load_paths() 

91 return sorted(self._children, key=attrgetter('sort_key')) 

92 

93 def __iter__(self): 

94 return iter(self._children) 

95 

96 def __eq__(self, other): 

97 if not isinstance(other, SymbolNode): 

98 return False 

99 return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.end is other.end) 

100 

101 def __hash__(self): 

102 return self._hash 

103 

104 def __repr__(self): 

105 if self.is_intermediate: 

106 rule = self.s[0] 

107 ptr = self.s[1] 

108 before = ( expansion.name for expansion in rule.expansion[:ptr] ) 

109 after = ( expansion.name for expansion in rule.expansion[ptr:] ) 

110 symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after)) 

111 else: 

112 symbol = self.s.name 

113 return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority) 

114 

115class StableSymbolNode(SymbolNode): 

116 "A version of SymbolNode that uses OrderedSet for output stability" 

117 Set = OrderedSet 

118 

119class PackedNode(ForestNode): 

120 """ 

121 A Packed Node represents a single derivation in a symbol node. 

122 

123 Parameters: 

124 rule: The rule associated with this node. 

125 parent: The parent of this node. 

126 left: The left child of this node. ``None`` if one does not exist. 

127 right: The right child of this node. ``None`` if one does not exist. 

128 priority: The priority of this node. 

129 """ 

130 __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash') 

131 def __init__(self, parent, s, rule, start, left, right): 

132 self.parent = parent 

133 self.s = s 

134 self.start = start 

135 self.rule = rule 

136 self.left = left 

137 self.right = right 

138 self.priority = float('-inf') 

139 self._hash = hash((self.left, self.right)) 

140 

141 @property 

142 def is_empty(self): 

143 return self.left is None and self.right is None 

144 

145 @property 

146 def sort_key(self): 

147 """ 

148 Used to sort PackedNode children of SymbolNodes. 

149 A SymbolNode has multiple PackedNodes if it matched 

150 ambiguously. Hence, we use the sort order to identify 

151 the order in which ambiguous children should be considered. 

152 """ 

153 return self.is_empty, -self.priority, self.rule.order 

154 

155 @property 

156 def children(self): 

157 """Returns a list of this node's children.""" 

158 return [x for x in [self.left, self.right] if x is not None] 

159 

160 def __iter__(self): 

161 yield self.left 

162 yield self.right 

163 

164 def __eq__(self, other): 

165 if not isinstance(other, PackedNode): 

166 return False 

167 return self is other or (self.left == other.left and self.right == other.right) 

168 

169 def __hash__(self): 

170 return self._hash 

171 

172 def __repr__(self): 

173 if isinstance(self.s, tuple): 

174 rule = self.s[0] 

175 ptr = self.s[1] 

176 before = ( expansion.name for expansion in rule.expansion[:ptr] ) 

177 after = ( expansion.name for expansion in rule.expansion[ptr:] ) 

178 symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after)) 

179 else: 

180 symbol = self.s.name 

181 return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order) 

182 

183class TokenNode(ForestNode): 

184 """ 

185 A Token Node represents a matched terminal and is always a leaf node. 

186 

187 Parameters: 

188 token: The Token associated with this node. 

189 term: The TerminalDef matched by the token. 

190 priority: The priority of this node. 

191 """ 

192 __slots__ = ('token', 'term', 'priority', '_hash') 

193 def __init__(self, token, term, priority=None): 

194 self.token = token 

195 self.term = term 

196 if priority is not None: 

197 self.priority = priority 

198 else: 

199 self.priority = term.priority if term is not None else 0 

200 self._hash = hash(token) 

201 

202 def __eq__(self, other): 

203 if not isinstance(other, TokenNode): 

204 return False 

205 return self is other or (self.token == other.token) 

206 

207 def __hash__(self): 

208 return self._hash 

209 

210 def __repr__(self): 

211 return repr(self.token) 

212 

213class ForestVisitor: 

214 """ 

215 An abstract base class for building forest visitors. 

216 

217 This class performs a controllable depth-first walk of an SPPF. 

218 The visitor will not enter cycles and will backtrack if one is encountered. 

219 Subclasses are notified of cycles through the ``on_cycle`` method. 

220 

221 Behavior for visit events is defined by overriding the 

222 ``visit*node*`` functions. 

223 

224 The walk is controlled by the return values of the ``visit*node_in`` 

225 methods. Returning a node(s) will schedule them to be visited. The visitor 

226 will begin to backtrack if no nodes are returned. 

227 

228 Parameters: 

229 single_visit: If ``True``, non-Token nodes will only be visited once. 

230 """ 

231 

232 def __init__(self, single_visit=False): 

233 self.single_visit = single_visit 

234 

235 def visit_token_node(self, node): 

236 """Called when a ``Token`` is visited. ``Token`` nodes are always leaves.""" 

237 pass 

238 

239 def visit_symbol_node_in(self, node): 

240 """Called when a symbol node is visited. Nodes that are returned 

241 will be scheduled to be visited. If ``visit_intermediate_node_in`` 

242 is not implemented, this function will be called for intermediate 

243 nodes as well.""" 

244 pass 

245 

246 def visit_symbol_node_out(self, node): 

247 """Called after all nodes returned from a corresponding ``visit_symbol_node_in`` 

248 call have been visited. If ``visit_intermediate_node_out`` 

249 is not implemented, this function will be called for intermediate 

250 nodes as well.""" 

251 pass 

252 

253 def visit_packed_node_in(self, node): 

254 """Called when a packed node is visited. Nodes that are returned 

255 will be scheduled to be visited. """ 

256 pass 

257 

258 def visit_packed_node_out(self, node): 

259 """Called after all nodes returned from a corresponding ``visit_packed_node_in`` 

260 call have been visited.""" 

261 pass 

262 

263 def on_cycle(self, node, path): 

264 """Called when a cycle is encountered. 

265 

266 Parameters: 

267 node: The node that causes a cycle. 

268 path: The list of nodes being visited: nodes that have been 

269 entered but not exited. The first element is the root in a forest 

270 visit, and the last element is the node visited most recently. 

271 ``path`` should be treated as read-only. 

272 """ 

273 pass 

274 

275 def get_cycle_in_path(self, node, path): 

276 """A utility function for use in ``on_cycle`` to obtain a slice of 

277 ``path`` that only contains the nodes that make up the cycle.""" 

278 index = len(path) - 1 

279 while id(path[index]) != id(node): 

280 index -= 1 

281 return path[index:] 

282 

283 def visit(self, root): 

284 # Visiting is a list of IDs of all symbol/intermediate nodes currently in 

285 # the stack. It serves two purposes: to detect when we 'recurse' in and out 

286 # of a symbol/intermediate so that we can process both up and down. Also, 

287 # since the SPPF can have cycles it allows us to detect if we're trying 

288 # to recurse into a node that's already on the stack (infinite recursion). 

289 visiting = set() 

290 

291 # set of all nodes that have been visited 

292 visited = set() 

293 

294 # a list of nodes that are currently being visited 

295 # used for the `on_cycle` callback 

296 path = [] 

297 

298 # We do not use recursion here to walk the Forest due to the limited 

299 # stack size in python. Therefore input_stack is essentially our stack. 

300 input_stack = deque([root]) 

301 

302 # It is much faster to cache these as locals since they are called 

303 # many times in large parses. 

304 vpno = getattr(self, 'visit_packed_node_out') 

305 vpni = getattr(self, 'visit_packed_node_in') 

306 vsno = getattr(self, 'visit_symbol_node_out') 

307 vsni = getattr(self, 'visit_symbol_node_in') 

308 vino = getattr(self, 'visit_intermediate_node_out', vsno) 

309 vini = getattr(self, 'visit_intermediate_node_in', vsni) 

310 vtn = getattr(self, 'visit_token_node') 

311 oc = getattr(self, 'on_cycle') 

312 

313 while input_stack: 

314 current = next(reversed(input_stack)) 

315 try: 

316 next_node = next(current) 

317 except StopIteration: 

318 input_stack.pop() 

319 continue 

320 except TypeError: 

321 ### If the current object is not an iterator, pass through to Token/SymbolNode 

322 pass 

323 else: 

324 if next_node is None: 

325 continue 

326 

327 if id(next_node) in visiting: 

328 oc(next_node, path) 

329 continue 

330 

331 input_stack.append(next_node) 

332 continue 

333 

334 if isinstance(current, TokenNode): 

335 vtn(current.token) 

336 input_stack.pop() 

337 continue 

338 

339 current_id = id(current) 

340 if current_id in visiting: 

341 if isinstance(current, PackedNode): 

342 vpno(current) 

343 elif current.is_intermediate: 

344 vino(current) 

345 else: 

346 vsno(current) 

347 input_stack.pop() 

348 path.pop() 

349 visiting.remove(current_id) 

350 visited.add(current_id) 

351 elif self.single_visit and current_id in visited: 

352 input_stack.pop() 

353 else: 

354 visiting.add(current_id) 

355 path.append(current) 

356 if isinstance(current, PackedNode): 

357 next_node = vpni(current) 

358 elif current.is_intermediate: 

359 next_node = vini(current) 

360 else: 

361 next_node = vsni(current) 

362 if next_node is None: 

363 continue 

364 

365 if not isinstance(next_node, ForestNode): 

366 next_node = iter(next_node) 

367 elif id(next_node) in visiting: 

368 oc(next_node, path) 

369 continue 

370 

371 input_stack.append(next_node) 

372 

373class ForestTransformer(ForestVisitor): 

374 """The base class for a bottom-up forest transformation. Most users will 

375 want to use ``TreeForestTransformer`` instead as it has a friendlier 

376 interface and covers most use cases. 

377 

378 Transformations are applied via inheritance and overriding of the 

379 ``transform*node`` methods. 

380 

381 ``transform_token_node`` receives a ``Token`` as an argument. 

382 All other methods receive the node that is being transformed and 

383 a list of the results of the transformations of that node's children. 

384 The return value of these methods are the resulting transformations. 

385 

386 If ``Discard`` is raised in a node's transformation, no data from that node 

387 will be passed to its parent's transformation. 

388 """ 

389 

390 def __init__(self): 

391 super(ForestTransformer, self).__init__() 

392 # results of transformations 

393 self.data = dict() 

394 # used to track parent nodes 

395 self.node_stack = deque() 

396 

397 def transform(self, root): 

398 """Perform a transformation on an SPPF.""" 

399 self.node_stack.append('result') 

400 self.data['result'] = [] 

401 self.visit(root) 

402 assert len(self.data['result']) <= 1 

403 if self.data['result']: 

404 return self.data['result'][0] 

405 

406 def transform_symbol_node(self, node, data): 

407 """Transform a symbol node.""" 

408 return node 

409 

410 def transform_intermediate_node(self, node, data): 

411 """Transform an intermediate node.""" 

412 return node 

413 

414 def transform_packed_node(self, node, data): 

415 """Transform a packed node.""" 

416 return node 

417 

418 def transform_token_node(self, node): 

419 """Transform a ``Token``.""" 

420 return node 

421 

422 def visit_symbol_node_in(self, node): 

423 self.node_stack.append(id(node)) 

424 self.data[id(node)] = [] 

425 return node.children 

426 

427 def visit_packed_node_in(self, node): 

428 self.node_stack.append(id(node)) 

429 self.data[id(node)] = [] 

430 return node.children 

431 

432 def visit_token_node(self, node): 

433 transformed = self.transform_token_node(node) 

434 if transformed is not Discard: 

435 self.data[self.node_stack[-1]].append(transformed) 

436 

437 def _visit_node_out_helper(self, node, method): 

438 self.node_stack.pop() 

439 transformed = method(node, self.data[id(node)]) 

440 if transformed is not Discard: 

441 self.data[self.node_stack[-1]].append(transformed) 

442 del self.data[id(node)] 

443 

444 def visit_symbol_node_out(self, node): 

445 self._visit_node_out_helper(node, self.transform_symbol_node) 

446 

447 def visit_intermediate_node_out(self, node): 

448 self._visit_node_out_helper(node, self.transform_intermediate_node) 

449 

450 def visit_packed_node_out(self, node): 

451 self._visit_node_out_helper(node, self.transform_packed_node) 

452 

453 

454class ForestSumVisitor(ForestVisitor): 

455 """ 

456 A visitor for prioritizing ambiguous parts of the Forest. 

457 

458 This visitor is used when support for explicit priorities on 

459 rules is requested (whether normal, or invert). It walks the 

460 forest (or subsets thereof) and cascades properties upwards 

461 from the leaves. 

462 

463 It would be ideal to do this during parsing, however this would 

464 require processing each Earley item multiple times. That's 

465 a big performance drawback; so running a forest walk is the 

466 lesser of two evils: there can be significantly more Earley 

467 items created during parsing than there are SPPF nodes in the 

468 final tree. 

469 """ 

470 def __init__(self): 

471 super(ForestSumVisitor, self).__init__(single_visit=True) 

472 

473 def visit_packed_node_in(self, node): 

474 yield node.left 

475 yield node.right 

476 

477 def visit_symbol_node_in(self, node): 

478 return iter(node.children) 

479 

480 def visit_packed_node_out(self, node): 

481 priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0 

482 priority += getattr(node.right, 'priority', 0) 

483 priority += getattr(node.left, 'priority', 0) 

484 node.priority = priority 

485 

486 def visit_symbol_node_out(self, node): 

487 node.priority = max(child.priority for child in node.children) 

488 

489class PackedData(): 

490 """Used in transformationss of packed nodes to distinguish the data 

491 that comes from the left child and the right child. 

492 """ 

493 

494 class _NoData(): 

495 pass 

496 

497 NO_DATA = _NoData() 

498 

499 def __init__(self, node, data): 

500 self.left = self.NO_DATA 

501 self.right = self.NO_DATA 

502 if data: 

503 if node.left is not None: 

504 self.left = data[0] 

505 if len(data) > 1: 

506 self.right = data[1] 

507 else: 

508 self.right = data[0] 

509 

510class ForestToParseTree(ForestTransformer): 

511 """Used by the earley parser when ambiguity equals 'resolve' or 

512 'explicit'. Transforms an SPPF into an (ambiguous) parse tree. 

513 

514 Parameters: 

515 tree_class: The tree class to use for construction 

516 callbacks: A dictionary of rules to functions that output a tree 

517 prioritizer: A ``ForestVisitor`` that manipulates the priorities of ForestNodes 

518 resolve_ambiguity: If True, ambiguities will be resolved based on 

519 priorities. Otherwise, `_ambig` nodes will be in the resulting tree. 

520 use_cache: If True, the results of packed node transformations will be cached. 

521 """ 

522 

523 def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True): 

524 super(ForestToParseTree, self).__init__() 

525 self.tree_class = tree_class 

526 self.callbacks = callbacks 

527 self.prioritizer = prioritizer 

528 self.resolve_ambiguity = resolve_ambiguity 

529 self._use_cache = use_cache 

530 self._cache = {} 

531 self._on_cycle_retreat = False 

532 self._cycle_node = None 

533 self._successful_visits = set() 

534 

535 def visit(self, root): 

536 if self.prioritizer: 

537 self.prioritizer.visit(root) 

538 super(ForestToParseTree, self).visit(root) 

539 self._cache = {} 

540 

541 def on_cycle(self, node, path): 

542 logger.debug("Cycle encountered in the SPPF at node: %s. " 

543 "As infinite ambiguities cannot be represented in a tree, " 

544 "this family of derivations will be discarded.", node) 

545 self._cycle_node = node 

546 self._on_cycle_retreat = True 

547 

548 def _check_cycle(self, node): 

549 if self._on_cycle_retreat: 

550 if id(node) == id(self._cycle_node) or id(node) in self._successful_visits: 

551 self._cycle_node = None 

552 self._on_cycle_retreat = False 

553 else: 

554 return Discard 

555 

556 def _collapse_ambig(self, children): 

557 new_children = [] 

558 for child in children: 

559 if hasattr(child, 'data') and child.data == '_ambig': 

560 new_children += child.children 

561 else: 

562 new_children.append(child) 

563 return new_children 

564 

565 def _call_rule_func(self, node, data): 

566 # called when transforming children of symbol nodes 

567 # data is a list of trees or tokens that correspond to the 

568 # symbol's rule expansion 

569 return self.callbacks[node.rule](data) 

570 

571 def _call_ambig_func(self, node, data): 

572 # called when transforming a symbol node 

573 # data is a list of trees where each tree's data is 

574 # equal to the name of the symbol or one of its aliases. 

575 if len(data) > 1: 

576 return self.tree_class('_ambig', data) 

577 elif data: 

578 return data[0] 

579 return Discard 

580 

581 def transform_symbol_node(self, node, data): 

582 if id(node) not in self._successful_visits: 

583 return Discard 

584 r = self._check_cycle(node) 

585 if r is Discard: 

586 return r 

587 self._successful_visits.remove(id(node)) 

588 data = self._collapse_ambig(data) 

589 return self._call_ambig_func(node, data) 

590 

591 def transform_intermediate_node(self, node, data): 

592 if id(node) not in self._successful_visits: 

593 return Discard 

594 r = self._check_cycle(node) 

595 if r is Discard: 

596 return r 

597 self._successful_visits.remove(id(node)) 

598 if len(data) > 1: 

599 children = [self.tree_class('_inter', c) for c in data] 

600 return self.tree_class('_iambig', children) 

601 return data[0] 

602 

603 def transform_packed_node(self, node, data): 

604 r = self._check_cycle(node) 

605 if r is Discard: 

606 return r 

607 if self.resolve_ambiguity and id(node.parent) in self._successful_visits: 

608 return Discard 

609 if self._use_cache and id(node) in self._cache: 

610 return self._cache[id(node)] 

611 children = [] 

612 assert len(data) <= 2 

613 data = PackedData(node, data) 

614 if data.left is not PackedData.NO_DATA: 

615 if node.left.is_intermediate and isinstance(data.left, list): 

616 children += data.left 

617 else: 

618 children.append(data.left) 

619 if data.right is not PackedData.NO_DATA: 

620 children.append(data.right) 

621 if node.parent.is_intermediate: 

622 return self._cache.setdefault(id(node), children) 

623 return self._cache.setdefault(id(node), self._call_rule_func(node, children)) 

624 

625 def visit_symbol_node_in(self, node): 

626 super(ForestToParseTree, self).visit_symbol_node_in(node) 

627 if self._on_cycle_retreat: 

628 return 

629 return node.children 

630 

631 def visit_packed_node_in(self, node): 

632 self._on_cycle_retreat = False 

633 to_visit = super(ForestToParseTree, self).visit_packed_node_in(node) 

634 if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits: 

635 if not self._use_cache or id(node) not in self._cache: 

636 return to_visit 

637 

638 def visit_packed_node_out(self, node): 

639 super(ForestToParseTree, self).visit_packed_node_out(node) 

640 if not self._on_cycle_retreat: 

641 self._successful_visits.add(id(node.parent)) 

642 

643def handles_ambiguity(func): 

644 """Decorator for methods of subclasses of ``TreeForestTransformer``. 

645 Denotes that the method should receive a list of transformed derivations.""" 

646 func.handles_ambiguity = True 

647 return func 

648 

649class TreeForestTransformer(ForestToParseTree): 

650 """A ``ForestTransformer`` with a tree ``Transformer``-like interface. 

651 By default, it will construct a tree. 

652 

653 Methods provided via inheritance are called based on the rule/symbol 

654 names of nodes in the forest. 

655 

656 Methods that act on rules will receive a list of the results of the 

657 transformations of the rule's children. By default, trees and tokens. 

658 

659 Methods that act on tokens will receive a token. 

660 

661 Alternatively, methods that act on rules may be annotated with 

662 ``handles_ambiguity``. In this case, the function will receive a list 

663 of all the transformations of all the derivations of the rule. 

664 By default, a list of trees where each tree.data is equal to the 

665 rule name or one of its aliases. 

666 

667 Non-tree transformations are made possible by override of 

668 ``__default__``, ``__default_token__``, and ``__default_ambig__``. 

669 

670 Note: 

671 Tree shaping features such as inlined rules and token filtering are 

672 not built into the transformation. Positions are also not propagated. 

673 

674 Parameters: 

675 tree_class: The tree class to use for construction 

676 prioritizer: A ``ForestVisitor`` that manipulates the priorities of nodes in the SPPF. 

677 resolve_ambiguity: If True, ambiguities will be resolved based on priorities. 

678 use_cache (bool): If True, caches the results of some transformations, 

679 potentially improving performance when ``resolve_ambiguity==False``. 

680 Only use if you know what you are doing: i.e. All transformation 

681 functions are pure and referentially transparent. 

682 """ 

683 

684 def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False): 

685 super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache) 

686 

687 def __default__(self, name, data): 

688 """Default operation on tree (for override). 

689 

690 Returns a tree with name with data as children. 

691 """ 

692 return self.tree_class(name, data) 

693 

694 def __default_ambig__(self, name, data): 

695 """Default operation on ambiguous rule (for override). 

696 

697 Wraps data in an '_ambig_' node if it contains more than 

698 one element. 

699 """ 

700 if len(data) > 1: 

701 return self.tree_class('_ambig', data) 

702 elif data: 

703 return data[0] 

704 return Discard 

705 

706 def __default_token__(self, node): 

707 """Default operation on ``Token`` (for override). 

708 

709 Returns ``node``. 

710 """ 

711 return node 

712 

713 def transform_token_node(self, node): 

714 return getattr(self, node.type, self.__default_token__)(node) 

715 

716 def _call_rule_func(self, node, data): 

717 name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name 

718 user_func = getattr(self, name, self.__default__) 

719 if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'): 

720 user_func = partial(self.__default__, name) 

721 if not self.resolve_ambiguity: 

722 wrapper = partial(AmbiguousIntermediateExpander, self.tree_class) 

723 user_func = wrapper(user_func) 

724 return user_func(data) 

725 

726 def _call_ambig_func(self, node, data): 

727 name = node.s.name 

728 user_func = getattr(self, name, self.__default_ambig__) 

729 if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'): 

730 user_func = partial(self.__default_ambig__, name) 

731 return user_func(data) 

732 

733class ForestToPyDotVisitor(ForestVisitor): 

734 """ 

735 A Forest visitor which writes the SPPF to a PNG. 

736 

737 The SPPF can get really large, really quickly because 

738 of the amount of meta-data it stores, so this is probably 

739 only useful for trivial trees and learning how the SPPF 

740 is structured. 

741 """ 

742 def __init__(self, rankdir="TB"): 

743 super(ForestToPyDotVisitor, self).__init__(single_visit=True) 

744 self.pydot = import_module('pydot') 

745 self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir) 

746 

747 def visit(self, root, filename): 

748 super(ForestToPyDotVisitor, self).visit(root) 

749 try: 

750 self.graph.write_png(filename) 

751 except FileNotFoundError as e: 

752 logger.error("Could not write png: ", e) 

753 

754 def visit_token_node(self, node): 

755 graph_node_id = str(id(node)) 

756 graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"')) 

757 graph_node_color = 0x808080 

758 graph_node_style = "\"filled,rounded\"" 

759 graph_node_shape = "diamond" 

760 graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label) 

761 self.graph.add_node(graph_node) 

762 

763 def visit_packed_node_in(self, node): 

764 graph_node_id = str(id(node)) 

765 graph_node_label = repr(node) 

766 graph_node_color = 0x808080 

767 graph_node_style = "filled" 

768 graph_node_shape = "diamond" 

769 graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label) 

770 self.graph.add_node(graph_node) 

771 yield node.left 

772 yield node.right 

773 

774 def visit_packed_node_out(self, node): 

775 graph_node_id = str(id(node)) 

776 graph_node = self.graph.get_node(graph_node_id)[0] 

777 for child in [node.left, node.right]: 

778 if child is not None: 

779 child_graph_node_id = str(id(child.token if isinstance(child, TokenNode) else child)) 

780 child_graph_node = self.graph.get_node(child_graph_node_id)[0] 

781 self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node)) 

782 else: 

783 #### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay. 

784 child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890)) 

785 child_graph_node_style = "invis" 

786 child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None") 

787 child_edge_style = "invis" 

788 self.graph.add_node(child_graph_node) 

789 self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style)) 

790 

791 def visit_symbol_node_in(self, node): 

792 graph_node_id = str(id(node)) 

793 graph_node_label = repr(node) 

794 graph_node_color = 0x808080 

795 graph_node_style = "\"filled\"" 

796 if node.is_intermediate: 

797 graph_node_shape = "ellipse" 

798 else: 

799 graph_node_shape = "rectangle" 

800 graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label) 

801 self.graph.add_node(graph_node) 

802 return iter(node.children) 

803 

804 def visit_symbol_node_out(self, node): 

805 graph_node_id = str(id(node)) 

806 graph_node = self.graph.get_node(graph_node_id)[0] 

807 for child in node.children: 

808 child_graph_node_id = str(id(child)) 

809 child_graph_node = self.graph.get_node(child_graph_node_id)[0] 

810 self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))