Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/lark/parsers/earley_forest.py: 72%
463 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-14 06:19 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-14 06:19 +0000
1""""This module implements an SPPF implementation
3This is used as the primary output mechanism for the Earley parser
4in order to store complex ambiguities.
6Full reference and more details is here:
7https://web.archive.org/web/20190616123959/http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
8"""
10from typing import Type, AbstractSet
11from random import randint
12from collections import deque
13from operator import attrgetter
14from importlib import import_module
15from functools import partial
17from ..parse_tree_builder import AmbiguousIntermediateExpander
18from ..visitors import Discard
19from ..utils import logger, OrderedSet
20from ..tree import Tree
22class ForestNode:
23 pass
25class SymbolNode(ForestNode):
26 """
27 A Symbol Node represents a symbol (or Intermediate LR0).
29 Symbol nodes are keyed by the symbol (s). For intermediate nodes
30 s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol
31 nodes, s will be a string representing the non-terminal origin (i.e.
32 the left hand side of the rule).
34 The children of a Symbol or Intermediate Node will always be Packed Nodes;
35 with each Packed Node child representing a single derivation of a production.
37 Hence a Symbol Node with a single child is unambiguous.
39 Parameters:
40 s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
41 start: The index of the start of the substring matched by this symbol (inclusive).
42 end: The index of the end of the substring matched by this symbol (exclusive).
44 Properties:
45 is_intermediate: True if this node is an intermediate node.
46 priority: The priority of the node's symbol.
47 """
48 Set: Type[AbstractSet] = set # Overridden by StableSymbolNode
49 __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate', '_hash')
50 def __init__(self, s, start, end):
51 self.s = s
52 self.start = start
53 self.end = end
54 self._children = self.Set()
55 self.paths = self.Set()
56 self.paths_loaded = False
58 ### We use inf here as it can be safely negated without resorting to conditionals,
59 # unlike None or float('NaN'), and sorts appropriately.
60 self.priority = float('-inf')
61 self.is_intermediate = isinstance(s, tuple)
62 self._hash = hash((self.s, self.start, self.end))
64 def add_family(self, lr0, rule, start, left, right):
65 self._children.add(PackedNode(self, lr0, rule, start, left, right))
67 def add_path(self, transitive, node):
68 self.paths.add((transitive, node))
70 def load_paths(self):
71 for transitive, node in self.paths:
72 if transitive.next_titem is not None:
73 vn = type(self)(transitive.next_titem.s, transitive.next_titem.start, self.end)
74 vn.add_path(transitive.next_titem, node)
75 self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn)
76 else:
77 self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node)
78 self.paths_loaded = True
80 @property
81 def is_ambiguous(self):
82 """Returns True if this node is ambiguous."""
83 return len(self.children) > 1
85 @property
86 def children(self):
87 """Returns a list of this node's children sorted from greatest to
88 least priority."""
89 if not self.paths_loaded:
90 self.load_paths()
91 return sorted(self._children, key=attrgetter('sort_key'))
93 def __iter__(self):
94 return iter(self._children)
96 def __eq__(self, other):
97 if not isinstance(other, SymbolNode):
98 return False
99 return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.end is other.end)
101 def __hash__(self):
102 return self._hash
104 def __repr__(self):
105 if self.is_intermediate:
106 rule = self.s[0]
107 ptr = self.s[1]
108 before = ( expansion.name for expansion in rule.expansion[:ptr] )
109 after = ( expansion.name for expansion in rule.expansion[ptr:] )
110 symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
111 else:
112 symbol = self.s.name
113 return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)
115class StableSymbolNode(SymbolNode):
116 "A version of SymbolNode that uses OrderedSet for output stability"
117 Set = OrderedSet
119class PackedNode(ForestNode):
120 """
121 A Packed Node represents a single derivation in a symbol node.
123 Parameters:
124 rule: The rule associated with this node.
125 parent: The parent of this node.
126 left: The left child of this node. ``None`` if one does not exist.
127 right: The right child of this node. ``None`` if one does not exist.
128 priority: The priority of this node.
129 """
130 __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')
131 def __init__(self, parent, s, rule, start, left, right):
132 self.parent = parent
133 self.s = s
134 self.start = start
135 self.rule = rule
136 self.left = left
137 self.right = right
138 self.priority = float('-inf')
139 self._hash = hash((self.left, self.right))
141 @property
142 def is_empty(self):
143 return self.left is None and self.right is None
145 @property
146 def sort_key(self):
147 """
148 Used to sort PackedNode children of SymbolNodes.
149 A SymbolNode has multiple PackedNodes if it matched
150 ambiguously. Hence, we use the sort order to identify
151 the order in which ambiguous children should be considered.
152 """
153 return self.is_empty, -self.priority, self.rule.order
155 @property
156 def children(self):
157 """Returns a list of this node's children."""
158 return [x for x in [self.left, self.right] if x is not None]
160 def __iter__(self):
161 yield self.left
162 yield self.right
164 def __eq__(self, other):
165 if not isinstance(other, PackedNode):
166 return False
167 return self is other or (self.left == other.left and self.right == other.right)
169 def __hash__(self):
170 return self._hash
172 def __repr__(self):
173 if isinstance(self.s, tuple):
174 rule = self.s[0]
175 ptr = self.s[1]
176 before = ( expansion.name for expansion in rule.expansion[:ptr] )
177 after = ( expansion.name for expansion in rule.expansion[ptr:] )
178 symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
179 else:
180 symbol = self.s.name
181 return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)
183class TokenNode(ForestNode):
184 """
185 A Token Node represents a matched terminal and is always a leaf node.
187 Parameters:
188 token: The Token associated with this node.
189 term: The TerminalDef matched by the token.
190 priority: The priority of this node.
191 """
192 __slots__ = ('token', 'term', 'priority', '_hash')
193 def __init__(self, token, term, priority=None):
194 self.token = token
195 self.term = term
196 if priority is not None:
197 self.priority = priority
198 else:
199 self.priority = term.priority if term is not None else 0
200 self._hash = hash(token)
202 def __eq__(self, other):
203 if not isinstance(other, TokenNode):
204 return False
205 return self is other or (self.token == other.token)
207 def __hash__(self):
208 return self._hash
210 def __repr__(self):
211 return repr(self.token)
213class ForestVisitor:
214 """
215 An abstract base class for building forest visitors.
217 This class performs a controllable depth-first walk of an SPPF.
218 The visitor will not enter cycles and will backtrack if one is encountered.
219 Subclasses are notified of cycles through the ``on_cycle`` method.
221 Behavior for visit events is defined by overriding the
222 ``visit*node*`` functions.
224 The walk is controlled by the return values of the ``visit*node_in``
225 methods. Returning a node(s) will schedule them to be visited. The visitor
226 will begin to backtrack if no nodes are returned.
228 Parameters:
229 single_visit: If ``True``, non-Token nodes will only be visited once.
230 """
232 def __init__(self, single_visit=False):
233 self.single_visit = single_visit
235 def visit_token_node(self, node):
236 """Called when a ``Token`` is visited. ``Token`` nodes are always leaves."""
237 pass
239 def visit_symbol_node_in(self, node):
240 """Called when a symbol node is visited. Nodes that are returned
241 will be scheduled to be visited. If ``visit_intermediate_node_in``
242 is not implemented, this function will be called for intermediate
243 nodes as well."""
244 pass
246 def visit_symbol_node_out(self, node):
247 """Called after all nodes returned from a corresponding ``visit_symbol_node_in``
248 call have been visited. If ``visit_intermediate_node_out``
249 is not implemented, this function will be called for intermediate
250 nodes as well."""
251 pass
253 def visit_packed_node_in(self, node):
254 """Called when a packed node is visited. Nodes that are returned
255 will be scheduled to be visited. """
256 pass
258 def visit_packed_node_out(self, node):
259 """Called after all nodes returned from a corresponding ``visit_packed_node_in``
260 call have been visited."""
261 pass
263 def on_cycle(self, node, path):
264 """Called when a cycle is encountered.
266 Parameters:
267 node: The node that causes a cycle.
268 path: The list of nodes being visited: nodes that have been
269 entered but not exited. The first element is the root in a forest
270 visit, and the last element is the node visited most recently.
271 ``path`` should be treated as read-only.
272 """
273 pass
275 def get_cycle_in_path(self, node, path):
276 """A utility function for use in ``on_cycle`` to obtain a slice of
277 ``path`` that only contains the nodes that make up the cycle."""
278 index = len(path) - 1
279 while id(path[index]) != id(node):
280 index -= 1
281 return path[index:]
283 def visit(self, root):
284 # Visiting is a list of IDs of all symbol/intermediate nodes currently in
285 # the stack. It serves two purposes: to detect when we 'recurse' in and out
286 # of a symbol/intermediate so that we can process both up and down. Also,
287 # since the SPPF can have cycles it allows us to detect if we're trying
288 # to recurse into a node that's already on the stack (infinite recursion).
289 visiting = set()
291 # set of all nodes that have been visited
292 visited = set()
294 # a list of nodes that are currently being visited
295 # used for the `on_cycle` callback
296 path = []
298 # We do not use recursion here to walk the Forest due to the limited
299 # stack size in python. Therefore input_stack is essentially our stack.
300 input_stack = deque([root])
302 # It is much faster to cache these as locals since they are called
303 # many times in large parses.
304 vpno = getattr(self, 'visit_packed_node_out')
305 vpni = getattr(self, 'visit_packed_node_in')
306 vsno = getattr(self, 'visit_symbol_node_out')
307 vsni = getattr(self, 'visit_symbol_node_in')
308 vino = getattr(self, 'visit_intermediate_node_out', vsno)
309 vini = getattr(self, 'visit_intermediate_node_in', vsni)
310 vtn = getattr(self, 'visit_token_node')
311 oc = getattr(self, 'on_cycle')
313 while input_stack:
314 current = next(reversed(input_stack))
315 try:
316 next_node = next(current)
317 except StopIteration:
318 input_stack.pop()
319 continue
320 except TypeError:
321 ### If the current object is not an iterator, pass through to Token/SymbolNode
322 pass
323 else:
324 if next_node is None:
325 continue
327 if id(next_node) in visiting:
328 oc(next_node, path)
329 continue
331 input_stack.append(next_node)
332 continue
334 if isinstance(current, TokenNode):
335 vtn(current.token)
336 input_stack.pop()
337 continue
339 current_id = id(current)
340 if current_id in visiting:
341 if isinstance(current, PackedNode):
342 vpno(current)
343 elif current.is_intermediate:
344 vino(current)
345 else:
346 vsno(current)
347 input_stack.pop()
348 path.pop()
349 visiting.remove(current_id)
350 visited.add(current_id)
351 elif self.single_visit and current_id in visited:
352 input_stack.pop()
353 else:
354 visiting.add(current_id)
355 path.append(current)
356 if isinstance(current, PackedNode):
357 next_node = vpni(current)
358 elif current.is_intermediate:
359 next_node = vini(current)
360 else:
361 next_node = vsni(current)
362 if next_node is None:
363 continue
365 if not isinstance(next_node, ForestNode):
366 next_node = iter(next_node)
367 elif id(next_node) in visiting:
368 oc(next_node, path)
369 continue
371 input_stack.append(next_node)
373class ForestTransformer(ForestVisitor):
374 """The base class for a bottom-up forest transformation. Most users will
375 want to use ``TreeForestTransformer`` instead as it has a friendlier
376 interface and covers most use cases.
378 Transformations are applied via inheritance and overriding of the
379 ``transform*node`` methods.
381 ``transform_token_node`` receives a ``Token`` as an argument.
382 All other methods receive the node that is being transformed and
383 a list of the results of the transformations of that node's children.
384 The return value of these methods are the resulting transformations.
386 If ``Discard`` is raised in a node's transformation, no data from that node
387 will be passed to its parent's transformation.
388 """
390 def __init__(self):
391 super(ForestTransformer, self).__init__()
392 # results of transformations
393 self.data = dict()
394 # used to track parent nodes
395 self.node_stack = deque()
397 def transform(self, root):
398 """Perform a transformation on an SPPF."""
399 self.node_stack.append('result')
400 self.data['result'] = []
401 self.visit(root)
402 assert len(self.data['result']) <= 1
403 if self.data['result']:
404 return self.data['result'][0]
406 def transform_symbol_node(self, node, data):
407 """Transform a symbol node."""
408 return node
410 def transform_intermediate_node(self, node, data):
411 """Transform an intermediate node."""
412 return node
414 def transform_packed_node(self, node, data):
415 """Transform a packed node."""
416 return node
418 def transform_token_node(self, node):
419 """Transform a ``Token``."""
420 return node
422 def visit_symbol_node_in(self, node):
423 self.node_stack.append(id(node))
424 self.data[id(node)] = []
425 return node.children
427 def visit_packed_node_in(self, node):
428 self.node_stack.append(id(node))
429 self.data[id(node)] = []
430 return node.children
432 def visit_token_node(self, node):
433 transformed = self.transform_token_node(node)
434 if transformed is not Discard:
435 self.data[self.node_stack[-1]].append(transformed)
437 def _visit_node_out_helper(self, node, method):
438 self.node_stack.pop()
439 transformed = method(node, self.data[id(node)])
440 if transformed is not Discard:
441 self.data[self.node_stack[-1]].append(transformed)
442 del self.data[id(node)]
444 def visit_symbol_node_out(self, node):
445 self._visit_node_out_helper(node, self.transform_symbol_node)
447 def visit_intermediate_node_out(self, node):
448 self._visit_node_out_helper(node, self.transform_intermediate_node)
450 def visit_packed_node_out(self, node):
451 self._visit_node_out_helper(node, self.transform_packed_node)
454class ForestSumVisitor(ForestVisitor):
455 """
456 A visitor for prioritizing ambiguous parts of the Forest.
458 This visitor is used when support for explicit priorities on
459 rules is requested (whether normal, or invert). It walks the
460 forest (or subsets thereof) and cascades properties upwards
461 from the leaves.
463 It would be ideal to do this during parsing, however this would
464 require processing each Earley item multiple times. That's
465 a big performance drawback; so running a forest walk is the
466 lesser of two evils: there can be significantly more Earley
467 items created during parsing than there are SPPF nodes in the
468 final tree.
469 """
470 def __init__(self):
471 super(ForestSumVisitor, self).__init__(single_visit=True)
473 def visit_packed_node_in(self, node):
474 yield node.left
475 yield node.right
477 def visit_symbol_node_in(self, node):
478 return iter(node.children)
480 def visit_packed_node_out(self, node):
481 priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0
482 priority += getattr(node.right, 'priority', 0)
483 priority += getattr(node.left, 'priority', 0)
484 node.priority = priority
486 def visit_symbol_node_out(self, node):
487 node.priority = max(child.priority for child in node.children)
489class PackedData():
490 """Used in transformationss of packed nodes to distinguish the data
491 that comes from the left child and the right child.
492 """
494 class _NoData():
495 pass
497 NO_DATA = _NoData()
499 def __init__(self, node, data):
500 self.left = self.NO_DATA
501 self.right = self.NO_DATA
502 if data:
503 if node.left is not None:
504 self.left = data[0]
505 if len(data) > 1:
506 self.right = data[1]
507 else:
508 self.right = data[0]
510class ForestToParseTree(ForestTransformer):
511 """Used by the earley parser when ambiguity equals 'resolve' or
512 'explicit'. Transforms an SPPF into an (ambiguous) parse tree.
514 Parameters:
515 tree_class: The tree class to use for construction
516 callbacks: A dictionary of rules to functions that output a tree
517 prioritizer: A ``ForestVisitor`` that manipulates the priorities of ForestNodes
518 resolve_ambiguity: If True, ambiguities will be resolved based on
519 priorities. Otherwise, `_ambig` nodes will be in the resulting tree.
520 use_cache: If True, the results of packed node transformations will be cached.
521 """
523 def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True):
524 super(ForestToParseTree, self).__init__()
525 self.tree_class = tree_class
526 self.callbacks = callbacks
527 self.prioritizer = prioritizer
528 self.resolve_ambiguity = resolve_ambiguity
529 self._use_cache = use_cache
530 self._cache = {}
531 self._on_cycle_retreat = False
532 self._cycle_node = None
533 self._successful_visits = set()
535 def visit(self, root):
536 if self.prioritizer:
537 self.prioritizer.visit(root)
538 super(ForestToParseTree, self).visit(root)
539 self._cache = {}
541 def on_cycle(self, node, path):
542 logger.debug("Cycle encountered in the SPPF at node: %s. "
543 "As infinite ambiguities cannot be represented in a tree, "
544 "this family of derivations will be discarded.", node)
545 self._cycle_node = node
546 self._on_cycle_retreat = True
548 def _check_cycle(self, node):
549 if self._on_cycle_retreat:
550 if id(node) == id(self._cycle_node) or id(node) in self._successful_visits:
551 self._cycle_node = None
552 self._on_cycle_retreat = False
553 else:
554 return Discard
556 def _collapse_ambig(self, children):
557 new_children = []
558 for child in children:
559 if hasattr(child, 'data') and child.data == '_ambig':
560 new_children += child.children
561 else:
562 new_children.append(child)
563 return new_children
565 def _call_rule_func(self, node, data):
566 # called when transforming children of symbol nodes
567 # data is a list of trees or tokens that correspond to the
568 # symbol's rule expansion
569 return self.callbacks[node.rule](data)
571 def _call_ambig_func(self, node, data):
572 # called when transforming a symbol node
573 # data is a list of trees where each tree's data is
574 # equal to the name of the symbol or one of its aliases.
575 if len(data) > 1:
576 return self.tree_class('_ambig', data)
577 elif data:
578 return data[0]
579 return Discard
581 def transform_symbol_node(self, node, data):
582 if id(node) not in self._successful_visits:
583 return Discard
584 r = self._check_cycle(node)
585 if r is Discard:
586 return r
587 self._successful_visits.remove(id(node))
588 data = self._collapse_ambig(data)
589 return self._call_ambig_func(node, data)
591 def transform_intermediate_node(self, node, data):
592 if id(node) not in self._successful_visits:
593 return Discard
594 r = self._check_cycle(node)
595 if r is Discard:
596 return r
597 self._successful_visits.remove(id(node))
598 if len(data) > 1:
599 children = [self.tree_class('_inter', c) for c in data]
600 return self.tree_class('_iambig', children)
601 return data[0]
603 def transform_packed_node(self, node, data):
604 r = self._check_cycle(node)
605 if r is Discard:
606 return r
607 if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
608 return Discard
609 if self._use_cache and id(node) in self._cache:
610 return self._cache[id(node)]
611 children = []
612 assert len(data) <= 2
613 data = PackedData(node, data)
614 if data.left is not PackedData.NO_DATA:
615 if node.left.is_intermediate and isinstance(data.left, list):
616 children += data.left
617 else:
618 children.append(data.left)
619 if data.right is not PackedData.NO_DATA:
620 children.append(data.right)
621 if node.parent.is_intermediate:
622 return self._cache.setdefault(id(node), children)
623 return self._cache.setdefault(id(node), self._call_rule_func(node, children))
625 def visit_symbol_node_in(self, node):
626 super(ForestToParseTree, self).visit_symbol_node_in(node)
627 if self._on_cycle_retreat:
628 return
629 return node.children
631 def visit_packed_node_in(self, node):
632 self._on_cycle_retreat = False
633 to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
634 if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
635 if not self._use_cache or id(node) not in self._cache:
636 return to_visit
638 def visit_packed_node_out(self, node):
639 super(ForestToParseTree, self).visit_packed_node_out(node)
640 if not self._on_cycle_retreat:
641 self._successful_visits.add(id(node.parent))
643def handles_ambiguity(func):
644 """Decorator for methods of subclasses of ``TreeForestTransformer``.
645 Denotes that the method should receive a list of transformed derivations."""
646 func.handles_ambiguity = True
647 return func
649class TreeForestTransformer(ForestToParseTree):
650 """A ``ForestTransformer`` with a tree ``Transformer``-like interface.
651 By default, it will construct a tree.
653 Methods provided via inheritance are called based on the rule/symbol
654 names of nodes in the forest.
656 Methods that act on rules will receive a list of the results of the
657 transformations of the rule's children. By default, trees and tokens.
659 Methods that act on tokens will receive a token.
661 Alternatively, methods that act on rules may be annotated with
662 ``handles_ambiguity``. In this case, the function will receive a list
663 of all the transformations of all the derivations of the rule.
664 By default, a list of trees where each tree.data is equal to the
665 rule name or one of its aliases.
667 Non-tree transformations are made possible by override of
668 ``__default__``, ``__default_token__``, and ``__default_ambig__``.
670 Note:
671 Tree shaping features such as inlined rules and token filtering are
672 not built into the transformation. Positions are also not propagated.
674 Parameters:
675 tree_class: The tree class to use for construction
676 prioritizer: A ``ForestVisitor`` that manipulates the priorities of nodes in the SPPF.
677 resolve_ambiguity: If True, ambiguities will be resolved based on priorities.
678 use_cache (bool): If True, caches the results of some transformations,
679 potentially improving performance when ``resolve_ambiguity==False``.
680 Only use if you know what you are doing: i.e. All transformation
681 functions are pure and referentially transparent.
682 """
684 def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False):
685 super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache)
687 def __default__(self, name, data):
688 """Default operation on tree (for override).
690 Returns a tree with name with data as children.
691 """
692 return self.tree_class(name, data)
694 def __default_ambig__(self, name, data):
695 """Default operation on ambiguous rule (for override).
697 Wraps data in an '_ambig_' node if it contains more than
698 one element.
699 """
700 if len(data) > 1:
701 return self.tree_class('_ambig', data)
702 elif data:
703 return data[0]
704 return Discard
706 def __default_token__(self, node):
707 """Default operation on ``Token`` (for override).
709 Returns ``node``.
710 """
711 return node
713 def transform_token_node(self, node):
714 return getattr(self, node.type, self.__default_token__)(node)
716 def _call_rule_func(self, node, data):
717 name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name
718 user_func = getattr(self, name, self.__default__)
719 if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'):
720 user_func = partial(self.__default__, name)
721 if not self.resolve_ambiguity:
722 wrapper = partial(AmbiguousIntermediateExpander, self.tree_class)
723 user_func = wrapper(user_func)
724 return user_func(data)
726 def _call_ambig_func(self, node, data):
727 name = node.s.name
728 user_func = getattr(self, name, self.__default_ambig__)
729 if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'):
730 user_func = partial(self.__default_ambig__, name)
731 return user_func(data)
733class ForestToPyDotVisitor(ForestVisitor):
734 """
735 A Forest visitor which writes the SPPF to a PNG.
737 The SPPF can get really large, really quickly because
738 of the amount of meta-data it stores, so this is probably
739 only useful for trivial trees and learning how the SPPF
740 is structured.
741 """
742 def __init__(self, rankdir="TB"):
743 super(ForestToPyDotVisitor, self).__init__(single_visit=True)
744 self.pydot = import_module('pydot')
745 self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)
747 def visit(self, root, filename):
748 super(ForestToPyDotVisitor, self).visit(root)
749 try:
750 self.graph.write_png(filename)
751 except FileNotFoundError as e:
752 logger.error("Could not write png: ", e)
754 def visit_token_node(self, node):
755 graph_node_id = str(id(node))
756 graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
757 graph_node_color = 0x808080
758 graph_node_style = "\"filled,rounded\""
759 graph_node_shape = "diamond"
760 graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
761 self.graph.add_node(graph_node)
763 def visit_packed_node_in(self, node):
764 graph_node_id = str(id(node))
765 graph_node_label = repr(node)
766 graph_node_color = 0x808080
767 graph_node_style = "filled"
768 graph_node_shape = "diamond"
769 graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
770 self.graph.add_node(graph_node)
771 yield node.left
772 yield node.right
774 def visit_packed_node_out(self, node):
775 graph_node_id = str(id(node))
776 graph_node = self.graph.get_node(graph_node_id)[0]
777 for child in [node.left, node.right]:
778 if child is not None:
779 child_graph_node_id = str(id(child.token if isinstance(child, TokenNode) else child))
780 child_graph_node = self.graph.get_node(child_graph_node_id)[0]
781 self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
782 else:
783 #### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay.
784 child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890))
785 child_graph_node_style = "invis"
786 child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
787 child_edge_style = "invis"
788 self.graph.add_node(child_graph_node)
789 self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))
791 def visit_symbol_node_in(self, node):
792 graph_node_id = str(id(node))
793 graph_node_label = repr(node)
794 graph_node_color = 0x808080
795 graph_node_style = "\"filled\""
796 if node.is_intermediate:
797 graph_node_shape = "ellipse"
798 else:
799 graph_node_shape = "rectangle"
800 graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
801 self.graph.add_node(graph_node)
802 return iter(node.children)
804 def visit_symbol_node_out(self, node):
805 graph_node_id = str(id(node))
806 graph_node = self.graph.get_node(graph_node_id)[0]
807 for child in node.children:
808 child_graph_node_id = str(id(child))
809 child_graph_node = self.graph.get_node(child_graph_node_id)[0]
810 self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))