1""""This module implements an SPPF implementation
2
3This is used as the primary output mechanism for the Earley parser
4in order to store complex ambiguities.
5
6Full reference and more details is here:
7https://web.archive.org/web/20190616123959/http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
8"""
9
10from typing import Type, AbstractSet
11from random import randint
12from collections import deque
13from operator import attrgetter
14from importlib import import_module
15from functools import partial
16
17from ..parse_tree_builder import AmbiguousIntermediateExpander
18from ..visitors import Discard
19from ..utils import logger, OrderedSet
20from ..tree import Tree
21
22class ForestNode:
23 pass
24
25class SymbolNode(ForestNode):
26 """
27 A Symbol Node represents a symbol (or Intermediate LR0).
28
29 Symbol nodes are keyed by the symbol (s). For intermediate nodes
30 s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol
31 nodes, s will be a string representing the non-terminal origin (i.e.
32 the left hand side of the rule).
33
34 The children of a Symbol or Intermediate Node will always be Packed Nodes;
35 with each Packed Node child representing a single derivation of a production.
36
37 Hence a Symbol Node with a single child is unambiguous.
38
39 Parameters:
40 s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
41 start: For dynamic lexers, the index of the start of the substring matched by this symbol (inclusive).
42 end: For dynamic lexers, the index of the end of the substring matched by this symbol (exclusive).
43
44 Properties:
45 is_intermediate: True if this node is an intermediate node.
46 priority: The priority of the node's symbol.
47 """
48 Set: Type[AbstractSet] = set # Overridden by StableSymbolNode
49 __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate')
50 def __init__(self, s, start, end):
51 self.s = s
52 self.start = start
53 self.end = end
54 self._children = self.Set()
55 self.paths = self.Set()
56 self.paths_loaded = False
57
58 ### We use inf here as it can be safely negated without resorting to conditionals,
59 # unlike None or float('NaN'), and sorts appropriately.
60 self.priority = float('-inf')
61 self.is_intermediate = isinstance(s, tuple)
62
63 def add_family(self, lr0, rule, start, left, right):
64 self._children.add(PackedNode(self, lr0, rule, start, left, right))
65
66 def add_path(self, transitive, node):
67 self.paths.add((transitive, node))
68
69 def load_paths(self):
70 for transitive, node in self.paths:
71 if transitive.next_titem is not None:
72 vn = type(self)(transitive.next_titem.s, transitive.next_titem.start, self.end)
73 vn.add_path(transitive.next_titem, node)
74 self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn)
75 else:
76 self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node)
77 self.paths_loaded = True
78
79 @property
80 def is_ambiguous(self):
81 """Returns True if this node is ambiguous."""
82 return len(self.children) > 1
83
84 @property
85 def children(self):
86 """Returns a list of this node's children sorted from greatest to
87 least priority."""
88 if not self.paths_loaded:
89 self.load_paths()
90 return sorted(self._children, key=attrgetter('sort_key'))
91
92 def __iter__(self):
93 return iter(self._children)
94
95 def __repr__(self):
96 if self.is_intermediate:
97 rule = self.s[0]
98 ptr = self.s[1]
99 before = ( expansion.name for expansion in rule.expansion[:ptr] )
100 after = ( expansion.name for expansion in rule.expansion[ptr:] )
101 symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
102 else:
103 symbol = self.s.name
104 return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)
105
106class StableSymbolNode(SymbolNode):
107 "A version of SymbolNode that uses OrderedSet for output stability"
108 Set = OrderedSet
109
110class PackedNode(ForestNode):
111 """
112 A Packed Node represents a single derivation in a symbol node.
113
114 Parameters:
115 rule: The rule associated with this node.
116 parent: The parent of this node.
117 left: The left child of this node. ``None`` if one does not exist.
118 right: The right child of this node. ``None`` if one does not exist.
119 priority: The priority of this node.
120 """
121 __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')
122 def __init__(self, parent, s, rule, start, left, right):
123 self.parent = parent
124 self.s = s
125 self.start = start
126 self.rule = rule
127 self.left = left
128 self.right = right
129 self.priority = float('-inf')
130 self._hash = hash((self.left, self.right))
131
132 @property
133 def is_empty(self):
134 return self.left is None and self.right is None
135
136 @property
137 def sort_key(self):
138 """
139 Used to sort PackedNode children of SymbolNodes.
140 A SymbolNode has multiple PackedNodes if it matched
141 ambiguously. Hence, we use the sort order to identify
142 the order in which ambiguous children should be considered.
143 """
144 return self.is_empty, -self.priority, self.rule.order
145
146 @property
147 def children(self):
148 """Returns a list of this node's children."""
149 return [x for x in [self.left, self.right] if x is not None]
150
151 def __iter__(self):
152 yield self.left
153 yield self.right
154
155 def __eq__(self, other):
156 if not isinstance(other, PackedNode):
157 return False
158 return self is other or (self.left == other.left and self.right == other.right)
159
160 def __hash__(self):
161 return self._hash
162
163 def __repr__(self):
164 if isinstance(self.s, tuple):
165 rule = self.s[0]
166 ptr = self.s[1]
167 before = ( expansion.name for expansion in rule.expansion[:ptr] )
168 after = ( expansion.name for expansion in rule.expansion[ptr:] )
169 symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
170 else:
171 symbol = self.s.name
172 return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)
173
174class TokenNode(ForestNode):
175 """
176 A Token Node represents a matched terminal and is always a leaf node.
177
178 Parameters:
179 token: The Token associated with this node.
180 term: The TerminalDef matched by the token.
181 priority: The priority of this node.
182 """
183 __slots__ = ('token', 'term', 'priority', '_hash')
184 def __init__(self, token, term, priority=None):
185 self.token = token
186 self.term = term
187 if priority is not None:
188 self.priority = priority
189 else:
190 self.priority = term.priority if term is not None else 0
191 self._hash = hash(token)
192
193 def __eq__(self, other):
194 if not isinstance(other, TokenNode):
195 return False
196 return self is other or (self.token == other.token)
197
198 def __hash__(self):
199 return self._hash
200
201 def __repr__(self):
202 return repr(self.token)
203
204class ForestVisitor:
205 """
206 An abstract base class for building forest visitors.
207
208 This class performs a controllable depth-first walk of an SPPF.
209 The visitor will not enter cycles and will backtrack if one is encountered.
210 Subclasses are notified of cycles through the ``on_cycle`` method.
211
212 Behavior for visit events is defined by overriding the
213 ``visit*node*`` functions.
214
215 The walk is controlled by the return values of the ``visit*node_in``
216 methods. Returning a node(s) will schedule them to be visited. The visitor
217 will begin to backtrack if no nodes are returned.
218
219 Parameters:
220 single_visit: If ``True``, non-Token nodes will only be visited once.
221 """
222
223 def __init__(self, single_visit=False):
224 self.single_visit = single_visit
225
226 def visit_token_node(self, node):
227 """Called when a ``Token`` is visited. ``Token`` nodes are always leaves."""
228 pass
229
230 def visit_symbol_node_in(self, node):
231 """Called when a symbol node is visited. Nodes that are returned
232 will be scheduled to be visited. If ``visit_intermediate_node_in``
233 is not implemented, this function will be called for intermediate
234 nodes as well."""
235 pass
236
237 def visit_symbol_node_out(self, node):
238 """Called after all nodes returned from a corresponding ``visit_symbol_node_in``
239 call have been visited. If ``visit_intermediate_node_out``
240 is not implemented, this function will be called for intermediate
241 nodes as well."""
242 pass
243
244 def visit_packed_node_in(self, node):
245 """Called when a packed node is visited. Nodes that are returned
246 will be scheduled to be visited. """
247 pass
248
249 def visit_packed_node_out(self, node):
250 """Called after all nodes returned from a corresponding ``visit_packed_node_in``
251 call have been visited."""
252 pass
253
254 def on_cycle(self, node, path):
255 """Called when a cycle is encountered.
256
257 Parameters:
258 node: The node that causes a cycle.
259 path: The list of nodes being visited: nodes that have been
260 entered but not exited. The first element is the root in a forest
261 visit, and the last element is the node visited most recently.
262 ``path`` should be treated as read-only.
263 """
264 pass
265
266 def get_cycle_in_path(self, node, path):
267 """A utility function for use in ``on_cycle`` to obtain a slice of
268 ``path`` that only contains the nodes that make up the cycle."""
269 index = len(path) - 1
270 while id(path[index]) != id(node):
271 index -= 1
272 return path[index:]
273
274 def visit(self, root):
275 # Visiting is a list of IDs of all symbol/intermediate nodes currently in
276 # the stack. It serves two purposes: to detect when we 'recurse' in and out
277 # of a symbol/intermediate so that we can process both up and down. Also,
278 # since the SPPF can have cycles it allows us to detect if we're trying
279 # to recurse into a node that's already on the stack (infinite recursion).
280 visiting = set()
281
282 # set of all nodes that have been visited
283 visited = set()
284
285 # a list of nodes that are currently being visited
286 # used for the `on_cycle` callback
287 path = []
288
289 # We do not use recursion here to walk the Forest due to the limited
290 # stack size in python. Therefore input_stack is essentially our stack.
291 input_stack = deque([root])
292
293 # It is much faster to cache these as locals since they are called
294 # many times in large parses.
295 vpno = getattr(self, 'visit_packed_node_out')
296 vpni = getattr(self, 'visit_packed_node_in')
297 vsno = getattr(self, 'visit_symbol_node_out')
298 vsni = getattr(self, 'visit_symbol_node_in')
299 vino = getattr(self, 'visit_intermediate_node_out', vsno)
300 vini = getattr(self, 'visit_intermediate_node_in', vsni)
301 vtn = getattr(self, 'visit_token_node')
302 oc = getattr(self, 'on_cycle')
303
304 while input_stack:
305 current = next(reversed(input_stack))
306 try:
307 next_node = next(current)
308 except StopIteration:
309 input_stack.pop()
310 continue
311 except TypeError:
312 ### If the current object is not an iterator, pass through to Token/SymbolNode
313 pass
314 else:
315 if next_node is None:
316 continue
317
318 if id(next_node) in visiting:
319 oc(next_node, path)
320 continue
321
322 input_stack.append(next_node)
323 continue
324
325 if isinstance(current, TokenNode):
326 vtn(current.token)
327 input_stack.pop()
328 continue
329
330 current_id = id(current)
331 if current_id in visiting:
332 if isinstance(current, PackedNode):
333 vpno(current)
334 elif current.is_intermediate:
335 vino(current)
336 else:
337 vsno(current)
338 input_stack.pop()
339 path.pop()
340 visiting.remove(current_id)
341 visited.add(current_id)
342 elif self.single_visit and current_id in visited:
343 input_stack.pop()
344 else:
345 visiting.add(current_id)
346 path.append(current)
347 if isinstance(current, PackedNode):
348 next_node = vpni(current)
349 elif current.is_intermediate:
350 next_node = vini(current)
351 else:
352 next_node = vsni(current)
353 if next_node is None:
354 continue
355
356 if not isinstance(next_node, ForestNode):
357 next_node = iter(next_node)
358 elif id(next_node) in visiting:
359 oc(next_node, path)
360 continue
361
362 input_stack.append(next_node)
363
364class ForestTransformer(ForestVisitor):
365 """The base class for a bottom-up forest transformation. Most users will
366 want to use ``TreeForestTransformer`` instead as it has a friendlier
367 interface and covers most use cases.
368
369 Transformations are applied via inheritance and overriding of the
370 ``transform*node`` methods.
371
372 ``transform_token_node`` receives a ``Token`` as an argument.
373 All other methods receive the node that is being transformed and
374 a list of the results of the transformations of that node's children.
375 The return value of these methods are the resulting transformations.
376
377 If ``Discard`` is raised in a node's transformation, no data from that node
378 will be passed to its parent's transformation.
379 """
380
381 def __init__(self):
382 super(ForestTransformer, self).__init__()
383 # results of transformations
384 self.data = dict()
385 # used to track parent nodes
386 self.node_stack = deque()
387
388 def transform(self, root):
389 """Perform a transformation on an SPPF."""
390 self.node_stack.append('result')
391 self.data['result'] = []
392 self.visit(root)
393 assert len(self.data['result']) <= 1
394 if self.data['result']:
395 return self.data['result'][0]
396
397 def transform_symbol_node(self, node, data):
398 """Transform a symbol node."""
399 return node
400
401 def transform_intermediate_node(self, node, data):
402 """Transform an intermediate node."""
403 return node
404
405 def transform_packed_node(self, node, data):
406 """Transform a packed node."""
407 return node
408
409 def transform_token_node(self, node):
410 """Transform a ``Token``."""
411 return node
412
413 def visit_symbol_node_in(self, node):
414 self.node_stack.append(id(node))
415 self.data[id(node)] = []
416 return node.children
417
418 def visit_packed_node_in(self, node):
419 self.node_stack.append(id(node))
420 self.data[id(node)] = []
421 return node.children
422
423 def visit_token_node(self, node):
424 transformed = self.transform_token_node(node)
425 if transformed is not Discard:
426 self.data[self.node_stack[-1]].append(transformed)
427
428 def _visit_node_out_helper(self, node, method):
429 self.node_stack.pop()
430 transformed = method(node, self.data[id(node)])
431 if transformed is not Discard:
432 self.data[self.node_stack[-1]].append(transformed)
433 del self.data[id(node)]
434
435 def visit_symbol_node_out(self, node):
436 self._visit_node_out_helper(node, self.transform_symbol_node)
437
438 def visit_intermediate_node_out(self, node):
439 self._visit_node_out_helper(node, self.transform_intermediate_node)
440
441 def visit_packed_node_out(self, node):
442 self._visit_node_out_helper(node, self.transform_packed_node)
443
444
445class ForestSumVisitor(ForestVisitor):
446 """
447 A visitor for prioritizing ambiguous parts of the Forest.
448
449 This visitor is used when support for explicit priorities on
450 rules is requested (whether normal, or invert). It walks the
451 forest (or subsets thereof) and cascades properties upwards
452 from the leaves.
453
454 It would be ideal to do this during parsing, however this would
455 require processing each Earley item multiple times. That's
456 a big performance drawback; so running a forest walk is the
457 lesser of two evils: there can be significantly more Earley
458 items created during parsing than there are SPPF nodes in the
459 final tree.
460 """
461 def __init__(self):
462 super(ForestSumVisitor, self).__init__(single_visit=True)
463
464 def visit_packed_node_in(self, node):
465 yield node.left
466 yield node.right
467
468 def visit_symbol_node_in(self, node):
469 return iter(node.children)
470
471 def visit_packed_node_out(self, node):
472 priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0
473 priority += getattr(node.right, 'priority', 0)
474 priority += getattr(node.left, 'priority', 0)
475 node.priority = priority
476
477 def visit_symbol_node_out(self, node):
478 node.priority = max(child.priority for child in node.children)
479
480class PackedData():
481 """Used in transformationss of packed nodes to distinguish the data
482 that comes from the left child and the right child.
483 """
484
485 class _NoData():
486 pass
487
488 NO_DATA = _NoData()
489
490 def __init__(self, node, data):
491 self.left = self.NO_DATA
492 self.right = self.NO_DATA
493 if data:
494 if node.left is not None:
495 self.left = data[0]
496 if len(data) > 1:
497 self.right = data[1]
498 else:
499 self.right = data[0]
500
501class ForestToParseTree(ForestTransformer):
502 """Used by the earley parser when ambiguity equals 'resolve' or
503 'explicit'. Transforms an SPPF into an (ambiguous) parse tree.
504
505 Parameters:
506 tree_class: The tree class to use for construction
507 callbacks: A dictionary of rules to functions that output a tree
508 prioritizer: A ``ForestVisitor`` that manipulates the priorities of ForestNodes
509 resolve_ambiguity: If True, ambiguities will be resolved based on
510 priorities. Otherwise, `_ambig` nodes will be in the resulting tree.
511 use_cache: If True, the results of packed node transformations will be cached.
512 """
513
514 def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True):
515 super(ForestToParseTree, self).__init__()
516 self.tree_class = tree_class
517 self.callbacks = callbacks
518 self.prioritizer = prioritizer
519 self.resolve_ambiguity = resolve_ambiguity
520 self._use_cache = use_cache
521 self._cache = {}
522 self._on_cycle_retreat = False
523 self._cycle_node = None
524 self._successful_visits = set()
525
526 def visit(self, root):
527 if self.prioritizer:
528 self.prioritizer.visit(root)
529 super(ForestToParseTree, self).visit(root)
530 self._cache = {}
531
532 def on_cycle(self, node, path):
533 logger.debug("Cycle encountered in the SPPF at node: %s. "
534 "As infinite ambiguities cannot be represented in a tree, "
535 "this family of derivations will be discarded.", node)
536 self._cycle_node = node
537 self._on_cycle_retreat = True
538
539 def _check_cycle(self, node):
540 if self._on_cycle_retreat:
541 if id(node) == id(self._cycle_node) or id(node) in self._successful_visits:
542 self._cycle_node = None
543 self._on_cycle_retreat = False
544 else:
545 return Discard
546
547 def _collapse_ambig(self, children):
548 new_children = []
549 for child in children:
550 if hasattr(child, 'data') and child.data == '_ambig':
551 new_children += child.children
552 else:
553 new_children.append(child)
554 return new_children
555
556 def _call_rule_func(self, node, data):
557 # called when transforming children of symbol nodes
558 # data is a list of trees or tokens that correspond to the
559 # symbol's rule expansion
560 return self.callbacks[node.rule](data)
561
562 def _call_ambig_func(self, node, data):
563 # called when transforming a symbol node
564 # data is a list of trees where each tree's data is
565 # equal to the name of the symbol or one of its aliases.
566 if len(data) > 1:
567 return self.tree_class('_ambig', data)
568 elif data:
569 return data[0]
570 return Discard
571
572 def transform_symbol_node(self, node, data):
573 if id(node) not in self._successful_visits:
574 return Discard
575 r = self._check_cycle(node)
576 if r is Discard:
577 return r
578 self._successful_visits.remove(id(node))
579 data = self._collapse_ambig(data)
580 return self._call_ambig_func(node, data)
581
582 def transform_intermediate_node(self, node, data):
583 if id(node) not in self._successful_visits:
584 return Discard
585 r = self._check_cycle(node)
586 if r is Discard:
587 return r
588 self._successful_visits.remove(id(node))
589 if len(data) > 1:
590 children = [self.tree_class('_inter', c) for c in data]
591 return self.tree_class('_iambig', children)
592 return data[0]
593
594 def transform_packed_node(self, node, data):
595 r = self._check_cycle(node)
596 if r is Discard:
597 return r
598 if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
599 return Discard
600 if self._use_cache and id(node) in self._cache:
601 return self._cache[id(node)]
602 children = []
603 assert len(data) <= 2
604 data = PackedData(node, data)
605 if data.left is not PackedData.NO_DATA:
606 if node.left.is_intermediate and isinstance(data.left, list):
607 children += data.left
608 else:
609 children.append(data.left)
610 if data.right is not PackedData.NO_DATA:
611 children.append(data.right)
612 transformed = children if node.parent.is_intermediate else self._call_rule_func(node, children)
613 if self._use_cache:
614 self._cache[id(node)] = transformed
615 return transformed
616
617 def visit_symbol_node_in(self, node):
618 super(ForestToParseTree, self).visit_symbol_node_in(node)
619 if self._on_cycle_retreat:
620 return
621 return node.children
622
623 def visit_packed_node_in(self, node):
624 self._on_cycle_retreat = False
625 to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
626 if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
627 if not self._use_cache or id(node) not in self._cache:
628 return to_visit
629
630 def visit_packed_node_out(self, node):
631 super(ForestToParseTree, self).visit_packed_node_out(node)
632 if not self._on_cycle_retreat:
633 self._successful_visits.add(id(node.parent))
634
635def handles_ambiguity(func):
636 """Decorator for methods of subclasses of ``TreeForestTransformer``.
637 Denotes that the method should receive a list of transformed derivations."""
638 func.handles_ambiguity = True
639 return func
640
641class TreeForestTransformer(ForestToParseTree):
642 """A ``ForestTransformer`` with a tree ``Transformer``-like interface.
643 By default, it will construct a tree.
644
645 Methods provided via inheritance are called based on the rule/symbol
646 names of nodes in the forest.
647
648 Methods that act on rules will receive a list of the results of the
649 transformations of the rule's children. By default, trees and tokens.
650
651 Methods that act on tokens will receive a token.
652
653 Alternatively, methods that act on rules may be annotated with
654 ``handles_ambiguity``. In this case, the function will receive a list
655 of all the transformations of all the derivations of the rule.
656 By default, a list of trees where each tree.data is equal to the
657 rule name or one of its aliases.
658
659 Non-tree transformations are made possible by override of
660 ``__default__``, ``__default_token__``, and ``__default_ambig__``.
661
662 Note:
663 Tree shaping features such as inlined rules and token filtering are
664 not built into the transformation. Positions are also not propagated.
665
666 Parameters:
667 tree_class: The tree class to use for construction
668 prioritizer: A ``ForestVisitor`` that manipulates the priorities of nodes in the SPPF.
669 resolve_ambiguity: If True, ambiguities will be resolved based on priorities.
670 use_cache (bool): If True, caches the results of some transformations,
671 potentially improving performance when ``resolve_ambiguity==False``.
672 Only use if you know what you are doing: i.e. All transformation
673 functions are pure and referentially transparent.
674 """
675
676 def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False):
677 super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache)
678
679 def __default__(self, name, data):
680 """Default operation on tree (for override).
681
682 Returns a tree with name with data as children.
683 """
684 return self.tree_class(name, data)
685
686 def __default_ambig__(self, name, data):
687 """Default operation on ambiguous rule (for override).
688
689 Wraps data in an '_ambig_' node if it contains more than
690 one element.
691 """
692 if len(data) > 1:
693 return self.tree_class('_ambig', data)
694 elif data:
695 return data[0]
696 return Discard
697
698 def __default_token__(self, node):
699 """Default operation on ``Token`` (for override).
700
701 Returns ``node``.
702 """
703 return node
704
705 def transform_token_node(self, node):
706 return getattr(self, node.type, self.__default_token__)(node)
707
708 def _call_rule_func(self, node, data):
709 name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name
710 user_func = getattr(self, name, self.__default__)
711 if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'):
712 user_func = partial(self.__default__, name)
713 if not self.resolve_ambiguity:
714 wrapper = partial(AmbiguousIntermediateExpander, self.tree_class)
715 user_func = wrapper(user_func)
716 return user_func(data)
717
718 def _call_ambig_func(self, node, data):
719 name = node.s.name
720 user_func = getattr(self, name, self.__default_ambig__)
721 if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'):
722 user_func = partial(self.__default_ambig__, name)
723 return user_func(data)
724
725class ForestToPyDotVisitor(ForestVisitor):
726 """
727 A Forest visitor which writes the SPPF to a PNG.
728
729 The SPPF can get really large, really quickly because
730 of the amount of meta-data it stores, so this is probably
731 only useful for trivial trees and learning how the SPPF
732 is structured.
733 """
734 def __init__(self, rankdir="TB"):
735 super(ForestToPyDotVisitor, self).__init__(single_visit=True)
736 self.pydot = import_module('pydot')
737 self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)
738
739 def visit(self, root, filename):
740 super(ForestToPyDotVisitor, self).visit(root)
741 try:
742 self.graph.write_png(filename)
743 except FileNotFoundError as e:
744 logger.error("Could not write png: ", e)
745
746 def visit_token_node(self, node):
747 graph_node_id = str(id(node))
748 graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
749 graph_node_color = 0x808080
750 graph_node_style = "\"filled,rounded\""
751 graph_node_shape = "diamond"
752 graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
753 self.graph.add_node(graph_node)
754
755 def visit_packed_node_in(self, node):
756 graph_node_id = str(id(node))
757 graph_node_label = repr(node)
758 graph_node_color = 0x808080
759 graph_node_style = "filled"
760 graph_node_shape = "diamond"
761 graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
762 self.graph.add_node(graph_node)
763 yield node.left
764 yield node.right
765
766 def visit_packed_node_out(self, node):
767 graph_node_id = str(id(node))
768 graph_node = self.graph.get_node(graph_node_id)[0]
769 for child in [node.left, node.right]:
770 if child is not None:
771 child_graph_node_id = str(id(child.token if isinstance(child, TokenNode) else child))
772 child_graph_node = self.graph.get_node(child_graph_node_id)[0]
773 self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
774 else:
775 #### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay.
776 child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890))
777 child_graph_node_style = "invis"
778 child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
779 child_edge_style = "invis"
780 self.graph.add_node(child_graph_node)
781 self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))
782
783 def visit_symbol_node_in(self, node):
784 graph_node_id = str(id(node))
785 graph_node_label = repr(node)
786 graph_node_color = 0x808080
787 graph_node_style = "\"filled\""
788 if node.is_intermediate:
789 graph_node_shape = "ellipse"
790 else:
791 graph_node_shape = "rectangle"
792 graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
793 self.graph.add_node(graph_node)
794 return iter(node.children)
795
796 def visit_symbol_node_out(self, node):
797 graph_node_id = str(id(node))
798 graph_node = self.graph.get_node(graph_node_id)[0]
799 for child in node.children:
800 child_graph_node_id = str(id(child))
801 child_graph_node = self.graph.get_node(child_graph_node_id)[0]
802 self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))