Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parsimonious/nodes.py: 72%
82 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:15 +0000
1"""Nodes that make up parse trees
3Parsing spits out a tree of these, which you can then tell to walk itself and
4spit out a useful value. Or you can walk it yourself; the structural attributes
5are public.
7"""
8# TODO: If this is slow, think about using cElementTree or something.
9from inspect import isfunction
10from sys import version_info, exc_info
12from parsimonious.exceptions import VisitationError, UndefinedLabel
15class Node(object):
16 """A parse tree node
18 Consider these immutable once constructed. As a side effect of a
19 memory-saving strategy in the cache, multiple references to a single
20 ``Node`` might be returned in a single parse tree. So, if you start
21 messing with one, you'll see surprising parallel changes pop up elsewhere.
23 My philosophy is that parse trees (and their nodes) should be
24 representation-agnostic. That is, they shouldn't get all mixed up with what
25 the final rendered form of a wiki page (or the intermediate representation
26 of a programming language, or whatever) is going to be: you should be able
27 to parse once and render several representations from the tree, one after
28 another.
30 """
31 # I tried making this subclass list, but it got ugly. I had to construct
32 # invalid ones and patch them up later, and there were other problems.
33 __slots__ = ['expr', # The expression that generated me
34 'full_text', # The full text fed to the parser
35 'start', # The position in the text where that expr started matching
36 'end', # The position after start where the expr first didn't
37 # match. [start:end] follow Python slice conventions.
38 'children'] # List of child parse tree nodes
40 def __init__(self, expr, full_text, start, end, children=None):
41 self.expr = expr
42 self.full_text = full_text
43 self.start = start
44 self.end = end
45 self.children = children or []
47 @property
48 def expr_name(self):
49 # backwards compatibility
50 return self.expr.name
52 def __iter__(self):
53 """Support looping over my children and doing tuple unpacks on me.
55 It can be very handy to unpack nodes in arg lists; see
56 :class:`PegVisitor` for an example.
58 """
59 return iter(self.children)
61 @property
62 def text(self):
63 """Return the text this node matched."""
64 return self.full_text[self.start:self.end]
66 # From here down is just stuff for testing and debugging.
68 def prettily(self, error=None):
69 """Return a unicode, pretty-printed representation of me.
71 :arg error: The node to highlight because an error occurred there
73 """
74 # TODO: If a Node appears multiple times in the tree, we'll point to
75 # them all. Whoops.
76 def indent(text):
77 return '\n'.join((' ' + line) for line in text.splitlines())
78 ret = [u'<%s%s matching "%s">%s' % (
79 self.__class__.__name__,
80 (' called "%s"' % self.expr_name) if self.expr_name else '',
81 self.text,
82 ' <-- *** We were here. ***' if error is self else '')]
83 for n in self:
84 ret.append(indent(n.prettily(error=error)))
85 return '\n'.join(ret)
87 def __str__(self):
88 """Return a compact, human-readable representation of me."""
89 return self.prettily()
91 def __eq__(self, other):
92 """Support by-value deep comparison with other nodes for testing."""
93 if not isinstance(other, Node):
94 return NotImplemented
96 return (self.expr == other.expr and
97 self.full_text == other.full_text and
98 self.start == other.start and
99 self.end == other.end and
100 self.children == other.children)
102 def __ne__(self, other):
103 return not self == other
105 def __repr__(self, top_level=True):
106 """Return a bit of code (though not an expression) that will recreate
107 me."""
108 # repr() of unicode flattens everything out to ASCII, so we don't need
109 # to explicitly encode things afterward.
110 ret = ["s = %r" % self.full_text] if top_level else []
111 ret.append("%s(%r, s, %s, %s%s)" % (
112 self.__class__.__name__,
113 self.expr,
114 self.start,
115 self.end,
116 (', children=[%s]' %
117 ', '.join([c.__repr__(top_level=False) for c in self.children]))
118 if self.children else ''))
119 return '\n'.join(ret)
122class RegexNode(Node):
123 """Node returned from a ``Regex`` expression
125 Grants access to the ``re.Match`` object, in case you want to access
126 capturing groups, etc.
128 """
129 __slots__ = ['match']
132class RuleDecoratorMeta(type):
133 def __new__(metaclass, name, bases, namespace):
134 def unvisit(name):
135 """Remove any leading "visit_" from a method name."""
136 return name[6:] if name.startswith('visit_') else name
138 methods = [v for k, v in namespace.items() if
139 hasattr(v, '_rule') and isfunction(v)]
140 if methods:
141 from parsimonious.grammar import Grammar # circular import dodge
143 methods.sort(key=(lambda x: x.func_code.co_firstlineno)
144 if version_info[0] < 3 else
145 (lambda x: x.__code__.co_firstlineno))
146 # Possible enhancement: once we get the Grammar extensibility story
147 # solidified, we can have @rules *add* to the default grammar
148 # rather than pave over it.
149 namespace['grammar'] = Grammar(
150 '\n'.join('{name} = {expr}'.format(name=unvisit(m.__name__),
151 expr=m._rule)
152 for m in methods))
153 return super(RuleDecoratorMeta,
154 metaclass).__new__(metaclass, name, bases, namespace)
157class NodeVisitor(object, metaclass=RuleDecoratorMeta):
158 """A shell for writing things that turn parse trees into something useful
160 Performs a depth-first traversal of an AST. Subclass this, add methods for
161 each expr you care about, instantiate, and call
162 ``visit(top_node_of_parse_tree)``. It'll return the useful stuff. This API
163 is very similar to that of ``ast.NodeVisitor``.
165 These could easily all be static methods, but that would add at least as
166 much weirdness at the call site as the ``()`` for instantiation. And this
167 way, we support subclasses that require state: options, for example, or a
168 symbol table constructed from a programming language's AST.
170 We never transform the parse tree in place, because...
172 * There are likely multiple references to the same ``Node`` object in a
173 parse tree, and changes to one reference would surprise you elsewhere.
174 * It makes it impossible to report errors: you'd end up with the "error"
175 arrow pointing someplace in a half-transformed mishmash of nodes--and
176 that's assuming you're even transforming the tree into another tree.
177 Heaven forbid you're making it into a string or something else.
179 """
181 #: The :term:`default grammar`: the one recommended for use with this
182 #: visitor. If you populate this, you will be able to call
183 #: :meth:`NodeVisitor.parse()` as a shortcut.
184 grammar = None
186 #: Classes of exceptions you actually intend to raise during visitation
187 #: and which should propagate out of the visitor. These will not be
188 #: wrapped in a VisitationError when they arise.
189 unwrapped_exceptions = ()
191 # TODO: If we need to optimize this, we can go back to putting subclasses
192 # in charge of visiting children; they know when not to bother. Or we can
193 # mark nodes as not descent-worthy in the grammar.
194 def visit(self, node):
195 """Walk a parse tree, transforming it into another representation.
197 Recursively descend a parse tree, dispatching to the method named after
198 the rule in the :class:`~parsimonious.grammar.Grammar` that produced
199 each node. If, for example, a rule was... ::
201 bold = '<b>'
203 ...the ``visit_bold()`` method would be called. It is your
204 responsibility to subclass :class:`NodeVisitor` and implement those
205 methods.
207 """
208 method = getattr(self, 'visit_' + node.expr_name, self.generic_visit)
210 # Call that method, and show where in the tree it failed if it blows
211 # up.
212 try:
213 return method(node, [self.visit(n) for n in node])
214 except (VisitationError, UndefinedLabel):
215 # Don't catch and re-wrap already-wrapped exceptions.
216 raise
217 except Exception as exc:
218 # implentors may define exception classes that should not be
219 # wrapped.
220 if isinstance(exc, self.unwrapped_exceptions):
221 raise
222 # Catch any exception, and tack on a parse tree so it's easier to
223 # see where it went wrong.
224 exc_class = type(exc)
225 raise VisitationError(exc, exc_class, node) from exc
227 def generic_visit(self, node, visited_children):
228 """Default visitor method
230 :arg node: The node we're visiting
231 :arg visited_children: The results of visiting the children of that
232 node, in a list
234 I'm not sure there's an implementation of this that makes sense across
235 all (or even most) use cases, so we leave it to subclasses to implement
236 for now.
238 """
239 raise NotImplementedError('No visitor method was defined for this expression: %s' %
240 node.expr.as_rule())
242 # Convenience methods:
244 def parse(self, text, pos=0):
245 """Parse some text with this Visitor's default grammar and return the
246 result of visiting it.
248 ``SomeVisitor().parse('some_string')`` is a shortcut for
249 ``SomeVisitor().visit(some_grammar.parse('some_string'))``.
251 """
252 return self._parse_or_match(text, pos, 'parse')
254 def match(self, text, pos=0):
255 """Parse and visit some text with this Visitor's default grammar, but
256 don't insist on parsing all the way to the end.
258 ``SomeVisitor().match('some_string')`` is a shortcut for
259 ``SomeVisitor().visit(some_grammar.match('some_string'))``.
261 """
262 return self._parse_or_match(text, pos, 'match')
264 # Internal convenience methods to help you write your own visitors:
266 def lift_child(self, node, children):
267 """Lift the sole child of ``node`` up to replace the node."""
268 first_child, = children
269 return first_child
271 # Private methods:
273 def _parse_or_match(self, text, pos, method_name):
274 """Execute a parse or match on the default grammar, followed by a
275 visitation.
277 Raise RuntimeError if there is no default grammar specified.
279 """
280 if not self.grammar:
281 raise RuntimeError(
282 "The {cls}.{method}() shortcut won't work because {cls} was "
283 "never associated with a specific " "grammar. Fill out its "
284 "`grammar` attribute, and try again.".format(
285 cls=self.__class__.__name__,
286 method=method_name))
287 return self.visit(getattr(self.grammar, method_name)(text, pos=pos))
290def rule(rule_string):
291 """Decorate a NodeVisitor ``visit_*`` method to tie a grammar rule to it.
293 The following will arrange for the ``visit_digit`` method to receive the
294 results of the ``~"[0-9]"`` parse rule::
296 @rule('~"[0-9]"')
297 def visit_digit(self, node, visited_children):
298 ...
300 Notice that there is no "digit = " as part of the rule; that gets inferred
301 from the method name.
303 In cases where there is only one kind of visitor interested in a grammar,
304 using ``@rule`` saves you having to look back and forth between the visitor
305 and the grammar definition.
307 On an implementation level, all ``@rule`` rules get stitched together into
308 a :class:`~parsimonious.Grammar` that becomes the NodeVisitor's
309 :term:`default grammar`.
311 Typically, the choice of a default rule for this grammar is simple: whatever
312 ``@rule`` comes first in the class is the default. But the choice may become
313 surprising if you divide the ``@rule`` calls among subclasses. At the
314 moment, which method "comes first" is decided simply by comparing line
315 numbers, so whatever method is on the smallest-numbered line will be the
316 default. In a future release, this will change to pick the
317 first ``@rule`` call on the basemost class that has one. That way, a
318 subclass which does not override the default rule's ``visit_*`` method
319 won't unintentionally change which rule is the default.
321 """
322 def decorator(method):
323 method._rule = rule_string # XXX: Maybe register them on a class var instead so we can just override a @rule'd visitor method on a subclass without blowing away the rule string that comes with it.
324 return method
325 return decorator