Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parsimonious/nodes.py: 72%

82 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:15 +0000

1"""Nodes that make up parse trees 

2 

3Parsing spits out a tree of these, which you can then tell to walk itself and 

4spit out a useful value. Or you can walk it yourself; the structural attributes 

5are public. 

6 

7""" 

8# TODO: If this is slow, think about using cElementTree or something. 

9from inspect import isfunction 

10from sys import version_info, exc_info 

11 

12from parsimonious.exceptions import VisitationError, UndefinedLabel 

13 

14 

15class Node(object): 

16 """A parse tree node 

17 

18 Consider these immutable once constructed. As a side effect of a 

19 memory-saving strategy in the cache, multiple references to a single 

20 ``Node`` might be returned in a single parse tree. So, if you start 

21 messing with one, you'll see surprising parallel changes pop up elsewhere. 

22 

23 My philosophy is that parse trees (and their nodes) should be 

24 representation-agnostic. That is, they shouldn't get all mixed up with what 

25 the final rendered form of a wiki page (or the intermediate representation 

26 of a programming language, or whatever) is going to be: you should be able 

27 to parse once and render several representations from the tree, one after 

28 another. 

29 

30 """ 

31 # I tried making this subclass list, but it got ugly. I had to construct 

32 # invalid ones and patch them up later, and there were other problems. 

33 __slots__ = ['expr', # The expression that generated me 

34 'full_text', # The full text fed to the parser 

35 'start', # The position in the text where that expr started matching 

36 'end', # The position after start where the expr first didn't 

37 # match. [start:end] follow Python slice conventions. 

38 'children'] # List of child parse tree nodes 

39 

40 def __init__(self, expr, full_text, start, end, children=None): 

41 self.expr = expr 

42 self.full_text = full_text 

43 self.start = start 

44 self.end = end 

45 self.children = children or [] 

46 

47 @property 

48 def expr_name(self): 

49 # backwards compatibility 

50 return self.expr.name 

51 

52 def __iter__(self): 

53 """Support looping over my children and doing tuple unpacks on me. 

54 

55 It can be very handy to unpack nodes in arg lists; see 

56 :class:`PegVisitor` for an example. 

57 

58 """ 

59 return iter(self.children) 

60 

61 @property 

62 def text(self): 

63 """Return the text this node matched.""" 

64 return self.full_text[self.start:self.end] 

65 

66 # From here down is just stuff for testing and debugging. 

67 

68 def prettily(self, error=None): 

69 """Return a unicode, pretty-printed representation of me. 

70 

71 :arg error: The node to highlight because an error occurred there 

72 

73 """ 

74 # TODO: If a Node appears multiple times in the tree, we'll point to 

75 # them all. Whoops. 

76 def indent(text): 

77 return '\n'.join((' ' + line) for line in text.splitlines()) 

78 ret = [u'<%s%s matching "%s">%s' % ( 

79 self.__class__.__name__, 

80 (' called "%s"' % self.expr_name) if self.expr_name else '', 

81 self.text, 

82 ' <-- *** We were here. ***' if error is self else '')] 

83 for n in self: 

84 ret.append(indent(n.prettily(error=error))) 

85 return '\n'.join(ret) 

86 

87 def __str__(self): 

88 """Return a compact, human-readable representation of me.""" 

89 return self.prettily() 

90 

91 def __eq__(self, other): 

92 """Support by-value deep comparison with other nodes for testing.""" 

93 if not isinstance(other, Node): 

94 return NotImplemented 

95 

96 return (self.expr == other.expr and 

97 self.full_text == other.full_text and 

98 self.start == other.start and 

99 self.end == other.end and 

100 self.children == other.children) 

101 

102 def __ne__(self, other): 

103 return not self == other 

104 

105 def __repr__(self, top_level=True): 

106 """Return a bit of code (though not an expression) that will recreate 

107 me.""" 

108 # repr() of unicode flattens everything out to ASCII, so we don't need 

109 # to explicitly encode things afterward. 

110 ret = ["s = %r" % self.full_text] if top_level else [] 

111 ret.append("%s(%r, s, %s, %s%s)" % ( 

112 self.__class__.__name__, 

113 self.expr, 

114 self.start, 

115 self.end, 

116 (', children=[%s]' % 

117 ', '.join([c.__repr__(top_level=False) for c in self.children])) 

118 if self.children else '')) 

119 return '\n'.join(ret) 

120 

121 

122class RegexNode(Node): 

123 """Node returned from a ``Regex`` expression 

124 

125 Grants access to the ``re.Match`` object, in case you want to access 

126 capturing groups, etc. 

127 

128 """ 

129 __slots__ = ['match'] 

130 

131 

132class RuleDecoratorMeta(type): 

133 def __new__(metaclass, name, bases, namespace): 

134 def unvisit(name): 

135 """Remove any leading "visit_" from a method name.""" 

136 return name[6:] if name.startswith('visit_') else name 

137 

138 methods = [v for k, v in namespace.items() if 

139 hasattr(v, '_rule') and isfunction(v)] 

140 if methods: 

141 from parsimonious.grammar import Grammar # circular import dodge 

142 

143 methods.sort(key=(lambda x: x.func_code.co_firstlineno) 

144 if version_info[0] < 3 else 

145 (lambda x: x.__code__.co_firstlineno)) 

146 # Possible enhancement: once we get the Grammar extensibility story 

147 # solidified, we can have @rules *add* to the default grammar 

148 # rather than pave over it. 

149 namespace['grammar'] = Grammar( 

150 '\n'.join('{name} = {expr}'.format(name=unvisit(m.__name__), 

151 expr=m._rule) 

152 for m in methods)) 

153 return super(RuleDecoratorMeta, 

154 metaclass).__new__(metaclass, name, bases, namespace) 

155 

156 

157class NodeVisitor(object, metaclass=RuleDecoratorMeta): 

158 """A shell for writing things that turn parse trees into something useful 

159 

160 Performs a depth-first traversal of an AST. Subclass this, add methods for 

161 each expr you care about, instantiate, and call 

162 ``visit(top_node_of_parse_tree)``. It'll return the useful stuff. This API 

163 is very similar to that of ``ast.NodeVisitor``. 

164 

165 These could easily all be static methods, but that would add at least as 

166 much weirdness at the call site as the ``()`` for instantiation. And this 

167 way, we support subclasses that require state: options, for example, or a 

168 symbol table constructed from a programming language's AST. 

169 

170 We never transform the parse tree in place, because... 

171 

172 * There are likely multiple references to the same ``Node`` object in a 

173 parse tree, and changes to one reference would surprise you elsewhere. 

174 * It makes it impossible to report errors: you'd end up with the "error" 

175 arrow pointing someplace in a half-transformed mishmash of nodes--and 

176 that's assuming you're even transforming the tree into another tree. 

177 Heaven forbid you're making it into a string or something else. 

178 

179 """ 

180 

181 #: The :term:`default grammar`: the one recommended for use with this 

182 #: visitor. If you populate this, you will be able to call 

183 #: :meth:`NodeVisitor.parse()` as a shortcut. 

184 grammar = None 

185 

186 #: Classes of exceptions you actually intend to raise during visitation 

187 #: and which should propagate out of the visitor. These will not be 

188 #: wrapped in a VisitationError when they arise. 

189 unwrapped_exceptions = () 

190 

191 # TODO: If we need to optimize this, we can go back to putting subclasses 

192 # in charge of visiting children; they know when not to bother. Or we can 

193 # mark nodes as not descent-worthy in the grammar. 

194 def visit(self, node): 

195 """Walk a parse tree, transforming it into another representation. 

196 

197 Recursively descend a parse tree, dispatching to the method named after 

198 the rule in the :class:`~parsimonious.grammar.Grammar` that produced 

199 each node. If, for example, a rule was... :: 

200 

201 bold = '<b>' 

202 

203 ...the ``visit_bold()`` method would be called. It is your 

204 responsibility to subclass :class:`NodeVisitor` and implement those 

205 methods. 

206 

207 """ 

208 method = getattr(self, 'visit_' + node.expr_name, self.generic_visit) 

209 

210 # Call that method, and show where in the tree it failed if it blows 

211 # up. 

212 try: 

213 return method(node, [self.visit(n) for n in node]) 

214 except (VisitationError, UndefinedLabel): 

215 # Don't catch and re-wrap already-wrapped exceptions. 

216 raise 

217 except Exception as exc: 

218 # implentors may define exception classes that should not be 

219 # wrapped. 

220 if isinstance(exc, self.unwrapped_exceptions): 

221 raise 

222 # Catch any exception, and tack on a parse tree so it's easier to 

223 # see where it went wrong. 

224 exc_class = type(exc) 

225 raise VisitationError(exc, exc_class, node) from exc 

226 

227 def generic_visit(self, node, visited_children): 

228 """Default visitor method 

229 

230 :arg node: The node we're visiting 

231 :arg visited_children: The results of visiting the children of that 

232 node, in a list 

233 

234 I'm not sure there's an implementation of this that makes sense across 

235 all (or even most) use cases, so we leave it to subclasses to implement 

236 for now. 

237 

238 """ 

239 raise NotImplementedError('No visitor method was defined for this expression: %s' % 

240 node.expr.as_rule()) 

241 

242 # Convenience methods: 

243 

244 def parse(self, text, pos=0): 

245 """Parse some text with this Visitor's default grammar and return the 

246 result of visiting it. 

247 

248 ``SomeVisitor().parse('some_string')`` is a shortcut for 

249 ``SomeVisitor().visit(some_grammar.parse('some_string'))``. 

250 

251 """ 

252 return self._parse_or_match(text, pos, 'parse') 

253 

254 def match(self, text, pos=0): 

255 """Parse and visit some text with this Visitor's default grammar, but 

256 don't insist on parsing all the way to the end. 

257 

258 ``SomeVisitor().match('some_string')`` is a shortcut for 

259 ``SomeVisitor().visit(some_grammar.match('some_string'))``. 

260 

261 """ 

262 return self._parse_or_match(text, pos, 'match') 

263 

264 # Internal convenience methods to help you write your own visitors: 

265 

266 def lift_child(self, node, children): 

267 """Lift the sole child of ``node`` up to replace the node.""" 

268 first_child, = children 

269 return first_child 

270 

271 # Private methods: 

272 

273 def _parse_or_match(self, text, pos, method_name): 

274 """Execute a parse or match on the default grammar, followed by a 

275 visitation. 

276 

277 Raise RuntimeError if there is no default grammar specified. 

278 

279 """ 

280 if not self.grammar: 

281 raise RuntimeError( 

282 "The {cls}.{method}() shortcut won't work because {cls} was " 

283 "never associated with a specific " "grammar. Fill out its " 

284 "`grammar` attribute, and try again.".format( 

285 cls=self.__class__.__name__, 

286 method=method_name)) 

287 return self.visit(getattr(self.grammar, method_name)(text, pos=pos)) 

288 

289 

290def rule(rule_string): 

291 """Decorate a NodeVisitor ``visit_*`` method to tie a grammar rule to it. 

292 

293 The following will arrange for the ``visit_digit`` method to receive the 

294 results of the ``~"[0-9]"`` parse rule:: 

295 

296 @rule('~"[0-9]"') 

297 def visit_digit(self, node, visited_children): 

298 ... 

299 

300 Notice that there is no "digit = " as part of the rule; that gets inferred 

301 from the method name. 

302 

303 In cases where there is only one kind of visitor interested in a grammar, 

304 using ``@rule`` saves you having to look back and forth between the visitor 

305 and the grammar definition. 

306 

307 On an implementation level, all ``@rule`` rules get stitched together into 

308 a :class:`~parsimonious.Grammar` that becomes the NodeVisitor's 

309 :term:`default grammar`. 

310 

311 Typically, the choice of a default rule for this grammar is simple: whatever 

312 ``@rule`` comes first in the class is the default. But the choice may become 

313 surprising if you divide the ``@rule`` calls among subclasses. At the 

314 moment, which method "comes first" is decided simply by comparing line 

315 numbers, so whatever method is on the smallest-numbered line will be the 

316 default. In a future release, this will change to pick the 

317 first ``@rule`` call on the basemost class that has one. That way, a 

318 subclass which does not override the default rule's ``visit_*`` method 

319 won't unintentionally change which rule is the default. 

320 

321 """ 

322 def decorator(method): 

323 method._rule = rule_string # XXX: Maybe register them on a class var instead so we can just override a @rule'd visitor method on a subclass without blowing away the rule string that comes with it. 

324 return method 

325 return decorator