Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/blib2to3/pgen2/parse.py: 97%
181 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:15 +0000
1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
4"""Parser engine for the grammar tables generated by pgen.
6The grammar table must be loaded first.
8See Parser/parser.c in the Python distribution for additional info on
9how this parsing engine works.
11"""
12import copy
13from contextlib import contextmanager
15# Local imports
16from . import grammar, token, tokenize
17from typing import (
18 cast,
19 Any,
20 Optional,
21 Text,
22 Union,
23 Tuple,
24 Dict,
25 List,
26 Iterator,
27 Callable,
28 Set,
29 TYPE_CHECKING,
30)
31from blib2to3.pgen2.grammar import Grammar
32from blib2to3.pytree import convert, NL, Context, RawNode, Leaf, Node
34if TYPE_CHECKING:
35 from blib2to3.pgen2.driver import TokenProxy
# Type aliases shared by this module and its callers.
Results = Dict[Text, NL]  # named sub-results of a match, keyed by text
Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]  # raw node -> tree node
DFA = List[List[Tuple[int, int]]]  # per-state lists of (label, newstate) arcs
DFAS = Tuple[DFA, Dict[int, int]]  # a DFA together with its first set
def lam_sub(grammar: Grammar, node: RawNode) -> NL:
    """Default converter: build a Node from a raw (type, value, context, children) tuple."""
    node_type, _value, node_context, node_children = node
    assert node_children is not None
    return Node(type=node_type, children=node_children, context=node_context)
# A placeholder node, used when parser is backtracking.
# Shaped like a RawNode (type, value, context, children); type -1 is not a
# real token or symbol number, so it cannot collide with genuine nodes.
DUMMY_NODE = (-1, None, None, None)
def stack_copy(
    stack: List[Tuple[DFAS, int, RawNode]]
) -> List[Tuple[DFAS, int, RawNode]]:
    """Copy a parser stack, replacing every node with the shared DUMMY_NODE."""
    copied = []
    for dfa, label, _node in stack:
        copied.append((dfa, label, DUMMY_NODE))
    return copied
class Recorder:
    """Drives several candidate parses in parallel while a soft keyword
    is being disambiguated, recording which alternatives die with a
    ParseError along the way."""

    def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:
        self.parser = parser
        self._ilabels = ilabels
        self.context = context  # not really matter

        self._dead_ilabels: Set[int] = set()
        # Snapshot of the stack we started from; each candidate label gets
        # its own node-less copy to advance independently.
        self._start_point = self.parser.stack
        self._points = {}
        for candidate in ilabels:
            self._points[candidate] = stack_copy(self._start_point)

    @property
    def ilabels(self) -> Set[int]:
        # Candidates that have not (yet) failed.
        return set(self._ilabels) ^ self._dead_ilabels

    @contextmanager
    def switch_to(self, ilabel: int) -> Iterator[None]:
        """Temporarily swap in the stack belonging to one candidate label."""
        with self.backtrack():
            self.parser.stack = self._points[ilabel]
            try:
                yield
            except ParseError:
                # This alternative cannot parse the input; retire it.
                self._dead_ilabels.add(ilabel)
            finally:
                self.parser.stack = self._start_point

    @contextmanager
    def backtrack(self) -> Iterator[None]:
        """
        Use the node-level invariant ones for basic parsing operations (push/pop/shift).
        These still will operate on the stack; but they won't create any new nodes, or
        modify the contents of any other existing nodes.

        This saves us a ton of time when we are backtracking, since we
        want to restore to the initial state as quick as possible, which
        can only be done by having as little mutatations as possible.
        """
        saved = self.parser.is_backtracking
        try:
            self.parser.is_backtracking = True
            yield
        finally:
            self.parser.is_backtracking = saved

    def add_token(self, tok_type: int, tok_val: Text, raw: bool = False) -> None:
        """Feed one token to every still-alive candidate stack."""
        dispatch: Callable[..., Any]
        if raw:
            dispatch = self.parser._addtoken
        else:
            dispatch = self.parser.addtoken

        for ilabel in self.ilabels:
            with self.switch_to(ilabel):
                if raw:
                    dispatch(ilabel, tok_type, tok_val, self.context)
                else:
                    dispatch(tok_type, tok_val, self.context)

    def determine_route(
        self, value: Optional[Text] = None, force: bool = False
    ) -> Optional[int]:
        """Return the winning label if the race is decided, else None."""
        alive = self.ilabels
        if not alive:
            # Everything died; report the last failure recorded.
            *_, most_successful_ilabel = self._dead_ilabels
            raise ParseError("bad input", most_successful_ilabel, value, self.context)
        winner, *others = alive
        if force or not others:
            return winner
        return None
class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(
        self, msg: Text, type: Optional[int], value: Optional[Text], context: Context
    ) -> None:
        # Render all the details into the exception message up front.
        detail = "%s: type=%r, value=%r, context=%r" % (msg, type, value, context)
        super().__init__(detail)
        self.context = context
        self.value = value
        self.type = type
        self.msg = msg
class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])                  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):           # parse a token; may raise ParseError
            break
    root = p.rootnode                 # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable.  When a syntax error occurs, addtoken() raises
    the ParseError exception.  There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).
    """

    def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes.  If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree.  If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted.  The syntax tree is converted from the bottom
        up.

        **post-note: the convert argument is ignored since for Black's
        usage, convert will always be blib2to3.pytree.convert. Allowing
        this to be dynamic hurts mypyc's ability to use early binding.
        These docs are left for historical and informational value.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.
        """
        self.grammar = grammar
        # See note in docstring above. TL;DR this is ignored.
        self.convert = convert or lam_sub
        # True while a Recorder is speculatively exploring soft-keyword
        # alternatives; shift/push/pop skip node mutation in that mode.
        self.is_backtracking = False

    def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.
        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode: RawNode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]
        self.rootnode: Optional[NL] = None
        self.used_names: Set[str] = set()
        self.proxy = proxy

    def addtoken(self, type: int, value: Text, context: Context) -> bool:
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabels = self.classify(type, value, context)
        assert len(ilabels) >= 1

        # If we have only one state to advance, we'll directly
        # take it as is.
        if len(ilabels) == 1:
            [ilabel] = ilabels
            return self._addtoken(ilabel, type, value, context)

        # If there are multiple states which we can advance (only
        # happen under soft-keywords), then we will try all of them
        # in parallel and as soon as one state can reach further than
        # the rest, we'll choose that one. This is a pretty hacky
        # and hopefully temporary algorithm.
        #
        # For a more detailed explanation, check out this post:
        # https://tree.science/what-the-backtracking.html

        with self.proxy.release() as proxy:
            counter, force = 0, False
            recorder = Recorder(self, ilabels, context)
            # Feed the ambiguous token to every candidate via the raw path.
            recorder.add_token(type, value, raw=True)

            next_token_value = value
            # Keep eating lookahead tokens until one candidate outlives the rest.
            while recorder.determine_route(next_token_value) is None:
                if not proxy.can_advance(counter):
                    # Out of buffered lookahead; pick a winner among survivors.
                    force = True
                    break

                next_token_type, next_token_value, *_ = proxy.eat(counter)
                # Comments and non-logical newlines don't affect the grammar.
                if next_token_type in (tokenize.COMMENT, tokenize.NL):
                    counter += 1
                    continue

                if next_token_type == tokenize.OP:
                    # Operators are classified by their exact text.
                    next_token_type = grammar.opmap[next_token_value]

                recorder.add_token(next_token_type, next_token_value)
                counter += 1

            ilabel = cast(int, recorder.determine_route(next_token_value, force=force))
            assert ilabel is not None

        # Replay the winning label against the real (node-building) stack.
        return self._addtoken(ilabel, type, value, context)

    def _addtoken(self, ilabel: int, type: int, value: Text, context: Context) -> bool:
        """Advance the DFA stack with one already-classified token label."""
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t = self.grammar.labels[i][0]
                if t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, itsdfa, newstate, context)
                        break  # To continue the outer while loop

                elif ilabel == i:
                    # Look it up in the list of labels
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False

            else:
                # No arc matched this label.
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input", type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type: int, value: Text, context: Context) -> List[int]:
        """Turn a token into a label.  (Internal)

        Depending on whether the value is a soft-keyword or not,
        this function may return multiple labels to choose from."""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            if value in self.grammar.keywords:
                return [self.grammar.keywords[value]]
            elif value in self.grammar.soft_keywords:
                # A soft keyword is ambiguous: it may be the keyword or a
                # plain NAME, so both labels are returned for backtracking.
                assert type in self.grammar.tokens
                return [
                    self.grammar.soft_keywords[value],
                    self.grammar.tokens[type],
                ]

        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return [ilabel]

    def shift(self, type: int, value: Text, newstate: int, context: Context) -> None:
        """Shift a token.  (Internal)"""
        if self.is_backtracking:
            # Speculative parse: advance the state but build no nodes.
            dfa, state, _ = self.stack[-1]
            self.stack[-1] = (dfa, newstate, DUMMY_NODE)
        else:
            dfa, state, node = self.stack[-1]
            rawnode: RawNode = (type, value, context, None)
            # NOTE: calls the module-level convert (early-bound for mypyc),
            # not self.convert -- see the __init__ docstring post-note.
            newnode = convert(self.grammar, rawnode)
            assert node[-1] is not None
            node[-1].append(newnode)
            self.stack[-1] = (dfa, newstate, node)

    def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None:
        """Push a nonterminal.  (Internal)"""
        if self.is_backtracking:
            # Speculative parse: track states only, no node construction.
            dfa, state, _ = self.stack[-1]
            self.stack[-1] = (dfa, newstate, DUMMY_NODE)
            self.stack.append((newdfa, 0, DUMMY_NODE))
        else:
            dfa, state, node = self.stack[-1]
            newnode: RawNode = (type, None, context, [])
            self.stack[-1] = (dfa, newstate, node)
            self.stack.append((newdfa, 0, newnode))

    def pop(self) -> None:
        """Pop a nonterminal.  (Internal)"""
        if self.is_backtracking:
            # Speculative parse: discard the entry without converting it.
            self.stack.pop()
        else:
            popdfa, popstate, popnode = self.stack.pop()
            # Convert bottom-up: the finished node is attached to its parent,
            # or becomes the root when the stack is exhausted.
            newnode = convert(self.grammar, popnode)
            if self.stack:
                dfa, state, node = self.stack[-1]
                assert node[-1] is not None
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names