Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/_parser/parso/pgen2/generator.py: 16%
159 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:43 +0000
1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3#
4# Modifications:
5# Copyright David Halter and Contributors
6# Modifications are dual-licensed: MIT and PSF.
7# 99% of the code is different from pgen2, now.
8#
9# A fork of `parso.pgen2.generator`.
10# https://github.com/davidhalter/parso/blob/master/parso/pgen2/generator.py
11#
12# The following changes were made:
13# - Type stubs were directly applied.
14# pyre-unsafe
16"""
17This module defines the data structures used to represent a grammar.
19Specifying grammars in pgen is possible with this grammar::
21 grammar: (NEWLINE | rule)* ENDMARKER
22 rule: NAME ':' rhs NEWLINE
23 rhs: items ('|' items)*
24 items: item+
25 item: '[' rhs ']' | atom ['+' | '*']
26 atom: '(' rhs ')' | NAME | STRING
28This grammar is self-referencing.
30This parser generator (pgen2) was created by Guido Rossum and used for lib2to3.
31Most of the code has been refactored to make it more Pythonic. Since this was a
32"copy" of the CPython Parser parser "pgen", there was some work needed to make
33it more readable. It should also be slightly faster than the original pgen2,
34because we made some optimizations.
35"""
37from ast import literal_eval
38from typing import Any, Generic, Mapping, Sequence, Set, TypeVar, Union
40from libcst._parser.parso.pgen2.grammar_parser import GrammarParser, NFAState
42_TokenTypeT = TypeVar("_TokenTypeT")
45class DFAPlan:
46 """
47 Plans are used for the parser to create stack nodes and do the proper
48 DFA state transitions.
49 """
51 def __init__(
52 self, next_dfa: "DFAState", dfa_pushes: Sequence["DFAState"] = []
53 ) -> None:
54 self.next_dfa = next_dfa
55 self.dfa_pushes = dfa_pushes
57 def __repr__(self) -> str:
58 return "%s(%s, %s)" % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
61class DFAState(Generic[_TokenTypeT]):
62 """
63 The DFAState object is the core class for pretty much anything. DFAState
64 are the vertices of an ordered graph while arcs and transitions are the
65 edges.
67 Arcs are the initial edges, where most DFAStates are not connected and
68 transitions are then calculated to connect the DFA state machines that have
69 different nonterminals.
70 """
72 def __init__(self, from_rule: str, nfa_set: Set[NFAState], final: NFAState) -> None:
73 self.from_rule = from_rule
74 self.nfa_set = nfa_set
75 self.arcs: Mapping[
76 str, DFAState
77 ] = {} # map from terminals/nonterminals to DFAState
78 # In an intermediary step we set these nonterminal arcs (which has the
79 # same structure as arcs). These don't contain terminals anymore.
80 self.nonterminal_arcs: Mapping[str, DFAState] = {}
82 # Transitions are basically the only thing that the parser is using
83 # with is_final. Everyting else is purely here to create a parser.
84 self.transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] = {}
85 self.is_final = final in nfa_set
87 def add_arc(self, next_, label):
88 assert isinstance(label, str)
89 assert label not in self.arcs
90 assert isinstance(next_, DFAState)
91 self.arcs[label] = next_
93 def unifystate(self, old, new):
94 for label, next_ in self.arcs.items():
95 if next_ is old:
96 self.arcs[label] = new
98 def __eq__(self, other):
99 # Equality test -- ignore the nfa_set instance variable
100 assert isinstance(other, DFAState)
101 if self.is_final != other.is_final:
102 return False
103 # Can't just return self.arcs == other.arcs, because that
104 # would invoke this method recursively, with cycles...
105 if len(self.arcs) != len(other.arcs):
106 return False
107 for label, next_ in self.arcs.items():
108 if next_ is not other.arcs.get(label):
109 return False
110 return True
112 def __repr__(self) -> str:
113 return "<%s: %s is_final=%s>" % (
114 self.__class__.__name__,
115 self.from_rule,
116 self.is_final,
117 )
120class ReservedString:
121 """
122 Most grammars will have certain keywords and operators that are mentioned
123 in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).
124 This class basically is the former.
125 """
127 def __init__(self, value: str) -> None:
128 self.value = value
130 def __repr__(self) -> str:
131 return "%s(%s)" % (self.__class__.__name__, self.value)
134class Grammar(Generic[_TokenTypeT]):
135 """
136 Once initialized, this class supplies the grammar tables for the
137 parsing engine implemented by parse.py. The parsing engine
138 accesses the instance variables directly.
140 The only important part in this parsers are dfas and transitions between
141 dfas.
142 """
144 def __init__(
145 self,
146 start_nonterminal: str,
147 rule_to_dfas: Mapping[str, Sequence[DFAState[_TokenTypeT]]],
148 reserved_syntax_strings: Mapping[str, ReservedString],
149 ) -> None:
150 self.nonterminal_to_dfas = rule_to_dfas
151 self.reserved_syntax_strings = reserved_syntax_strings
152 self.start_nonterminal = start_nonterminal
155def _simplify_dfas(dfas):
156 """
157 This is not theoretically optimal, but works well enough.
158 Algorithm: repeatedly look for two states that have the same
159 set of arcs (same labels pointing to the same nodes) and
160 unify them, until things stop changing.
162 dfas is a list of DFAState instances
163 """
164 changes = True
165 while changes:
166 changes = False
167 for i, state_i in enumerate(dfas):
168 for j in range(i + 1, len(dfas)):
169 state_j = dfas[j]
170 if state_i == state_j:
171 # print " unify", i, j
172 del dfas[j]
173 for state in dfas:
174 state.unifystate(state_j, state_i)
175 changes = True
176 break
179def _make_dfas(start, finish):
180 """
181 Uses the powerset construction algorithm to create DFA states from sets of
182 NFA states.
184 Also does state reduction if some states are not needed.
185 """
186 # To turn an NFA into a DFA, we define the states of the DFA
187 # to correspond to *sets* of states of the NFA. Then do some
188 # state reduction.
189 assert isinstance(start, NFAState)
190 assert isinstance(finish, NFAState)
192 def addclosure(nfa_state, base_nfa_set):
193 assert isinstance(nfa_state, NFAState)
194 if nfa_state in base_nfa_set:
195 return
196 base_nfa_set.add(nfa_state)
197 for nfa_arc in nfa_state.arcs:
198 if nfa_arc.nonterminal_or_string is None:
199 addclosure(nfa_arc.next, base_nfa_set)
201 base_nfa_set = set()
202 addclosure(start, base_nfa_set)
203 states = [DFAState(start.from_rule, base_nfa_set, finish)]
204 for state in states: # NB states grows while we're iterating
205 arcs = {}
206 # Find state transitions and store them in arcs.
207 for nfa_state in state.nfa_set:
208 for nfa_arc in nfa_state.arcs:
209 if nfa_arc.nonterminal_or_string is not None:
210 nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
211 addclosure(nfa_arc.next, nfa_set)
213 # Now create the dfa's with no None's in arcs anymore. All Nones have
214 # been eliminated and state transitions (arcs) are properly defined, we
215 # just need to create the dfa's.
216 for nonterminal_or_string, nfa_set in arcs.items():
217 for nested_state in states:
218 if nested_state.nfa_set == nfa_set:
219 # The DFA state already exists for this rule.
220 break
221 else:
222 nested_state = DFAState(start.from_rule, nfa_set, finish)
223 states.append(nested_state)
225 state.add_arc(nested_state, nonterminal_or_string)
226 return states # List of DFAState instances; first one is start
229def generate_grammar(bnf_grammar: str, token_namespace: Any) -> Grammar[Any]:
230 """
231 ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
232 at-least-once repetition, [] for optional parts, | for alternatives and ()
233 for grouping).
235 It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
236 own parser.
237 """
238 rule_to_dfas = {}
239 start_nonterminal = None
240 for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
241 dfas = _make_dfas(nfa_a, nfa_z)
242 _simplify_dfas(dfas)
243 rule_to_dfas[nfa_a.from_rule] = dfas
245 if start_nonterminal is None:
246 start_nonterminal = nfa_a.from_rule
248 reserved_strings = {}
249 for nonterminal, dfas in rule_to_dfas.items():
250 for dfa_state in dfas:
251 for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
252 if terminal_or_nonterminal in rule_to_dfas:
253 dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa
254 else:
255 transition = _make_transition(
256 token_namespace, reserved_strings, terminal_or_nonterminal
257 )
258 dfa_state.transitions[transition] = DFAPlan(next_dfa)
260 _calculate_tree_traversal(rule_to_dfas)
261 if start_nonterminal is None:
262 raise Exception("could not find starting nonterminal!")
263 return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)
266def _make_transition(token_namespace, reserved_syntax_strings, label):
267 """
268 Creates a reserved string ("if", "for", "*", ...) or returns the token type
269 (NUMBER, STRING, ...) for a given grammar terminal.
270 """
271 if label[0].isalpha():
272 # A named token (e.g. NAME, NUMBER, STRING)
273 return getattr(token_namespace, label)
274 else:
275 # Either a keyword or an operator
276 assert label[0] in ('"', "'"), label
277 assert not label.startswith('"""') and not label.startswith("'''")
278 value = literal_eval(label)
279 try:
280 return reserved_syntax_strings[value]
281 except KeyError:
282 r = reserved_syntax_strings[value] = ReservedString(value)
283 return r
286def _calculate_tree_traversal(nonterminal_to_dfas):
287 """
288 By this point we know how dfas can move around within a stack node, but we
289 don't know how we can add a new stack node (nonterminal transitions).
290 """
291 # Map from grammar rule (nonterminal) name to a set of tokens.
292 first_plans = {}
294 nonterminals = list(nonterminal_to_dfas.keys())
295 nonterminals.sort()
296 for nonterminal in nonterminals:
297 if nonterminal not in first_plans:
298 _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)
300 # Now that we have calculated the first terminals, we are sure that
301 # there is no left recursion.
303 for dfas in nonterminal_to_dfas.values():
304 for dfa_state in dfas:
305 transitions = dfa_state.transitions
306 for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
307 for transition, pushes in first_plans[nonterminal].items():
308 if transition in transitions:
309 prev_plan = transitions[transition]
310 # Make sure these are sorted so that error messages are
311 # at least deterministic
312 choices = sorted(
313 [
314 (
315 prev_plan.dfa_pushes[0].from_rule
316 if prev_plan.dfa_pushes
317 else prev_plan.next_dfa.from_rule
318 ),
319 (pushes[0].from_rule if pushes else next_dfa.from_rule),
320 ]
321 )
322 raise ValueError(
323 (
324 "Rule %s is ambiguous; given a %s token, we "
325 + "can't determine if we should evaluate %s or %s."
326 )
327 % ((dfa_state.from_rule, transition) + tuple(choices))
328 )
329 transitions[transition] = DFAPlan(next_dfa, pushes)
332def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
333 """
334 Calculates the first plan in the first_plans dictionary for every given
335 nonterminal. This is going to be used to know when to create stack nodes.
336 """
337 dfas = nonterminal_to_dfas[nonterminal]
338 new_first_plans = {}
339 first_plans[nonterminal] = None # dummy to detect left recursion
340 # We only need to check the first dfa. All the following ones are not
341 # interesting to find first terminals.
342 state = dfas[0]
343 for transition, next_ in state.transitions.items():
344 # It's a string. We have finally found a possible first token.
345 new_first_plans[transition] = [next_.next_dfa]
347 for nonterminal2, next_ in state.nonterminal_arcs.items():
348 # It's a nonterminal and we have either a left recursion issue
349 # in the grammar or we have to recurse.
350 try:
351 first_plans2 = first_plans[nonterminal2]
352 except KeyError:
353 first_plans2 = _calculate_first_plans(
354 nonterminal_to_dfas, first_plans, nonterminal2
355 )
356 else:
357 if first_plans2 is None:
358 raise ValueError("left recursion for rule %r" % nonterminal)
360 for t, pushes in first_plans2.items():
361 new_first_plans[t] = [next_] + pushes
363 first_plans[nonterminal] = new_first_plans
364 return new_first_plans