Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/

2# Licensed to PSF under a Contributor Agreement.

4# Modifications:

5# Copyright David Halter and Contributors

6# Modifications are dual-licensed: MIT and PSF.

7# 99% of the code is different from pgen2, now.

9# A fork of `parso.pgen2.generator`.

10# https://github.com/davidhalter/parso/blob/master/parso/pgen2/generator.py

11#

12# The following changes were made:

13# - Type stubs were directly applied.

14# pyre-unsafe

16"""

17This module defines the data structures used to represent a grammar.

19Specifying grammars in pgen is possible with this grammar::

21 grammar: (NEWLINE | rule)* ENDMARKER

22 rule: NAME ':' rhs NEWLINE

23 rhs: items ('|' items)*

24 items: item+

25 item: '[' rhs ']' | atom ['+' | '*']

26 atom: '(' rhs ')' | NAME | STRING

28This grammar is self-referencing.

30This parser generator (pgen2) was created by Guido Rossum and used for lib2to3.

31Most of the code has been refactored to make it more Pythonic. Since this was a

32"copy" of the CPython Parser parser "pgen", there was some work needed to make

33it more readable. It should also be slightly faster than the original pgen2,

34because we made some optimizations.

35"""

37from ast import literal_eval

38from typing import Any, Generic, Mapping, Sequence, Set, TypeVar, Union

40from libcst._parser.parso.pgen2.grammar_parser import GrammarParser, NFAState

42_TokenTypeT = TypeVar("_TokenTypeT")

45class DFAPlan:

46 """

47 Plans are used for the parser to create stack nodes and do the proper

48 DFA state transitions.

49 """

51 def __init__(

52 self, next_dfa: "DFAState", dfa_pushes: Sequence["DFAState"] = []

53 ) -> None:

54 self.next_dfa = next_dfa

55 self.dfa_pushes = dfa_pushes

57 def __repr__(self) -> str:

58 return "%s(%s, %s)" % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)

61class DFAState(Generic[_TokenTypeT]):

62 """

63 The DFAState object is the core class for pretty much anything. DFAState

64 are the vertices of an ordered graph while arcs and transitions are the

65 edges.

67 Arcs are the initial edges, where most DFAStates are not connected and

68 transitions are then calculated to connect the DFA state machines that have

69 different nonterminals.

70 """

72 def __init__(self, from_rule: str, nfa_set: Set[NFAState], final: NFAState) -> None:

73 self.from_rule = from_rule

74 self.nfa_set = nfa_set

75 self.arcs: Mapping[

76 str, DFAState

77 ] = {} # map from terminals/nonterminals to DFAState

78 # In an intermediary step we set these nonterminal arcs (which has the

79 # same structure as arcs). These don't contain terminals anymore.

80 self.nonterminal_arcs: Mapping[str, DFAState] = {}

82 # Transitions are basically the only thing that the parser is using

83 # with is_final. Everyting else is purely here to create a parser.

84 self.transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] = {}

85 self.is_final = final in nfa_set

87 def add_arc(self, next_, label):

88 assert isinstance(label, str)

89 assert label not in self.arcs

90 assert isinstance(next_, DFAState)

91 self.arcs[label] = next_

93 def unifystate(self, old, new):

94 for label, next_ in self.arcs.items():

95 if next_ is old:

96 self.arcs[label] = new

98 def __eq__(self, other):

99 # Equality test -- ignore the nfa_set instance variable

100 assert isinstance(other, DFAState)

101 if self.is_final != other.is_final:

102 return False

103 # Can't just return self.arcs == other.arcs, because that

104 # would invoke this method recursively, with cycles...

105 if len(self.arcs) != len(other.arcs):

106 return False

107 for label, next_ in self.arcs.items():

108 if next_ is not other.arcs.get(label):

109 return False

110 return True

111

112 def __repr__(self) -> str:

113 return "<%s: %s is_final=%s>" % (

114 self.__class__.__name__,

115 self.from_rule,

116 self.is_final,

117 )

118

119

120class ReservedString:

121 """

122 Most grammars will have certain keywords and operators that are mentioned

123 in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).

124 This class basically is the former.

125 """

126

127 def __init__(self, value: str) -> None:

128 self.value = value

129

130 def __repr__(self) -> str:

131 return "%s(%s)" % (self.__class__.__name__, self.value)

132

133

134class Grammar(Generic[_TokenTypeT]):

135 """

136 Once initialized, this class supplies the grammar tables for the

137 parsing engine implemented by parse.py. The parsing engine

138 accesses the instance variables directly.

139

140 The only important part in this parsers are dfas and transitions between

141 dfas.

142 """

143

144 def __init__(

145 self,

146 start_nonterminal: str,

147 rule_to_dfas: Mapping[str, Sequence[DFAState[_TokenTypeT]]],

148 reserved_syntax_strings: Mapping[str, ReservedString],

149 ) -> None:

150 self.nonterminal_to_dfas = rule_to_dfas

151 self.reserved_syntax_strings = reserved_syntax_strings

152 self.start_nonterminal = start_nonterminal

153

154

155def _simplify_dfas(dfas):

156 """

157 This is not theoretically optimal, but works well enough.

158 Algorithm: repeatedly look for two states that have the same

159 set of arcs (same labels pointing to the same nodes) and

160 unify them, until things stop changing.

161

162 dfas is a list of DFAState instances

163 """

164 changes = True

165 while changes:

166 changes = False

167 for i, state_i in enumerate(dfas):

168 for j in range(i + 1, len(dfas)):

169 state_j = dfas[j]

170 if state_i == state_j:

171 # print " unify", i, j

172 del dfas[j]

173 for state in dfas:

174 state.unifystate(state_j, state_i)

175 changes = True

176 break

177

178

179def _make_dfas(start, finish):

180 """

181 Uses the powerset construction algorithm to create DFA states from sets of

182 NFA states.

183

184 Also does state reduction if some states are not needed.

185 """

186 # To turn an NFA into a DFA, we define the states of the DFA

187 # to correspond to *sets* of states of the NFA. Then do some

188 # state reduction.

189 assert isinstance(start, NFAState)

190 assert isinstance(finish, NFAState)

191

192 def addclosure(nfa_state, base_nfa_set):

193 assert isinstance(nfa_state, NFAState)

194 if nfa_state in base_nfa_set:

195 return

196 base_nfa_set.add(nfa_state)

197 for nfa_arc in nfa_state.arcs:

198 if nfa_arc.nonterminal_or_string is None:

199 addclosure(nfa_arc.next, base_nfa_set)

200

201 base_nfa_set = set()

202 addclosure(start, base_nfa_set)

203 states = [DFAState(start.from_rule, base_nfa_set, finish)]

204 for state in states: # NB states grows while we're iterating

205 arcs = {}

206 # Find state transitions and store them in arcs.

207 for nfa_state in state.nfa_set:

208 for nfa_arc in nfa_state.arcs:

209 if nfa_arc.nonterminal_or_string is not None:

210 nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())

211 addclosure(nfa_arc.next, nfa_set)

212

213 # Now create the dfa's with no None's in arcs anymore. All Nones have

214 # been eliminated and state transitions (arcs) are properly defined, we

215 # just need to create the dfa's.

216 for nonterminal_or_string, nfa_set in arcs.items():

217 for nested_state in states:

218 if nested_state.nfa_set == nfa_set:

219 # The DFA state already exists for this rule.

220 break

221 else:

222 nested_state = DFAState(start.from_rule, nfa_set, finish)

223 states.append(nested_state)

224

225 state.add_arc(nested_state, nonterminal_or_string)

226 return states # List of DFAState instances; first one is start

227

228

229def generate_grammar(bnf_grammar: str, token_namespace: Any) -> Grammar[Any]:

230 """

231 ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for

232 at-least-once repetition, [] for optional parts, | for alternatives and ()

233 for grouping).

234

235 It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its

236 own parser.

237 """

238 rule_to_dfas = {}

239 start_nonterminal = None

240 for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():

241 dfas = _make_dfas(nfa_a, nfa_z)

242 _simplify_dfas(dfas)

243 rule_to_dfas[nfa_a.from_rule] = dfas

244

245 if start_nonterminal is None:

246 start_nonterminal = nfa_a.from_rule

247

248 reserved_strings = {}

249 for nonterminal, dfas in rule_to_dfas.items():

250 for dfa_state in dfas:

251 for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():

252 if terminal_or_nonterminal in rule_to_dfas:

253 dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa

254 else:

255 transition = _make_transition(

256 token_namespace, reserved_strings, terminal_or_nonterminal

257 )

258 dfa_state.transitions[transition] = DFAPlan(next_dfa)

259

260 _calculate_tree_traversal(rule_to_dfas)

261 if start_nonterminal is None:

262 raise Exception("could not find starting nonterminal!")

263 return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)

264

265

266def _make_transition(token_namespace, reserved_syntax_strings, label):

267 """

268 Creates a reserved string ("if", "for", "*", ...) or returns the token type

269 (NUMBER, STRING, ...) for a given grammar terminal.

270 """

271 if label[0].isalpha():

272 # A named token (e.g. NAME, NUMBER, STRING)

273 return getattr(token_namespace, label)

274 else:

275 # Either a keyword or an operator

276 assert label[0] in ('"', "'"), label

277 assert not label.startswith('"""') and not label.startswith("'''")

278 value = literal_eval(label)

279 try:

280 return reserved_syntax_strings[value]

281 except KeyError:

282 r = reserved_syntax_strings[value] = ReservedString(value)

283 return r

284

285

286def _calculate_tree_traversal(nonterminal_to_dfas):

287 """

288 By this point we know how dfas can move around within a stack node, but we

289 don't know how we can add a new stack node (nonterminal transitions).

290 """

291 # Map from grammar rule (nonterminal) name to a set of tokens.

292 first_plans = {}

293

294 nonterminals = list(nonterminal_to_dfas.keys())

295 nonterminals.sort()

296 for nonterminal in nonterminals:

297 if nonterminal not in first_plans:

298 _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)

299

300 # Now that we have calculated the first terminals, we are sure that

301 # there is no left recursion.

302

303 for dfas in nonterminal_to_dfas.values():

304 for dfa_state in dfas:

305 transitions = dfa_state.transitions

306 for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():

307 for transition, pushes in first_plans[nonterminal].items():

308 if transition in transitions:

309 prev_plan = transitions[transition]

310 # Make sure these are sorted so that error messages are

311 # at least deterministic

312 choices = sorted(

313 [

314 (

315 prev_plan.dfa_pushes[0].from_rule

316 if prev_plan.dfa_pushes

317 else prev_plan.next_dfa.from_rule

318 ),

319 (pushes[0].from_rule if pushes else next_dfa.from_rule),

320 ]

321 )

322 raise ValueError(

323 (

324 "Rule %s is ambiguous; given a %s token, we "

325 + "can't determine if we should evaluate %s or %s."

326 )

327 % ((dfa_state.from_rule, transition) + tuple(choices))

328 )

329 transitions[transition] = DFAPlan(next_dfa, pushes)

330

331

332def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):

333 """

334 Calculates the first plan in the first_plans dictionary for every given

335 nonterminal. This is going to be used to know when to create stack nodes.

336 """

337 dfas = nonterminal_to_dfas[nonterminal]

338 new_first_plans = {}

339 first_plans[nonterminal] = None # dummy to detect left recursion

340 # We only need to check the first dfa. All the following ones are not

341 # interesting to find first terminals.

342 state = dfas[0]

343 for transition, next_ in state.transitions.items():

344 # It's a string. We have finally found a possible first token.

345 new_first_plans[transition] = [next_.next_dfa]

346

347 for nonterminal2, next_ in state.nonterminal_arcs.items():

348 # It's a nonterminal and we have either a left recursion issue

349 # in the grammar or we have to recurse.

350 try:

351 first_plans2 = first_plans[nonterminal2]

352 except KeyError:

353 first_plans2 = _calculate_first_plans(

354 nonterminal_to_dfas, first_plans, nonterminal2

355 )

356 else:

357 if first_plans2 is None:

358 raise ValueError("left recursion for rule %r" % nonterminal)

359

360 for t, pushes in first_plans2.items():

361 new_first_plans[t] = [next_] + pushes

362

363 first_plans[nonterminal] = new_first_plans

364 return new_first_plans

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/_parser/parso/pgen2/generator.py: 16%

159 statements