Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/pgen2/generator.py: 15%

180 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. 

2# Licensed to PSF under a Contributor Agreement. 

3 

4# Modifications: 

5# Copyright David Halter and Contributors 

6# Modifications are dual-licensed: MIT and PSF. 

7 

8""" 

9This module defines the data structures used to represent a grammar. 

10 

11Specifying grammars in pgen is possible with this grammar:: 

12 

13 grammar: (NEWLINE | rule)* ENDMARKER 

14 rule: NAME ':' rhs NEWLINE 

15 rhs: items ('|' items)* 

16 items: item+ 

17 item: '[' rhs ']' | atom ['+' | '*'] 

18 atom: '(' rhs ')' | NAME | STRING 

19 

20This grammar is self-referencing. 

21 

22This parser generator (pgen2) was created by Guido van Rossum and used for lib2to3. 

23Most of the code has been refactored to make it more Pythonic. Since this was a 

24"copy" of the CPython Parser parser "pgen", there was some work needed to make 

25it more readable. It should also be slightly faster than the original pgen2, 

26because we made some optimizations. 

27""" 

28 

29from ast import literal_eval 

30from typing import TypeVar, Generic, Mapping, Sequence, Set, Union 

31 

32from parso.pgen2.grammar_parser import GrammarParser, NFAState 

33 

34_TokenTypeT = TypeVar("_TokenTypeT") 

35 

36 

class Grammar(Generic[_TokenTypeT]):
    """
    Holds the compiled grammar tables that the parsing engine in parse.py
    consumes.  The engine reads the instance attributes directly.

    The only parts that really matter to the parser are the DFAs per rule
    and the transitions between them.
    """

    def __init__(self,
                 start_nonterminal: str,
                 rule_to_dfas: Mapping[str, Sequence['DFAState[_TokenTypeT]']],
                 reserved_syntax_strings: Mapping[str, 'ReservedString']):
        # Straight-through assignments; this class is a plain container.
        self.start_nonterminal = start_nonterminal
        self.nonterminal_to_dfas = rule_to_dfas
        self.reserved_syntax_strings = reserved_syntax_strings

54 

55 

class DFAPlan:
    """
    Plans are used for the parser to create stack nodes and do the proper
    DFA state transitions.

    :param next_dfa: the DFA state the current stack node moves to.
    :param dfa_pushes: DFA states to push as new stack nodes first; an
        empty list when the transition stays within the current node.
    """
    def __init__(self, next_dfa: 'DFAState',
                 dfa_pushes: Union[Sequence['DFAState'], None] = None):
        self.next_dfa = next_dfa
        # Bug fix: the old signature used a mutable default (``= []``),
        # which made every default-constructed plan share ONE list.  Use a
        # None sentinel so each instance gets its own fresh list.
        self.dfa_pushes = [] if dfa_pushes is None else dfa_pushes

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)

67 

68 

class DFAState(Generic[_TokenTypeT]):
    """
    The core vertex type of the grammar graph.

    DFAState objects are the vertices of an ordered graph, while arcs and
    transitions are the edges.  Arcs are the initial edges, where most
    DFAStates are not connected; transitions are calculated afterwards to
    connect the DFA state machines of different nonterminals.
    """
    def __init__(self, from_rule: str, nfa_set: Set[NFAState], final: NFAState):
        assert isinstance(nfa_set, set)
        assert isinstance(next(iter(nfa_set)), NFAState)
        assert isinstance(final, NFAState)
        self.from_rule = from_rule
        self.nfa_set = nfa_set
        # Maps terminal/nonterminal labels to successor DFAStates.
        self.arcs: Mapping[str, DFAState] = {}
        # Intermediate step: same layout as ``arcs`` but restricted to
        # nonterminal labels (terminals have been filtered out).
        self.nonterminal_arcs: Mapping[str, DFAState] = {}
        # ``transitions`` plus ``is_final`` are essentially all the parser
        # uses at runtime; everything else exists only to build them.
        self.transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] = {}
        self.is_final = final in nfa_set

    def add_arc(self, next_, label):
        # A label may be wired up only once per state.
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next_, DFAState)
        self.arcs[label] = next_

    def unifystate(self, old, new):
        # Redirect every arc that points at ``old`` to point at ``new``.
        for label in self.arcs:
            if self.arcs[label] is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality deliberately ignores the ``nfa_set`` attribute.
        assert isinstance(other, DFAState)
        if self.is_final != other.is_final:
            return False
        # Comparing ``self.arcs == other.arcs`` directly would recurse
        # through graph cycles, so compare successor identity per label.
        if len(self.arcs) != len(other.arcs):
            return False
        return all(successor is other.arcs.get(label)
                   for label, successor in self.arcs.items())

    def __repr__(self):
        return '<%s: %s is_final=%s>' % (
            self.__class__.__name__, self.from_rule, self.is_final
        )

125 

126 

class ReservedString:
    """
    Represents a keyword or operator that a grammar mentions as a plain
    string (e.g. ``"if"``) rather than as a token type (e.g. ``NUMBER``).
    """

    def __init__(self, value: str):
        # The literal text of the keyword/operator, e.g. "if" or "*".
        self.value = value

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, self.value)

139 

140 

def _simplify_dfas(dfas):
    """
    This is not theoretically optimal, but works well enough.
    Algorithm: repeatedly look for two states that have the same
    set of arcs (same labels pointing to the same nodes) and
    unify them, until things stop changing.

    dfas is a list of DFAState instances; it is modified in place.
    """
    changes = True
    while changes:
        changes = False
        for i, state_i in enumerate(dfas):
            # Only compare against later states; earlier pairs were already
            # checked (and equality is symmetric).
            for j in range(i + 1, len(dfas)):
                state_j = dfas[j]
                if state_i == state_j:
                    # Deleting during iteration is safe because we break out
                    # of the inner loop immediately; the outer while loop
                    # restarts the scan on the shrunken list.
                    del dfas[j]
                    # Rewire every arc that targeted the removed state.
                    for state in dfas:
                        state.unifystate(state_j, state_i)
                    changes = True
                    break

162 

163 

def _make_dfas(start, finish):
    """
    Uses the powerset construction algorithm to create DFA states from sets of
    NFA states.

    Also does state reduction if some states are not needed.

    Returns a list of DFAState instances; the first one is the start state.
    """
    # To turn an NFA into a DFA, we define the states of the DFA
    # to correspond to *sets* of states of the NFA. Then do some
    # state reduction.
    assert isinstance(start, NFAState)
    assert isinstance(finish, NFAState)

    def addclosure(nfa_state, base_nfa_set):
        # Epsilon closure: add nfa_state plus everything reachable through
        # unlabeled arcs (nonterminal_or_string is None == epsilon edge).
        assert isinstance(nfa_state, NFAState)
        if nfa_state in base_nfa_set:
            return
        base_nfa_set.add(nfa_state)
        for nfa_arc in nfa_state.arcs:
            if nfa_arc.nonterminal_or_string is None:
                addclosure(nfa_arc.next, base_nfa_set)

    base_nfa_set = set()
    addclosure(start, base_nfa_set)
    states = [DFAState(start.from_rule, base_nfa_set, finish)]
    for state in states:  # NB states grows while we're iterating
        arcs = {}
        # Find state transitions and store them in arcs.
        # Group the labeled (non-epsilon) arcs by label, taking the epsilon
        # closure of each target set.
        for nfa_state in state.nfa_set:
            for nfa_arc in nfa_state.arcs:
                if nfa_arc.nonterminal_or_string is not None:
                    nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
                    addclosure(nfa_arc.next, nfa_set)

        # Now create the dfa's with no None's in arcs anymore. All Nones have
        # been eliminated and state transitions (arcs) are properly defined, we
        # just need to create the dfa's.
        for nonterminal_or_string, nfa_set in arcs.items():
            # Reuse an existing DFA state with the same NFA set if there is
            # one; the for/else appends a new state only when none matched.
            for nested_state in states:
                if nested_state.nfa_set == nfa_set:
                    # The DFA state already exists for this rule.
                    break
            else:
                nested_state = DFAState(start.from_rule, nfa_set, finish)
                states.append(nested_state)

            state.add_arc(nested_state, nonterminal_or_string)
    return states  # List of DFAState instances; first one is start

212 

213 

214def _dump_nfa(start, finish): 

215 print("Dump of NFA for", start.from_rule) 

216 todo = [start] 

217 for i, state in enumerate(todo): 

218 print(" State", i, state is finish and "(final)" or "") 

219 for arc in state.arcs: 

220 label, next_ = arc.nonterminal_or_string, arc.next 

221 if next_ in todo: 

222 j = todo.index(next_) 

223 else: 

224 j = len(todo) 

225 todo.append(next_) 

226 if label is None: 

227 print(" -> %d" % j) 

228 else: 

229 print(" %s -> %d" % (label, j)) 

230 

231 

232def _dump_dfas(dfas): 

233 print("Dump of DFA for", dfas[0].from_rule) 

234 for i, state in enumerate(dfas): 

235 print(" State", i, state.is_final and "(final)" or "") 

236 for nonterminal, next_ in state.arcs.items(): 

237 print(" %s -> %d" % (nonterminal, dfas.index(next_))) 

238 

239 

def generate_grammar(bnf_grammar: str, token_namespace) -> Grammar:
    """
    ``bnf_grammar`` is a grammar in extended BNF (using * for repetition, +
    for at-least-once repetition, [] for optional parts, | for alternatives
    and () for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in
    its own parser.
    """
    rule_to_dfas = {}
    start_nonterminal = None
    # Phase 1: build and simplify a DFA for every rule of the grammar.
    for nfa_start, nfa_end in GrammarParser(bnf_grammar).parse():
        dfas = _make_dfas(nfa_start, nfa_end)
        _simplify_dfas(dfas)
        rule_to_dfas[nfa_start.from_rule] = dfas
        if start_nonterminal is None:
            # The first rule in the grammar file is the start symbol.
            start_nonterminal = nfa_start.from_rule

    # Phase 2: classify every arc label as either a nonterminal arc or a
    # terminal/keyword transition.
    reserved_strings: Mapping[str, ReservedString] = {}
    for dfas in rule_to_dfas.values():
        for dfa_state in dfas:
            for label, next_dfa in dfa_state.arcs.items():
                if label in rule_to_dfas:
                    dfa_state.nonterminal_arcs[label] = next_dfa
                else:
                    transition = _make_transition(
                        token_namespace,
                        reserved_strings,
                        label,
                    )
                    dfa_state.transitions[transition] = DFAPlan(next_dfa)

    # Phase 3: compute the push plans for nonterminal transitions.
    _calculate_tree_traversal(rule_to_dfas)
    return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)  # type: ignore[arg-type]

280 

281 

282def _make_transition(token_namespace, reserved_syntax_strings, label): 

283 """ 

284 Creates a reserved string ("if", "for", "*", ...) or returns the token type 

285 (NUMBER, STRING, ...) for a given grammar terminal. 

286 """ 

287 if label[0].isalpha(): 

288 # A named token (e.g. NAME, NUMBER, STRING) 

289 return getattr(token_namespace, label) 

290 else: 

291 # Either a keyword or an operator 

292 assert label[0] in ('"', "'"), label 

293 assert not label.startswith('"""') and not label.startswith("'''") 

294 value = literal_eval(label) 

295 try: 

296 return reserved_syntax_strings[value] 

297 except KeyError: 

298 r = reserved_syntax_strings[value] = ReservedString(value) 

299 return r 

300 

301 

def _calculate_tree_traversal(nonterminal_to_dfas):
    """
    By this point we know how dfas can move around within a stack node, but we
    don't know how we can add a new stack node (nonterminal transitions).
    """
    # Map from grammar rule (nonterminal) name to a set of tokens.
    first_plans = {}

    # Sort for deterministic iteration order (and thus deterministic
    # error messages on left recursion).
    nonterminals = list(nonterminal_to_dfas.keys())
    nonterminals.sort()
    for nonterminal in nonterminals:
        if nonterminal not in first_plans:
            _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)

    # Now that we have calculated the first terminals, we are sure that
    # there is no left recursion.

    for dfas in nonterminal_to_dfas.values():
        for dfa_state in dfas:
            transitions = dfa_state.transitions
            # For every nonterminal arc, copy that rule's first-token plans
            # into this state's transitions so the parser knows which stack
            # nodes to push on a given token.
            for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                for transition, pushes in first_plans[nonterminal].items():
                    if transition in transitions:
                        # The same token would start two different paths:
                        # the grammar is ambiguous (not LL(1)-decidable here).
                        prev_plan = transitions[transition]
                        # Make sure these are sorted so that error messages are
                        # at least deterministic
                        choices = sorted([
                            (
                                prev_plan.dfa_pushes[0].from_rule
                                if prev_plan.dfa_pushes
                                else prev_plan.next_dfa.from_rule
                            ),
                            (
                                pushes[0].from_rule
                                if pushes else next_dfa.from_rule
                            ),
                        ])
                        raise ValueError(
                            "Rule %s is ambiguous; given a %s token, we "
                            "can't determine if we should evaluate %s or %s."
                            % (
                                (
                                    dfa_state.from_rule,
                                    transition,
                                ) + tuple(choices)
                            )
                        )
                    transitions[transition] = DFAPlan(next_dfa, pushes)

350 

351 

352def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): 

353 """ 

354 Calculates the first plan in the first_plans dictionary for every given 

355 nonterminal. This is going to be used to know when to create stack nodes. 

356 """ 

357 dfas = nonterminal_to_dfas[nonterminal] 

358 new_first_plans = {} 

359 first_plans[nonterminal] = None # dummy to detect left recursion 

360 # We only need to check the first dfa. All the following ones are not 

361 # interesting to find first terminals. 

362 state = dfas[0] 

363 for transition, next_ in state.transitions.items(): 

364 # It's a string. We have finally found a possible first token. 

365 new_first_plans[transition] = [next_.next_dfa] 

366 

367 for nonterminal2, next_ in state.nonterminal_arcs.items(): 

368 # It's a nonterminal and we have either a left recursion issue 

369 # in the grammar or we have to recurse. 

370 try: 

371 first_plans2 = first_plans[nonterminal2] 

372 except KeyError: 

373 first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2) 

374 else: 

375 if first_plans2 is None: 

376 raise ValueError("left recursion for rule %r" % nonterminal) 

377 

378 for t, pushes in first_plans2.items(): 

379 new_first_plans[t] = [next_] + pushes 

380 

381 first_plans[nonterminal] = new_first_plans 

382 return new_first_plans