Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/pgen2/generator.py: 15%

180 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. 

2# Licensed to PSF under a Contributor Agreement. 

3 

4# Modifications: 

5# Copyright David Halter and Contributors 

6# Modifications are dual-licensed: MIT and PSF. 

7 

8""" 

9This module defines the data structures used to represent a grammar. 

10 

11Specifying grammars in pgen is possible with this grammar:: 

12 

13 grammar: (NEWLINE | rule)* ENDMARKER 

14 rule: NAME ':' rhs NEWLINE 

15 rhs: items ('|' items)* 

16 items: item+ 

17 item: '[' rhs ']' | atom ['+' | '*'] 

18 atom: '(' rhs ')' | NAME | STRING 

19 

20This grammar is self-referencing. 

21 

22This parser generator (pgen2) was created by Guido van Rossum and used for lib2to3. 

23Most of the code has been refactored to make it more Pythonic. Since this was a 

24"copy" of the CPython Parser parser "pgen", there was some work needed to make 

25it more readable. It should also be slightly faster than the original pgen2, 

26because we made some optimizations. 

27""" 

28 

29from ast import literal_eval 

30from typing import TypeVar, Generic, Mapping, Sequence, Set, Union 

31 

32from parso.pgen2.grammar_parser import GrammarParser, NFAState 

33 

34_TokenTypeT = TypeVar("_TokenTypeT") 

35 

36 

class Grammar(Generic[_TokenTypeT]):
    """
    Holds the compiled grammar tables that the parsing engine in parse.py
    consumes.  The engine reads the instance attributes directly.

    The only parts that really matter to the parser are the DFAs per rule
    and the transitions between them.
    """

    def __init__(self,
                 start_nonterminal: str,
                 rule_to_dfas: Mapping[str, Sequence['DFAState[_TokenTypeT]']],
                 reserved_syntax_strings: Mapping[str, 'ReservedString']):
        # Straight-through assignments; this class is a plain container.
        self.start_nonterminal = start_nonterminal
        self.nonterminal_to_dfas = rule_to_dfas
        self.reserved_syntax_strings = reserved_syntax_strings

54 

55 

class DFAPlan:
    """
    Plans are used for the parser to create stack nodes and do the proper
    DFA state transitions.

    :param next_dfa: the DFA state the current stack node moves to.
    :param dfa_pushes: DFA states to push as new stack nodes first; an
        empty list when the transition stays within the current node.
    """
    def __init__(self, next_dfa: 'DFAState',
                 dfa_pushes: Union[Sequence['DFAState'], None] = None):
        self.next_dfa = next_dfa
        # Bug fix: the old signature used a mutable default (``= []``),
        # which made every default-constructed plan share ONE list.  Use a
        # None sentinel so each instance gets its own fresh list.
        self.dfa_pushes = [] if dfa_pushes is None else dfa_pushes

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)

67 

68 

class DFAState(Generic[_TokenTypeT]):
    """
    The core vertex type of the grammar graph.

    DFAState objects are the vertices of an ordered graph, while arcs and
    transitions are the edges.  Arcs are the initial edges, where most
    DFAStates are not connected; transitions are calculated afterwards to
    connect the DFA state machines of different nonterminals.
    """
    def __init__(self, from_rule: str, nfa_set: Set[NFAState], final: NFAState):
        assert isinstance(nfa_set, set)
        assert isinstance(next(iter(nfa_set)), NFAState)
        assert isinstance(final, NFAState)
        self.from_rule = from_rule
        self.nfa_set = nfa_set
        # Maps terminal/nonterminal labels to successor DFAStates.
        self.arcs: Mapping[str, DFAState] = {}
        # Intermediate step: same layout as ``arcs`` but restricted to
        # nonterminal labels (terminals have been filtered out).
        self.nonterminal_arcs: Mapping[str, DFAState] = {}
        # ``transitions`` plus ``is_final`` are essentially all the parser
        # uses at runtime; everything else exists only to build them.
        self.transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] = {}
        self.is_final = final in nfa_set

    def add_arc(self, next_, label):
        # A label may be wired up only once per state.
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next_, DFAState)
        self.arcs[label] = next_

    def unifystate(self, old, new):
        # Redirect every arc that points at ``old`` to point at ``new``.
        for label in self.arcs:
            if self.arcs[label] is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality deliberately ignores the ``nfa_set`` attribute.
        assert isinstance(other, DFAState)
        if self.is_final != other.is_final:
            return False
        # Comparing ``self.arcs == other.arcs`` directly would recurse
        # through graph cycles, so compare successor identity per label.
        if len(self.arcs) != len(other.arcs):
            return False
        return all(successor is other.arcs.get(label)
                   for label, successor in self.arcs.items())

    def __repr__(self):
        return '<%s: %s is_final=%s>' % (
            self.__class__.__name__, self.from_rule, self.is_final
        )

125 

126 

class ReservedString:
    """
    Represents a keyword or operator that a grammar mentions as a plain
    string (e.g. ``"if"``) rather than as a token type (e.g. ``NUMBER``).
    """

    def __init__(self, value: str):
        # The literal text of the keyword/operator, e.g. "if" or "*".
        self.value = value

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, self.value)

139 

140 

def _simplify_dfas(dfas):
    """
    This is not theoretically optimal, but works well enough.
    Algorithm: repeatedly look for two states that have the same
    set of arcs (same labels pointing to the same nodes) and
    unify them, until things stop changing.

    dfas is a list of DFAState instances; it is modified in place.
    """
    changes = True
    while changes:
        changes = False
        for i, state_i in enumerate(dfas):
            # Only compare against later states; earlier pairs were already
            # checked (and equality is symmetric).
            for j in range(i + 1, len(dfas)):
                state_j = dfas[j]
                if state_i == state_j:
                    # Deleting during iteration is safe because we break out
                    # of the inner loop immediately; the outer while loop
                    # restarts the scan on the shrunken list.
                    del dfas[j]
                    # Rewire every arc that targeted the removed state.
                    for state in dfas:
                        state.unifystate(state_j, state_i)
                    changes = True
                    break

162 

163 

def _make_dfas(start, finish):
    """
    Uses the powerset construction algorithm to create DFA states from sets of
    NFA states.

    Also does state reduction if some states are not needed.

    Returns a list of DFAState instances; the first one is the start state.
    """
    # To turn an NFA into a DFA, we define the states of the DFA
    # to correspond to *sets* of states of the NFA. Then do some
    # state reduction.
    assert isinstance(start, NFAState)
    assert isinstance(finish, NFAState)

    def addclosure(nfa_state, base_nfa_set):
        # Epsilon closure: add nfa_state plus everything reachable through
        # unlabeled arcs (nonterminal_or_string is None == epsilon edge).
        assert isinstance(nfa_state, NFAState)
        if nfa_state in base_nfa_set:
            return
        base_nfa_set.add(nfa_state)
        for nfa_arc in nfa_state.arcs:
            if nfa_arc.nonterminal_or_string is None:
                addclosure(nfa_arc.next, base_nfa_set)

    base_nfa_set = set()
    addclosure(start, base_nfa_set)
    states = [DFAState(start.from_rule, base_nfa_set, finish)]
    for state in states:  # NB states grows while we're iterating
        arcs = {}
        # Find state transitions and store them in arcs.
        # Group the labeled (non-epsilon) arcs by label, taking the epsilon
        # closure of each target set.
        for nfa_state in state.nfa_set:
            for nfa_arc in nfa_state.arcs:
                if nfa_arc.nonterminal_or_string is not None:
                    nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
                    addclosure(nfa_arc.next, nfa_set)

        # Now create the dfa's with no None's in arcs anymore. All Nones have
        # been eliminated and state transitions (arcs) are properly defined, we
        # just need to create the dfa's.
        for nonterminal_or_string, nfa_set in arcs.items():
            # Reuse an existing DFA state with the same NFA set if there is
            # one; the for/else appends a new state only when none matched.
            for nested_state in states:
                if nested_state.nfa_set == nfa_set:
                    # The DFA state already exists for this rule.
                    break
            else:
                nested_state = DFAState(start.from_rule, nfa_set, finish)
                states.append(nested_state)

            state.add_arc(nested_state, nonterminal_or_string)
    return states  # List of DFAState instances; first one is start

212 

213 

214def _dump_nfa(start, finish): 

215 print("Dump of NFA for", start.from_rule) 

216 todo = [start] 

217 for i, state in enumerate(todo): 

218 print(" State", i, state is finish and "(final)" or "") 

219 for arc in state.arcs: 

220 label, next_ = arc.nonterminal_or_string, arc.next 

221 if next_ in todo: 

222 j = todo.index(next_) 

223 else: 

224 j = len(todo) 

225 todo.append(next_) 

226 if label is None: 

227 print(" -> %d" % j) 

228 else: 

229 print(" %s -> %d" % (label, j)) 

230 

231 

232def _dump_dfas(dfas): 

233 print("Dump of DFA for", dfas[0].from_rule) 

234 for i, state in enumerate(dfas): 

235 print(" State", i, state.is_final and "(final)" or "") 

236 for nonterminal, next_ in state.arcs.items(): 

237 print(" %s -> %d" % (nonterminal, dfas.index(next_))) 

238 

239 

def generate_grammar(bnf_grammar: str, token_namespace) -> Grammar:
    """
    ``bnf_grammar`` is a grammar in extended BNF (using * for repetition, +
    for at-least-once repetition, [] for optional parts, | for alternatives
    and () for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in
    its own parser.
    """
    rule_to_dfas = {}
    start_nonterminal = None
    # Phase 1: build and simplify a DFA for every rule of the grammar.
    for nfa_start, nfa_end in GrammarParser(bnf_grammar).parse():
        dfas = _make_dfas(nfa_start, nfa_end)
        _simplify_dfas(dfas)
        rule_to_dfas[nfa_start.from_rule] = dfas
        if start_nonterminal is None:
            # The first rule in the grammar file is the start symbol.
            start_nonterminal = nfa_start.from_rule

    # Phase 2: classify every arc label as either a nonterminal arc or a
    # terminal/keyword transition.
    reserved_strings: Mapping[str, ReservedString] = {}
    for dfas in rule_to_dfas.values():
        for dfa_state in dfas:
            for label, next_dfa in dfa_state.arcs.items():
                if label in rule_to_dfas:
                    dfa_state.nonterminal_arcs[label] = next_dfa
                else:
                    transition = _make_transition(
                        token_namespace,
                        reserved_strings,
                        label,
                    )
                    dfa_state.transitions[transition] = DFAPlan(next_dfa)

    # Phase 3: compute the push plans for nonterminal transitions.
    _calculate_tree_traversal(rule_to_dfas)
    return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)  # type: ignore[arg-type]

280 

281 

282def _make_transition(token_namespace, reserved_syntax_strings, label): 

283 """ 

284 Creates a reserved string ("if", "for", "*", ...) or returns the token type 

285 (NUMBER, STRING, ...) for a given grammar terminal. 

286 """ 

287 if label[0].isalpha(): 

288 # A named token (e.g. NAME, NUMBER, STRING) 

289 return getattr(token_namespace, label) 

290 else: 

291 # Either a keyword or an operator 

292 assert label[0] in ('"', "'"), label 

293 assert not label.startswith('"""') and not label.startswith("'''") 

294 value = literal_eval(label) 

295 try: 

296 return reserved_syntax_strings[value] 

297 except KeyError: 

298 r = reserved_syntax_strings[value] = ReservedString(value) 

299 return r 

300 

301 

def _calculate_tree_traversal(nonterminal_to_dfas):
    """
    By this point we know how dfas can move around within a stack node, but we
    don't know how we can add a new stack node (nonterminal transitions).
    """
    # Map from grammar rule (nonterminal) name to a set of tokens.
    first_plans = {}

    # Sort for deterministic iteration order (and thus deterministic
    # error messages on left recursion).
    nonterminals = list(nonterminal_to_dfas.keys())
    nonterminals.sort()
    for nonterminal in nonterminals:
        if nonterminal not in first_plans:
            _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)

    # Now that we have calculated the first terminals, we are sure that
    # there is no left recursion.

    for dfas in nonterminal_to_dfas.values():
        for dfa_state in dfas:
            transitions = dfa_state.transitions
            # For every nonterminal arc, copy that rule's first-token plans
            # into this state's transitions so the parser knows which stack
            # nodes to push on a given token.
            for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                for transition, pushes in first_plans[nonterminal].items():
                    if transition in transitions:
                        # The same token would start two different paths:
                        # the grammar is ambiguous (not LL(1)-decidable here).
                        prev_plan = transitions[transition]
                        # Make sure these are sorted so that error messages are
                        # at least deterministic
                        choices = sorted([
                            (
                                prev_plan.dfa_pushes[0].from_rule
                                if prev_plan.dfa_pushes
                                else prev_plan.next_dfa.from_rule
                            ),
                            (
                                pushes[0].from_rule
                                if pushes else next_dfa.from_rule
                            ),
                        ])
                        raise ValueError(
                            "Rule %s is ambiguous; given a %s token, we "
                            "can't determine if we should evaluate %s or %s."
                            % (
                                (
                                    dfa_state.from_rule,
                                    transition,
                                ) + tuple(choices)
                            )
                        )
                    transitions[transition] = DFAPlan(next_dfa, pushes)

350 

351 

352def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): 

353 """ 

354 Calculates the first plan in the first_plans dictionary for every given 

355 nonterminal. This is going to be used to know when to create stack nodes. 

356 """ 

357 dfas = nonterminal_to_dfas[nonterminal] 

358 new_first_plans = {} 

359 first_plans[nonterminal] = None # dummy to detect left recursion 

360 # We only need to check the first dfa. All the following ones are not 

361 # interesting to find first terminals. 

362 state = dfas[0] 

363 for transition, next_ in state.transitions.items(): 

364 # It's a string. We have finally found a possible first token. 

365 new_first_plans[transition] = [next_.next_dfa] 

366 

367 for nonterminal2, next_ in state.nonterminal_arcs.items(): 

368 # It's a nonterminal and we have either a left recursion issue 

369 # in the grammar or we have to recurse. 

370 try: 

371 first_plans2 = first_plans[nonterminal2] 

372 except KeyError: 

373 first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2) 

374 else: 

375 if first_plans2 is None: 

376 raise ValueError("left recursion for rule %r" % nonterminal) 

377 

378 for t, pushes in first_plans2.items(): 

379 new_first_plans[t] = [next_] + pushes 

380 

381 first_plans[nonterminal] = new_first_plans 

382 return new_first_plans