Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/blib2to3/pgen2/pgen.py: 92%


309 statements  

# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

import os
from collections.abc import Iterator, Sequence
from typing import IO, Any, NoReturn, Optional, Union

from blib2to3.pgen2 import grammar, token, tokenize
from blib2to3.pgen2.tokenize import TokenInfo

Path = Union[str, "os.PathLike[str]"]


class PgenGrammar(grammar.Grammar):
    pass


class ParserGenerator:
    filename: Path
    stream: IO[str]
    generator: Iterator[TokenInfo]
    first: dict[str, Optional[dict[str, int]]]

    def __init__(self, filename: Path, stream: Optional[IO[str]] = None) -> None:
        close_stream = None
        if stream is None:
            stream = open(filename, encoding="utf-8")
            close_stream = stream.close
        self.filename = filename
        self.generator = tokenize.tokenize(stream.read())
        self.gettoken()  # Initialize lookahead
        self.dfas, self.startsymbol = self.parse()
        if close_stream is not None:
            close_stream()
        self.first = {}  # map from symbol name to set of tokens
        self.addfirstsets()

    def make_grammar(self) -> PgenGrammar:
        c = PgenGrammar()
        names = list(self.dfas.keys())
        names.sort()
        names.remove(self.startsymbol)
        names.insert(0, self.startsymbol)
        for name in names:
            i = 256 + len(c.symbol2number)
            c.symbol2number[name] = i
            c.number2symbol[i] = name
        for name in names:
            dfa = self.dfas[name]
            states = []
            for state in dfa:
                arcs = []
                for label, next in sorted(state.arcs.items()):
                    arcs.append((self.make_label(c, label), dfa.index(next)))
                if state.isfinal:
                    arcs.append((0, dfa.index(state)))
                states.append(arcs)
            c.states.append(states)
            c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
        c.start = c.symbol2number[self.startsymbol]
        return c
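
A note on the numbering scheme above: nonterminal symbols get consecutive codes starting at 256, so they can never collide with terminal token codes, which all fit below 256. A minimal sketch of what the first loop produces, with hypothetical rule names:

    symbol2number: dict[str, int] = {}
    number2symbol: dict[int, str] = {}
    for name in ["file_input", "stmt", "expr"]:  # start symbol is sorted first
        i = 256 + len(symbol2number)
        symbol2number[name] = i
        number2symbol[i] = name
    assert symbol2number == {"file_input": 256, "stmt": 257, "expr": 258}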

    def make_first(self, c: PgenGrammar, name: str) -> dict[int, int]:
        rawfirst = self.first[name]
        assert rawfirst is not None
        first = {}
        for label in sorted(rawfirst):
            ilabel = self.make_label(c, label)
            ##assert ilabel not in first # XXX failed on <> ... !=
            first[ilabel] = 1
        return first

    def make_label(self, c: PgenGrammar, label: str) -> int:
        # XXX Maybe this should be a method on a subclass of converter?
        ilabel = len(c.labels)
        if label[0].isalpha():
            # Either a symbol name or a named token
            if label in c.symbol2number:
                # A symbol name (a non-terminal)
                if label in c.symbol2label:
                    return c.symbol2label[label]
                else:
                    c.labels.append((c.symbol2number[label], None))
                    c.symbol2label[label] = ilabel
                    return ilabel
            else:
                # A named token (NAME, NUMBER, STRING)
                itoken = getattr(token, label, None)
                assert isinstance(itoken, int), label
                assert itoken in token.tok_name, label
                if itoken in c.tokens:
                    return c.tokens[itoken]
                else:
                    c.labels.append((itoken, None))
                    c.tokens[itoken] = ilabel
                    return ilabel
        else:
            # Either a keyword or an operator
            assert label[0] in ('"', "'"), label
            value = eval(label)
            if value[0].isalpha():
                if label[0] == '"':
                    keywords = c.soft_keywords
                else:
                    keywords = c.keywords

                # A keyword
                if value in keywords:
                    return keywords[value]
                else:
                    c.labels.append((token.NAME, value))
                    keywords[value] = ilabel
                    return ilabel
            else:
                # An operator (any non-numeric token)
                itoken = grammar.opmap[value]  # Fails if unknown token
                if itoken in c.tokens:
                    return c.tokens[itoken]
                else:
                    c.labels.append((itoken, None))
                    c.tokens[itoken] = ilabel
                    return ilabel
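
make_label interns every grammar label (nonterminal name, named token, keyword, or operator) into an integer index in c.labels, reusing the index on repeat lookups. Note the quote convention it relies on: a double-quoted keyword in the grammar file becomes a soft keyword, a single-quoted one a hard keyword. The interning pattern, reduced to a plain dict with hypothetical labels:

    labels: list[str] = []
    seen: dict[str, int] = {}

    def intern_label(label: str) -> int:
        if label in seen:  # already interned: reuse the existing index
            return seen[label]
        seen[label] = len(labels)  # otherwise append and remember its slot
        labels.append(label)
        return seen[label]

    assert intern_label("'if'") == 0
    assert intern_label("NAME") == 1
    assert intern_label("'if'") == 0  # repeat lookups hit the cache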

    def addfirstsets(self) -> None:
        names = list(self.dfas.keys())
        names.sort()
        for name in names:
            if name not in self.first:
                self.calcfirst(name)
            # print name, self.first[name].keys()

    def calcfirst(self, name: str) -> None:
        dfa = self.dfas[name]
        self.first[name] = None  # dummy to detect left recursion
        state = dfa[0]
        totalset: dict[str, int] = {}
        overlapcheck = {}
        for label in state.arcs:
            if label in self.dfas:
                if label in self.first:
                    fset = self.first[label]
                    if fset is None:
                        raise ValueError(f"recursion for rule {name!r}")
                else:
                    self.calcfirst(label)
                    fset = self.first[label]
                    assert fset is not None
                totalset.update(fset)
                overlapcheck[label] = fset
            else:
                totalset[label] = 1
                overlapcheck[label] = {label: 1}
        inverse: dict[str, str] = {}
        for label, itsfirst in overlapcheck.items():
            for symbol in itsfirst:
                if symbol in inverse:
                    raise ValueError(
                        f"rule {name} is ambiguous; {symbol} is in the first sets of"
                        f" {label} as well as {inverse[symbol]}"
                    )
                inverse[symbol] = label
        self.first[name] = totalset
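
calcfirst computes FIRST(name): every label that can begin the rule, recursing through leading nonterminals. The None placeholder detects left recursion, and the inverse map rejects a rule whose alternatives share a starting token, since the generated parser must pick an alternative on one token of lookahead. A sketch of the core computation on a hypothetical three-rule grammar (the real method works off DFA arcs and also guards against recursion):

    # Each rule maps to the labels on the arcs of its first DFA state.
    leading_labels = {
        "stmt": ["pass_stmt", "if_stmt"],  # nonterminals: recurse into them
        "pass_stmt": ["'pass'"],           # quoted terminals: taken as-is
        "if_stmt": ["'if'"],
    }

    def first(name: str) -> set[str]:
        result: set[str] = set()
        for label in leading_labels[name]:
            if label in leading_labels:  # a nonterminal
                result |= first(label)
            else:                        # a terminal label
                result.add(label)
        return result

    assert first("stmt") == {"'pass'", "'if'"}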

    def parse(self) -> tuple[dict[str, list["DFAState"]], str]:
        dfas = {}
        startsymbol: Optional[str] = None
        # MSTART: (NEWLINE | RULE)* ENDMARKER
        while self.type != token.ENDMARKER:
            while self.type == token.NEWLINE:
                self.gettoken()
            # RULE: NAME ':' RHS NEWLINE
            name = self.expect(token.NAME)
            self.expect(token.OP, ":")
            a, z = self.parse_rhs()
            self.expect(token.NEWLINE)
            # self.dump_nfa(name, a, z)
            dfa = self.make_dfa(a, z)
            # self.dump_dfa(name, dfa)
            # oldlen = len(dfa)
            self.simplify_dfa(dfa)
            # newlen = len(dfa)
            dfas[name] = dfa
            # print name, oldlen, newlen
            if startsymbol is None:
                startsymbol = name
        assert startsymbol is not None
        return dfas, startsymbol
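
parse and the parse_* helpers below form a small recursive-descent parser for the grammar notation itself; the inline comments give its shape as productions (MSTART, RULE, RHS, ALT, ITEM, ATOM). A hypothetical fragment of the kind of input it accepts, one rule per line:

    GRAMMAR_TEXT = """\
    file_input: (NEWLINE | stmt)* ENDMARKER
    stmt: 'pass' NEWLINE | 'if' NAME ':' stmt
    """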

    def make_dfa(self, start: "NFAState", finish: "NFAState") -> list["DFAState"]:
        # To turn an NFA into a DFA, we define the states of the DFA
        # to correspond to *sets* of states of the NFA.  Then do some
        # state reduction.  Let's represent sets as dicts with 1 for
        # values.
        assert isinstance(start, NFAState)
        assert isinstance(finish, NFAState)

        def closure(state: NFAState) -> dict[NFAState, int]:
            base: dict[NFAState, int] = {}
            addclosure(state, base)
            return base

        def addclosure(state: NFAState, base: dict[NFAState, int]) -> None:
            assert isinstance(state, NFAState)
            if state in base:
                return
            base[state] = 1
            for label, next in state.arcs:
                if label is None:
                    addclosure(next, base)

        states = [DFAState(closure(start), finish)]
        for state in states:  # NB states grows while we're iterating
            arcs: dict[str, dict[NFAState, int]] = {}
            for nfastate in state.nfaset:
                for label, next in nfastate.arcs:
                    if label is not None:
                        addclosure(next, arcs.setdefault(label, {}))
            for label, nfaset in sorted(arcs.items()):
                for st in states:
                    if st.nfaset == nfaset:
                        break
                else:
                    st = DFAState(nfaset, finish)
                    states.append(st)
                state.addarc(st, label)
        return states  # List of DFAState instances; first one is start
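
This is the classic subset construction: each DFA state stands for the epsilon-closure of a set of NFA states, and the outer loop deliberately iterates over a list that grows as new state sets are discovered, so those get processed too. The closure step in isolation, on a hypothetical three-state NFA encoded as plain ints:

    # arcs maps each state to (label, next) pairs; label None is an epsilon arc.
    toy_arcs = {0: [(None, 1), ("a", 2)], 1: [(None, 2)], 2: []}

    def closure(state: int, base: set[int]) -> None:
        if state in base:
            return
        base.add(state)
        for label, nxt in toy_arcs[state]:
            if label is None:  # follow only the epsilon arcs
                closure(nxt, base)

    reached: set[int] = set()
    closure(0, reached)
    assert reached == {0, 1, 2}  # state 2 is reachable via epsilon arcs only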

    def dump_nfa(self, name: str, start: "NFAState", finish: "NFAState") -> None:
        print("Dump of NFA for", name)
        todo = [start]
        for i, state in enumerate(todo):
            print("  State", i, state is finish and "(final)" or "")
            for label, next in state.arcs:
                if next in todo:
                    j = todo.index(next)
                else:
                    j = len(todo)
                    todo.append(next)
                if label is None:
                    print(f"    -> {j}")
                else:
                    print(f"    {label} -> {j}")

    def dump_dfa(self, name: str, dfa: Sequence["DFAState"]) -> None:
        print("Dump of DFA for", name)
        for i, state in enumerate(dfa):
            print("  State", i, state.isfinal and "(final)" or "")
            for label, next in sorted(state.arcs.items()):
                print(f"    {label} -> {dfa.index(next)}")

    def simplify_dfa(self, dfa: list["DFAState"]) -> None:
        # This is not theoretically optimal, but works well enough.
        # Algorithm: repeatedly look for two states that have the same
        # set of arcs (same labels pointing to the same nodes) and
        # unify them, until things stop changing.

        # dfa is a list of DFAState instances
        changes = True
        while changes:
            changes = False
            for i, state_i in enumerate(dfa):
                for j in range(i + 1, len(dfa)):
                    state_j = dfa[j]
                    if state_i == state_j:
                        # print "  unify", i, j
                        del dfa[j]
                        for state in dfa:
                            state.unifystate(state_j, state_i)
                        changes = True
                        break
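
State unification relies on DFAState.__eq__ further down, which calls two states equal when they agree on finality and have identical arcs. Whenever a duplicate is found, it is deleted and every arc that pointed at it is redirected to the survivor, and the scan repeats until a full pass makes no change. The same step on plain data, with dicts of hypothetical arcs standing in for DFAState:

    dfa = [{"a": 1, "b": 2}, {"c": 0}, {"c": 0}]  # rows 1 and 2 are equal
    del dfa[2]                                    # drop the duplicate...
    for row in dfa:                               # ...and redirect arcs 2 -> 1
        for label, target in row.items():
            if target == 2:
                row[label] = 1
    assert dfa == [{"a": 1, "b": 1}, {"c": 0}]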

    def parse_rhs(self) -> tuple["NFAState", "NFAState"]:
        # RHS: ALT ('|' ALT)*
        a, z = self.parse_alt()
        if self.value != "|":
            return a, z
        else:
            aa = NFAState()
            zz = NFAState()
            aa.addarc(a)
            z.addarc(zz)
            while self.value == "|":
                self.gettoken()
                a, z = self.parse_alt()
                aa.addarc(a)
                z.addarc(zz)
            return aa, zz

    def parse_alt(self) -> tuple["NFAState", "NFAState"]:
        # ALT: ITEM+
        a, b = self.parse_item()
        while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
            c, d = self.parse_item()
            b.addarc(c)
            b = d
        return a, b

    def parse_item(self) -> tuple["NFAState", "NFAState"]:
        # ITEM: '[' RHS ']' | ATOM ['+' | '*']
        if self.value == "[":
            self.gettoken()
            a, z = self.parse_rhs()
            self.expect(token.OP, "]")
            a.addarc(z)
            return a, z
        else:
            a, z = self.parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self.gettoken()
            z.addarc(a)
            if value == "+":
                return a, z
            else:
                return a, a
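
Each parse_* helper returns a Thompson-style NFA fragment as a (start, end) pair, and the operators rewire those pairs: '|' funnels the alternatives through fresh entry and exit states, '[...]' adds an epsilon arc that skips the body, and '+'/'*' add an arc looping back to the start, with '*' returning (a, a) so that zero repetitions are accepted. The postfix case in miniature, with a hypothetical stand-in state class:

    class State:
        def __init__(self) -> None:
            self.arcs = []  # (label, State) pairs; label None means epsilon

        def addarc(self, next, label=None):
            self.arcs.append((label, next))

    a, z = State(), State()
    a.addarc(z, "'x'")  # ATOM 'x': one labeled arc from start to end
    z.addarc(a)         # postfix '+' or '*': an epsilon arc looping back
    # '+' keeps (a, z): the labeled arc must be traversed at least once.
    # '*' returns (a, a): the exit is the start itself, so zero passes match.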

    def parse_atom(self) -> tuple["NFAState", "NFAState"]:
        # ATOM: '(' RHS ')' | NAME | STRING
        if self.value == "(":
            self.gettoken()
            a, z = self.parse_rhs()
            self.expect(token.OP, ")")
            return a, z
        elif self.type in (token.NAME, token.STRING):
            a = NFAState()
            z = NFAState()
            a.addarc(z, self.value)
            self.gettoken()
            return a, z
        else:
            self.raise_error(
                f"expected (...) or NAME or STRING, got {self.type}/{self.value}"
            )

    def expect(self, type: int, value: Optional[Any] = None) -> str:
        if self.type != type or (value is not None and self.value != value):
            self.raise_error(f"expected {type}/{value}, got {self.type}/{self.value}")
        value = self.value
        self.gettoken()
        return value

    def gettoken(self) -> None:
        tup = next(self.generator)
        while tup[0] in (tokenize.COMMENT, tokenize.NL):
            tup = next(self.generator)
        self.type, self.value, self.begin, self.end, self.line = tup
        # print token.tok_name[self.type], repr(self.value)

    def raise_error(self, msg: str) -> NoReturn:
        raise SyntaxError(
            msg, (str(self.filename), self.end[0], self.end[1], self.line)
        )


class NFAState:
    arcs: list[tuple[Optional[str], "NFAState"]]

    def __init__(self) -> None:
        self.arcs = []  # list of (label, NFAState) pairs

    def addarc(self, next: "NFAState", label: Optional[str] = None) -> None:
        assert label is None or isinstance(label, str)
        assert isinstance(next, NFAState)
        self.arcs.append((label, next))


class DFAState:
    nfaset: dict[NFAState, Any]
    isfinal: bool
    arcs: dict[str, "DFAState"]

    def __init__(self, nfaset: dict[NFAState, Any], final: NFAState) -> None:
        assert isinstance(nfaset, dict)
        assert isinstance(next(iter(nfaset)), NFAState)
        assert isinstance(final, NFAState)
        self.nfaset = nfaset
        self.isfinal = final in nfaset
        self.arcs = {}  # map from label to DFAState

    def addarc(self, next: "DFAState", label: str) -> None:
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next, DFAState)
        self.arcs[label] = next

    def unifystate(self, old: "DFAState", new: "DFAState") -> None:
        for label, next in self.arcs.items():
            if next is old:
                self.arcs[label] = new

    def __eq__(self, other: Any) -> bool:
        # Equality test -- ignore the nfaset instance variable
        assert isinstance(other, DFAState)
        if self.isfinal != other.isfinal:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next in self.arcs.items():
            if next is not other.arcs.get(label):
                return False
        return True

    __hash__: Any = None  # For Py3 compatibility.


def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar:
    p = ParserGenerator(filename)
    return p.make_grammar()
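
generate_grammar is the module's entry point: it runs the whole pipeline (tokenize, parse, NFA-to-DFA conversion, simplification, FIRST sets) and packs the result into a PgenGrammar. A minimal usage sketch, assuming a grammar file exists at the default path:

    g = generate_grammar("Grammar.txt")
    print(len(g.dfas), "nonterminals; start symbol:", g.number2symbol[g.start])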