Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/blib2to3/pgen2/parse.py: 97%


# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.

"""
import copy
from contextlib import contextmanager

# Local imports
from . import grammar, token, tokenize
from typing import (
    cast,
    Any,
    Optional,
    Text,
    Union,
    Tuple,
    Dict,
    List,
    Iterator,
    Callable,
    Set,
    TYPE_CHECKING,
)
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pytree import convert, NL, Context, RawNode, Leaf, Node

if TYPE_CHECKING:
    from blib2to3.pgen2.driver import TokenProxy


Results = Dict[Text, NL]
Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]
DFA = List[List[Tuple[int, int]]]
DFAS = Tuple[DFA, Dict[int, int]]
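
# Illustrative shapes (made-up numbers, not real grammar data): a DFA is a list
# of states, where each state is a list of (label, next_state) arcs, and a DFAS
# pairs a DFA with its first set:
#
#     example_dfa: DFA = [[(1, 1)], [(0, 1)]]     # state 0 -> state 1 on label 1;
#                                                 # state 1 loops on 0 (accepting)
#     example_dfas: DFAS = (example_dfa, {1: 1})  # (dfa, first set)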



def lam_sub(grammar: Grammar, node: RawNode) -> NL:
    assert node[3] is not None
    return Node(type=node[0], children=node[3], context=node[2])
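
# A RawNode is a (type, value, context, children) tuple.  With illustrative
# values: a token leaf might look like (token.NAME, "x", ctx, None), and a
# symbol node like (257, None, None, [child, ...]); see the Parser docstring
# below for the full description.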



# A placeholder node, used when parser is backtracking.
DUMMY_NODE = (-1, None, None, None)


def stack_copy(
    stack: List[Tuple[DFAS, int, RawNode]]
) -> List[Tuple[DFAS, int, RawNode]]:
    """Nodeless stack copy."""
    return [(dfa, label, DUMMY_NODE) for dfa, label, _ in stack]
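
# For instance (hypothetical entry): stack_copy([(dfas, 2, some_node)]) returns
# [(dfas, 2, DUMMY_NODE)] -- the DFA and state are kept, the node is dropped.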



class Recorder:
    def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:
        self.parser = parser
        self._ilabels = ilabels
        self.context = context  # the exact context doesn't really matter

        self._dead_ilabels: Set[int] = set()
        self._start_point = self.parser.stack
        self._points = {ilabel: stack_copy(self._start_point) for ilabel in ilabels}

    @property
    def ilabels(self) -> Set[int]:
        # The candidate labels that are still alive (proposed minus dead).
        return self._dead_ilabels.symmetric_difference(self._ilabels)

    @contextmanager
    def switch_to(self, ilabel: int) -> Iterator[None]:
        with self.backtrack():
            self.parser.stack = self._points[ilabel]
            try:
                yield
            except ParseError:
                self._dead_ilabels.add(ilabel)
            finally:
                self.parser.stack = self._start_point

    @contextmanager
    def backtrack(self) -> Iterator[None]:
        """
        Use the node-level-invariant versions of the basic parsing operations
        (push/pop/shift).  These still operate on the stack, but they won't
        create any new nodes or modify the contents of any existing nodes.

        This saves us a ton of time when we are backtracking, since we want
        to restore the initial state as quickly as possible, which can only
        be done by performing as few mutations as possible.
        """
        is_backtracking = self.parser.is_backtracking
        try:
            self.parser.is_backtracking = True
            yield
        finally:
            self.parser.is_backtracking = is_backtracking

    def add_token(self, tok_type: int, tok_val: Text, raw: bool = False) -> None:
        func: Callable[..., Any]
        if raw:
            func = self.parser._addtoken
        else:
            func = self.parser.addtoken

        for ilabel in self.ilabels:
            with self.switch_to(ilabel):
                args = [tok_type, tok_val, self.context]
                if raw:
                    args.insert(0, ilabel)
                func(*args)

    def determine_route(
        self, value: Optional[Text] = None, force: bool = False
    ) -> Optional[int]:
        alive_ilabels = self.ilabels
        if len(alive_ilabels) == 0:
            # Every candidate died; pick one to blame in the error.
            *_, most_successful_ilabel = self._dead_ilabels
            raise ParseError("bad input", most_successful_ilabel, value, self.context)

        ilabel, *rest = alive_ilabels
        if force or not rest:
            return ilabel
        else:
            return None
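
    # Routing examples (hypothetical label numbers): with survivors {3, 9},
    # determine_route() returns None (still ambiguous) unless force=True, in
    # which case it commits to one survivor; with a single survivor {3} it
    # returns 3; with no survivors it raises ParseError.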



class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(
        self, msg: Text, type: Optional[int], value: Optional[Text], context: Context
    ) -> None:
        Exception.__init__(
            self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context)
        )
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context



class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):  # parse a token; may raise ParseError
            break
    root = p.rootnode  # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable.  When a syntax error occurs, addtoken() raises
    the ParseError exception.  There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes.  If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree.  If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted.  The syntax tree is converted from the bottom
        up.

        **post-note: the convert argument is ignored since for Black's
        usage, convert will always be blib2to3.pytree.convert.  Allowing
        this to be dynamic hurts mypyc's ability to use early binding.
        These docs are left for historical and informational value.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        # See note in docstring above.  TL;DR: this is ignored.
        self.convert = convert or lam_sub
        self.is_backtracking = False

    def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode: RawNode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]
        self.rootnode: Optional[NL] = None
        self.used_names: Set[str] = set()
        self.proxy = proxy

    def addtoken(self, type: int, value: Text, context: Context) -> bool:
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabels = self.classify(type, value, context)
        assert len(ilabels) >= 1

        # If we have only one state to advance to, we'll take
        # it as is.
        if len(ilabels) == 1:
            [ilabel] = ilabels
            return self._addtoken(ilabel, type, value, context)

        # If there are multiple states we could advance to (this can
        # only happen with soft keywords), then we will try all of them
        # in parallel, and as soon as one state can reach further than
        # the rest, we'll choose that one.  This is a pretty hacky
        # and hopefully temporary algorithm.
        #
        # For a more detailed explanation, check out this post:
        # https://tree.science/what-the-backtracking.html
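        #
        # A hypothetical walkthrough (made-up token stream, for exposition
        # only): for the NAME "match", classify() yields two ilabels -- one
        # for the soft keyword and one for a plain NAME.  Both candidate
        # stacks are fed the lookahead tokens below; a stack that raises
        # ParseError is marked dead, and determine_route() commits once only
        # one candidate survives.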


        with self.proxy.release() as proxy:
            counter, force = 0, False
            recorder = Recorder(self, ilabels, context)
            recorder.add_token(type, value, raw=True)

            next_token_value = value
            while recorder.determine_route(next_token_value) is None:
                if not proxy.can_advance(counter):
                    force = True
                    break

                next_token_type, next_token_value, *_ = proxy.eat(counter)
                if next_token_type in (tokenize.COMMENT, tokenize.NL):
                    counter += 1
                    continue

                if next_token_type == tokenize.OP:
                    next_token_type = grammar.opmap[next_token_value]

                recorder.add_token(next_token_type, next_token_value)
                counter += 1

            ilabel = cast(int, recorder.determine_route(next_token_value, force=force))
            assert ilabel is not None

        return self._addtoken(ilabel, type, value, context)

    def _addtoken(self, ilabel: int, type: int, value: Text, context: Context) -> bool:
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t = self.grammar.labels[i][0]
                if t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, itsdfa, newstate, context)
                        break  # To continue the outer while loop

                elif ilabel == i:
                    # Look it up in the list of labels
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False

            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input", type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type: int, value: Text, context: Context) -> List[int]:
        """Turn a token into a label.  (Internal)

        Depending on whether the value is a soft keyword or not,
        this function may return multiple labels to choose from."""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            if value in self.grammar.keywords:
                return [self.grammar.keywords[value]]
            elif value in self.grammar.soft_keywords:
                assert type in self.grammar.tokens
                return [
                    self.grammar.soft_keywords[value],
                    self.grammar.tokens[type],
                ]

        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return [ilabel]
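
    # For example (illustrative label numbers): if "match" is in
    # grammar.soft_keywords, classify(token.NAME, "match", ctx) returns two
    # candidates such as [42, 7] (the soft-keyword label and the generic NAME
    # label), while classify(token.NAME, "spam", ctx) returns just [7].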

    def shift(self, type: int, value: Text, newstate: int, context: Context) -> None:
        """Shift a token.  (Internal)"""
        if self.is_backtracking:
            dfa, state, _ = self.stack[-1]
            self.stack[-1] = (dfa, newstate, DUMMY_NODE)
        else:
            dfa, state, node = self.stack[-1]
            rawnode: RawNode = (type, value, context, None)
            newnode = convert(self.grammar, rawnode)
            assert node[-1] is not None
            node[-1].append(newnode)
            self.stack[-1] = (dfa, newstate, node)

368 def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None: 

369 """Push a nonterminal. (Internal)""" 

370 if self.is_backtracking: 

371 dfa, state, _ = self.stack[-1] 

372 self.stack[-1] = (dfa, newstate, DUMMY_NODE) 

373 self.stack.append((newdfa, 0, DUMMY_NODE)) 

374 else: 

375 dfa, state, node = self.stack[-1] 

376 newnode: RawNode = (type, None, context, []) 

377 self.stack[-1] = (dfa, newstate, node) 

378 self.stack.append((newdfa, 0, newnode)) 

379 

380 def pop(self) -> None: 

381 """Pop a nonterminal. (Internal)""" 

382 if self.is_backtracking: 

383 self.stack.pop() 

384 else: 

385 popdfa, popstate, popnode = self.stack.pop() 

386 newnode = convert(self.grammar, popnode) 

387 if self.stack: 

388 dfa, state, node = self.stack[-1] 

389 assert node[-1] is not None 

390 node[-1].append(newnode) 

391 else: 

392 self.rootnode = newnode 

393 self.rootnode.used_names = self.used_names