Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/blib2to3/pgen2/parse.py: 97%


# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.

"""
import copy
from contextlib import contextmanager

# Local imports
from . import grammar, token, tokenize
from typing import (
    cast,
    Any,
    Optional,
    Text,
    Union,
    Tuple,
    Dict,
    List,
    Iterator,
    Callable,
    Set,
    TYPE_CHECKING,
)
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pytree import convert, NL, Context, RawNode, Leaf, Node

if TYPE_CHECKING:
    from blib2to3.pgen2.driver import TokenProxy


Results = Dict[Text, NL]
Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]
DFA = List[List[Tuple[int, int]]]
DFAS = Tuple[DFA, Dict[int, int]]
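
# Illustrative shapes (made-up numbers, not real grammar data): a DFA is a list
# of states, where each state is a list of (label, next_state) arcs, and a DFAS
# pairs a DFA with its first set:
#
#     example_dfa: DFA = [[(1, 1)], [(0, 1)]]     # state 0 -> state 1 on label 1;
#                                                 # state 1 loops on 0 (accepting)
#     example_dfas: DFAS = (example_dfa, {1: 1})  # (dfa, first set)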



def lam_sub(grammar: Grammar, node: RawNode) -> NL:
    assert node[3] is not None
    return Node(type=node[0], children=node[3], context=node[2])
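
# A RawNode is a (type, value, context, children) tuple.  With illustrative
# values: a token leaf might look like (token.NAME, "x", ctx, None), and a
# symbol node like (257, None, None, [child, ...]); see the Parser docstring
# below for the full description.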



# A placeholder node, used when parser is backtracking.
DUMMY_NODE = (-1, None, None, None)


def stack_copy(
    stack: List[Tuple[DFAS, int, RawNode]]
) -> List[Tuple[DFAS, int, RawNode]]:
    """Nodeless stack copy."""
    return [(dfa, label, DUMMY_NODE) for dfa, label, _ in stack]
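
# For instance (hypothetical entry): stack_copy([(dfas, 2, some_node)]) returns
# [(dfas, 2, DUMMY_NODE)] -- the DFA and state are kept, the node is dropped.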



class Recorder:
    def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:
        self.parser = parser
        self._ilabels = ilabels
        self.context = context  # the exact context doesn't really matter

        self._dead_ilabels: Set[int] = set()
        self._start_point = self.parser.stack
        self._points = {ilabel: stack_copy(self._start_point) for ilabel in ilabels}

    @property
    def ilabels(self) -> Set[int]:
        # The candidate labels that are still alive (proposed minus dead).
        return self._dead_ilabels.symmetric_difference(self._ilabels)

    @contextmanager
    def switch_to(self, ilabel: int) -> Iterator[None]:
        with self.backtrack():
            self.parser.stack = self._points[ilabel]
            try:
                yield
            except ParseError:
                self._dead_ilabels.add(ilabel)
            finally:
                self.parser.stack = self._start_point

    @contextmanager
    def backtrack(self) -> Iterator[None]:
        """
        Use the node-level-invariant versions of the basic parsing operations
        (push/pop/shift).  These still operate on the stack, but they won't
        create any new nodes or modify the contents of any existing nodes.

        This saves us a ton of time when we are backtracking, since we want
        to restore the initial state as quickly as possible, which can only
        be done by performing as few mutations as possible.
        """
        is_backtracking = self.parser.is_backtracking
        try:
            self.parser.is_backtracking = True
            yield
        finally:
            self.parser.is_backtracking = is_backtracking

    def add_token(self, tok_type: int, tok_val: Text, raw: bool = False) -> None:
        func: Callable[..., Any]
        if raw:
            func = self.parser._addtoken
        else:
            func = self.parser.addtoken

        for ilabel in self.ilabels:
            with self.switch_to(ilabel):
                args = [tok_type, tok_val, self.context]
                if raw:
                    args.insert(0, ilabel)
                func(*args)

    def determine_route(
        self, value: Optional[Text] = None, force: bool = False
    ) -> Optional[int]:
        alive_ilabels = self.ilabels
        if len(alive_ilabels) == 0:
            # Every candidate died; pick one to blame in the error.
            *_, most_successful_ilabel = self._dead_ilabels
            raise ParseError("bad input", most_successful_ilabel, value, self.context)

        ilabel, *rest = alive_ilabels
        if force or not rest:
            return ilabel
        else:
            return None
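
    # Routing examples (hypothetical label numbers): with survivors {3, 9},
    # determine_route() returns None (still ambiguous) unless force=True, in
    # which case it commits to one survivor; with a single survivor {3} it
    # returns 3; with no survivors it raises ParseError.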



class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(
        self, msg: Text, type: Optional[int], value: Optional[Text], context: Context
    ) -> None:
        Exception.__init__(
            self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context)
        )
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context



class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):  # parse a token; may raise ParseError
            break
    root = p.rootnode  # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable.  When a syntax error occurs, addtoken() raises
    the ParseError exception.  There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes.  If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree.  If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted.  The syntax tree is converted from the bottom
        up.

        **post-note: the convert argument is ignored since for Black's
        usage, convert will always be blib2to3.pytree.convert.  Allowing
        this to be dynamic hurts mypyc's ability to use early binding.
        These docs are left for historical and informational value.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        # See note in docstring above.  TL;DR: this is ignored.
        self.convert = convert or lam_sub
        self.is_backtracking = False

    def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode: RawNode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]
        self.rootnode: Optional[NL] = None
        self.used_names: Set[str] = set()
        self.proxy = proxy

    def addtoken(self, type: int, value: Text, context: Context) -> bool:
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabels = self.classify(type, value, context)
        assert len(ilabels) >= 1

        # If we have only one state to advance to, we'll take
        # it as is.
        if len(ilabels) == 1:
            [ilabel] = ilabels
            return self._addtoken(ilabel, type, value, context)

        # If there are multiple states we could advance to (this can
        # only happen with soft keywords), then we will try all of them
        # in parallel, and as soon as one state can reach further than
        # the rest, we'll choose that one.  This is a pretty hacky
        # and hopefully temporary algorithm.
        #
        # For a more detailed explanation, check out this post:
        # https://tree.science/what-the-backtracking.html
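        #
        # A hypothetical walkthrough (made-up token stream, for exposition
        # only): for the NAME "match", classify() yields two ilabels -- one
        # for the soft keyword and one for a plain NAME.  Both candidate
        # stacks are fed the lookahead tokens below; a stack that raises
        # ParseError is marked dead, and determine_route() commits once only
        # one candidate survives.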


        with self.proxy.release() as proxy:
            counter, force = 0, False
            recorder = Recorder(self, ilabels, context)
            recorder.add_token(type, value, raw=True)

            next_token_value = value
            while recorder.determine_route(next_token_value) is None:
                if not proxy.can_advance(counter):
                    force = True
                    break

                next_token_type, next_token_value, *_ = proxy.eat(counter)
                if next_token_type in (tokenize.COMMENT, tokenize.NL):
                    counter += 1
                    continue

                if next_token_type == tokenize.OP:
                    next_token_type = grammar.opmap[next_token_value]

                recorder.add_token(next_token_type, next_token_value)
                counter += 1

            ilabel = cast(int, recorder.determine_route(next_token_value, force=force))
            assert ilabel is not None

        return self._addtoken(ilabel, type, value, context)

    def _addtoken(self, ilabel: int, type: int, value: Text, context: Context) -> bool:
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t = self.grammar.labels[i][0]
                if t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, itsdfa, newstate, context)
                        break  # To continue the outer while loop

                elif ilabel == i:
                    # Look it up in the list of labels
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False

            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input", type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type: int, value: Text, context: Context) -> List[int]:
        """Turn a token into a label.  (Internal)

        Depending on whether the value is a soft keyword or not,
        this function may return multiple labels to choose from."""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            if value in self.grammar.keywords:
                return [self.grammar.keywords[value]]
            elif value in self.grammar.soft_keywords:
                assert type in self.grammar.tokens
                return [
                    self.grammar.soft_keywords[value],
                    self.grammar.tokens[type],
                ]

        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return [ilabel]
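
    # For example (illustrative label numbers): if "match" is in
    # grammar.soft_keywords, classify(token.NAME, "match", ctx) returns two
    # candidates such as [42, 7] (the soft-keyword label and the generic NAME
    # label), while classify(token.NAME, "spam", ctx) returns just [7].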

    def shift(self, type: int, value: Text, newstate: int, context: Context) -> None:
        """Shift a token.  (Internal)"""
        if self.is_backtracking:
            dfa, state, _ = self.stack[-1]
            self.stack[-1] = (dfa, newstate, DUMMY_NODE)
        else:
            dfa, state, node = self.stack[-1]
            rawnode: RawNode = (type, value, context, None)
            newnode = convert(self.grammar, rawnode)
            assert node[-1] is not None
            node[-1].append(newnode)
            self.stack[-1] = (dfa, newstate, node)

368 def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None: 

369 """Push a nonterminal. (Internal)""" 

370 if self.is_backtracking: 

371 dfa, state, _ = self.stack[-1] 

372 self.stack[-1] = (dfa, newstate, DUMMY_NODE) 

373 self.stack.append((newdfa, 0, DUMMY_NODE)) 

374 else: 

375 dfa, state, node = self.stack[-1] 

376 newnode: RawNode = (type, None, context, []) 

377 self.stack[-1] = (dfa, newstate, node) 

378 self.stack.append((newdfa, 0, newnode)) 

379 

380 def pop(self) -> None: 

381 """Pop a nonterminal. (Internal)""" 

382 if self.is_backtracking: 

383 self.stack.pop() 

384 else: 

385 popdfa, popstate, popnode = self.stack.pop() 

386 newnode = convert(self.grammar, popnode) 

387 if self.stack: 

388 dfa, state, node = self.stack[-1] 

389 assert node[-1] is not None 

390 node[-1].append(newnode) 

391 else: 

392 self.rootnode = newnode 

393 self.rootnode.used_names = self.used_names