# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is different from pgen2, now.

# A fork of `parso.parser`.
# https://github.com/davidhalter/parso/blob/v0.3.4/parso/parser.py
#
# The following changes were made:
# - Typing was added.
# - Error recovery is removed.
# - The Jedi-specific _allowed_transition_names_and_token_types API is removed.
# - Improved error messages by using our exceptions module.
# - node_map/leaf_map were removed in favor of just calling convert_*.
# - convert_node/convert_leaf were renamed to convert_nonterminal/convert_terminal.
# - convert_nonterminal is called regardless of the number of children. Parso avoids
#   calling it in some cases to avoid creating extra nodes.
# - The parser is constructed with the tokens to allow us to track a bit more state.
#   As a consequence, a parser may only be used once.
# - Supports our custom Token class, instead of `parso.python.tokenize.Token`.

from dataclasses import dataclass, field
from typing import Generic, Iterable, List, Sequence, TypeVar, Union

from libcst._exceptions import (
    EOFSentinel,
    get_expected_str,
    ParserSyntaxError,
    PartialParserSyntaxError,
)
from libcst._parser.parso.pgen2.generator import DFAState, Grammar, ReservedString
from libcst._parser.parso.python.token import TokenType
from libcst._parser.types.token import Token

_NodeT = TypeVar("_NodeT")
_TokenTypeT = TypeVar("_TokenTypeT", bound=TokenType)
_TokenT = TypeVar("_TokenT", bound=Token)


@dataclass(frozen=False)
class StackNode(Generic[_TokenTypeT, _NodeT]):
    dfa: "DFAState[_TokenTypeT]"
    nodes: List[_NodeT] = field(default_factory=list)

    @property
    def nonterminal(self) -> str:
        return self.dfa.from_rule


def _token_to_transition(
    grammar: "Grammar[_TokenTypeT]", type_: _TokenTypeT, value: str
) -> Union[ReservedString, _TokenTypeT]:
    # Map from token to label
    if type_.contains_syntax:
        # Check for reserved words (keywords)
        try:
            return grammar.reserved_syntax_strings[value]
        except KeyError:
            pass

    return type_
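
# For illustration (not part of the original module, and assuming a Python-like
# grammar where NAME tokens have contains_syntax set): a NAME token spelling a
# keyword resolves to the grammar's ReservedString, while any other NAME falls
# through to the token type itself.
#
#   _token_to_transition(grammar, NAME, "if")   # -> grammar.reserved_syntax_strings["if"]
#   _token_to_transition(grammar, NAME, "foo")  # -> NAME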


# TODO: This should be an ABC, but there's a metaclass conflict between Generic and ABC
# that's fixed in Python 3.7.
class BaseParser(Generic[_TokenT, _TokenTypeT, _NodeT]):
71 """Parser engine.
73 A Parser instance contains state pertaining to the current token
74 sequence, and should not be used concurrently by different threads
75 to parse separate token sequences.
77 See python/tokenize.py for how to get input tokens by a string.
78 """

    tokens: Iterable[_TokenT]
    lines: Sequence[str]  # used when generating parse errors
    _pgen_grammar: "Grammar[_TokenTypeT]"
    stack: List[StackNode[_TokenTypeT, _NodeT]]
    # Keep track of whether parse was called. Because a parser may keep global mutable
    # state, each BaseParser instance should only be used once.
    __was_parse_called: bool

    def __init__(
        self,
        *,
        tokens: Iterable[_TokenT],
        lines: Sequence[str],
        pgen_grammar: "Grammar[_TokenTypeT]",
        start_nonterminal: str,
    ) -> None:
        self.tokens = tokens
        self.lines = lines
        self._pgen_grammar = pgen_grammar
        first_dfa = pgen_grammar.nonterminal_to_dfas[start_nonterminal][0]
        self.stack = [StackNode(first_dfa)]
        self.__was_parse_called = False

    def parse(self) -> _NodeT:
        # Ensure that we don't re-use parsers.
        if self.__was_parse_called:
            raise Exception("Each parser object may only be used to parse once.")
        self.__was_parse_called = True

        for token in self.tokens:
            self._add_token(token)

        while True:
            tos = self.stack[-1]
            if not tos.dfa.is_final:
                expected_str = get_expected_str(
                    EOFSentinel.EOF, tos.dfa.transitions.keys()
                )
                raise ParserSyntaxError(
                    f"Incomplete input. {expected_str}",
                    lines=self.lines,
                    raw_line=len(self.lines),
                    raw_column=len(self.lines[-1]),
                )

            if len(self.stack) > 1:
                self._pop()
            else:
                return self.convert_nonterminal(tos.nonterminal, tos.nodes)
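
    # An illustrative note (not from the original source): once the token stream is
    # exhausted, the loop above unwinds the stack. For a well-formed input the stack
    # might end as [file_input, simple_stmt] with both DFAs final: _pop() folds
    # simple_stmt into file_input, and the next iteration returns the converted
    # file_input node. A truncated token stream instead leaves a non-final DFA on
    # top and raises "Incomplete input".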

    def convert_nonterminal(
        self, nonterminal: str, children: Sequence[_NodeT]
    ) -> _NodeT:
        ...

    def convert_terminal(self, token: _TokenT) -> _NodeT:
        ...
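
    # A hypothetical subclass (illustrative only) could build a plain tuple-based
    # tree by overriding the two conversion hooks above:
    #
    #   class TupleTreeParser(BaseParser[Token, TokenType, object]):
    #       def convert_nonterminal(self, nonterminal, children):
    #           return (nonterminal, list(children))
    #
    #       def convert_terminal(self, token):
    #           return (token.type.name, token.string)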

    def _add_token(self, token: _TokenT) -> None:
        """
        This is the only core function for parsing. Basically everything happens
        here. The parser generator has prepared everything ahead of time; we only
        apply the necessary steps here.
        """
        grammar = self._pgen_grammar
        stack = self.stack
        # pyre-fixme[6]: Expected `_TokenTypeT` for 2nd param but got `TokenType`.
        transition = _token_to_transition(grammar, token.type, token.string)

        while True:
            try:
                plan = stack[-1].dfa.transitions[transition]
                break
            except KeyError:
                if stack[-1].dfa.is_final:
                    try:
                        self._pop()
                    except PartialParserSyntaxError as ex:
                        # Upconvert the PartialParserSyntaxError to a ParserSyntaxError
                        # by backfilling the line/column information.
                        raise ParserSyntaxError(
                            ex.message,
                            lines=self.lines,
                            raw_line=token.start_pos[0],
                            raw_column=token.start_pos[1],
                        )
                    except Exception as ex:
                        # convert_nonterminal may fail due to a bug in our code. Try to
                        # recover enough to at least tell us where in the file it
                        # failed.
                        raise ParserSyntaxError(
                            f"Internal error: {ex}",
                            lines=self.lines,
                            raw_line=token.start_pos[0],
                            raw_column=token.start_pos[1],
                        )
                else:
                    # We never broke out -- EOF is too soon -- Unfinished statement.
                    #
                    # BUG: The `expected_str` may not be complete because we already
                    # popped the other possibilities off the stack at this point, but
                    # it still seems useful to list some of the possibilities that we
                    # could've expected.
                    expected_str = get_expected_str(
                        token, stack[-1].dfa.transitions.keys()
                    )
                    raise ParserSyntaxError(
                        f"Incomplete input. {expected_str}",
                        lines=self.lines,
                        raw_line=token.start_pos[0],
                        raw_column=token.start_pos[1],
                    )
            except IndexError:
                # I don't think this will ever happen with Python's grammar, because if
                # there are any extra tokens at the end of the input, we'll instead
                # complain that we expected ENDMARKER.
                #
                # However, let's leave it just in case.
                expected_str = get_expected_str(token, EOFSentinel.EOF)
                raise ParserSyntaxError(
                    f"Too much input. {expected_str}",
                    lines=self.lines,
                    raw_line=token.start_pos[0],
                    raw_column=token.start_pos[1],
                )

        # Logically, `plan` is always defined, but pyre can't reasonably determine that.
        stack[-1].dfa = plan.next_dfa

        for push in plan.dfa_pushes:
            stack.append(StackNode(push))

        leaf = self.convert_terminal(token)
        stack[-1].nodes.append(leaf)
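
    # An illustrative trace (assuming a toy grammar rule `arith_expr: term ('+' term)*`,
    # not necessarily the real grammar): while parsing "a + b", the trailing NEWLINE
    # token has no transition in the `term` DFA, but that DFA is final, so the loop
    # above pops `term` (via _pop, which builds its node) and retries the transition
    # one stack level down, repeating until some DFA can shift the token.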

    def _pop(self) -> None:
        tos = self.stack.pop()
        # Unlike parso and lib2to3, we call `convert_nonterminal` unconditionally
        # instead of only when we have more than one child. This allows us to create a
        # far more consistent and predictable tree.
        new_node = self.convert_nonterminal(tos.dfa.from_rule, tos.nodes)
        self.stack[-1].nodes.append(new_node)
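
    # For example (illustrative): a `term` consisting of a single NAME leaf still
    # produces its own `term` node with one child here, where parso and lib2to3
    # would collapse it into the bare leaf.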