Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/_parser/base_parser.py: 34%


# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is different from pgen2, now.

# A fork of `parso.parser`.
# https://github.com/davidhalter/parso/blob/v0.3.4/parso/parser.py
#
# The following changes were made:
# - Typing was added.
# - Error recovery is removed.
# - The Jedi-specific _allowed_transition_names_and_token_types API is removed.
# - Improved error messages by using our exceptions module.
# - node_map/leaf_map were removed in favor of just calling convert_*.
# - convert_node/convert_leaf were renamed to convert_nonterminal/convert_terminal.
# - convert_nonterminal is called regardless of the number of children. Parso avoids
#   calling it in some cases to avoid creating extra nodes.
# - The parser is constructed with the tokens to allow us to track a bit more state. As
#   a consequence, the parser may only be used once.
# - Supports our custom Token class, instead of `parso.python.tokenize.Token`.


from dataclasses import dataclass, field
from typing import Generic, Iterable, List, Sequence, TypeVar, Union

from libcst._exceptions import (
    EOFSentinel,
    get_expected_str,
    ParserSyntaxError,
    PartialParserSyntaxError,
)
from libcst._parser.parso.pgen2.generator import DFAState, Grammar, ReservedString
from libcst._parser.parso.python.token import TokenType
from libcst._parser.types.token import Token

_NodeT = TypeVar("_NodeT")
_TokenTypeT = TypeVar("_TokenTypeT", bound=TokenType)
_TokenT = TypeVar("_TokenT", bound=Token)


@dataclass(frozen=False)
class StackNode(Generic[_TokenTypeT, _NodeT]):
    dfa: "DFAState[_TokenTypeT]"
    nodes: List[_NodeT] = field(default_factory=list)

    @property
    def nonterminal(self) -> str:
        return self.dfa.from_rule


def _token_to_transition(
    grammar: "Grammar[_TokenTypeT]", type_: _TokenTypeT, value: str
) -> Union[ReservedString, _TokenTypeT]:
    # Map from token to label
    if type_.contains_syntax:
        # Check for reserved words (keywords)
        try:
            return grammar.reserved_syntax_strings[value]
        except KeyError:
            pass

    return type_
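
# For example (an illustrative sketch, not part of the original source): with
# the Python grammar, the NAME token type has `contains_syntax=True`, so a
# NAME token spelling a keyword maps to its ReservedString label, while an
# ordinary identifier falls through the KeyError and maps to its plain type:
#
#   _token_to_transition(grammar, NAME, "if")   # -> ReservedString for "if"
#   _token_to_transition(grammar, NAME, "foo")  # -> NAME itself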

# TODO: This should be an ABC, but there's a metaclass conflict between Generic and ABC
# that's fixed in Python 3.7.
class BaseParser(Generic[_TokenT, _TokenTypeT, _NodeT]):
    """Parser engine.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See python/tokenize.py for how to get input tokens from a string.
    """

    tokens: Iterable[_TokenT]
    lines: Sequence[str]  # used when generating parse errors
    _pgen_grammar: "Grammar[_TokenTypeT]"
    stack: List[StackNode[_TokenTypeT, _NodeT]]
    # Keep track of whether parse was called. Because a parser may keep global
    # mutable state, each BaseParser instance should only be used once.
    __was_parse_called: bool

    def __init__(
        self,
        *,
        tokens: Iterable[_TokenT],
        lines: Sequence[str],
        pgen_grammar: "Grammar[_TokenTypeT]",
        start_nonterminal: str,
    ) -> None:
        self.tokens = tokens
        self.lines = lines
        self._pgen_grammar = pgen_grammar
        first_dfa = pgen_grammar.nonterminal_to_dfas[start_nonterminal][0]
        self.stack = [StackNode(first_dfa)]
        self.__was_parse_called = False

    def parse(self) -> _NodeT:
        # Ensure that we don't re-use parsers.
        if self.__was_parse_called:
            raise Exception("Each parser object may only be used to parse once.")
        self.__was_parse_called = True

        for token in self.tokens:
            self._add_token(token)

        while True:
            tos = self.stack[-1]
            if not tos.dfa.is_final:
                expected_str = get_expected_str(
                    EOFSentinel.EOF, tos.dfa.transitions.keys()
                )
                raise ParserSyntaxError(
                    f"Incomplete input. {expected_str}",
                    lines=self.lines,
                    raw_line=len(self.lines),
                    raw_column=len(self.lines[-1]),
                )

            if len(self.stack) > 1:
                self._pop()
            else:
                return self.convert_nonterminal(tos.nonterminal, tos.nodes)
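
    # Once the token stream is exhausted (a well-formed stream ends with an
    # ENDMARKER token), every DFA left on the stack should be in an accepting
    # state, so the loop above reduces the stack to a single root StackNode
    # and returns its conversion. A non-final DFA at that point means the
    # input ended mid-production, reported as "Incomplete input."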

    def convert_nonterminal(
        self, nonterminal: str, children: Sequence[_NodeT]
    ) -> _NodeT:
        ...

    def convert_terminal(self, token: _TokenT) -> _NodeT:
        ...

    def _add_token(self, token: _TokenT) -> None:
        """
        This is the only core function for parsing; essentially everything
        happens here. The parser generator has prepared everything ahead of
        time, so we only apply the necessary steps.
        """

        grammar = self._pgen_grammar
        stack = self.stack
        # pyre-fixme[6]: Expected `_TokenTypeT` for 2nd param but got `TokenType`.
        transition = _token_to_transition(grammar, token.type, token.string)

        while True:
            try:
                plan = stack[-1].dfa.transitions[transition]
                break
            except KeyError:
                if stack[-1].dfa.is_final:
                    try:
                        self._pop()
                    except PartialParserSyntaxError as ex:
                        # Upconvert the PartialParserSyntaxError to a ParserSyntaxError
                        # by backfilling the line/column information.
                        raise ParserSyntaxError(
                            ex.message,
                            lines=self.lines,
                            raw_line=token.start_pos[0],
                            raw_column=token.start_pos[1],
                        )
                    except Exception as ex:
                        # convert_nonterminal may fail due to a bug in our code. Try to
                        # recover enough to at least tell us where in the file it
                        # failed.
                        raise ParserSyntaxError(
                            f"Internal error: {ex}",
                            lines=self.lines,
                            raw_line=token.start_pos[0],
                            raw_column=token.start_pos[1],
                        )
                else:
                    # We never broke out -- EOF is too soon -- Unfinished statement.
                    #
                    # BUG: The `expected_str` may not be complete because we already
                    # popped the other possibilities off the stack at this point, but
                    # it still seems useful to list some of the possibilities that we
                    # could've expected.
                    expected_str = get_expected_str(
                        token, stack[-1].dfa.transitions.keys()
                    )
                    raise ParserSyntaxError(
                        f"Incomplete input. {expected_str}",
                        lines=self.lines,
                        raw_line=token.start_pos[0],
                        raw_column=token.start_pos[1],
                    )
            except IndexError:
                # I don't think this will ever happen with Python's grammar, because if
                # there are any extra tokens at the end of the input, we'll instead
                # complain that we expected ENDMARKER.
                #
                # However, let's leave it just in case.
                expected_str = get_expected_str(token, EOFSentinel.EOF)
                raise ParserSyntaxError(
                    f"Too much input. {expected_str}",
                    lines=self.lines,
                    raw_line=token.start_pos[0],
                    raw_column=token.start_pos[1],
                )

        # Logically, `plan` is always defined, but pyre can't reasonably determine that.
        stack[-1].dfa = plan.next_dfa

        for push in plan.dfa_pushes:
            stack.append(StackNode(push))

        leaf = self.convert_terminal(token)
        stack[-1].nodes.append(leaf)

    def _pop(self) -> None:
        tos = self.stack.pop()
        # Unlike parso and lib2to3, we call `convert_nonterminal` unconditionally
        # instead of only when we have more than one child. This allows us to create a
        # far more consistent and predictable tree.
        new_node = self.convert_nonterminal(tos.dfa.from_rule, tos.nodes)
        self.stack[-1].nodes.append(new_node)
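

# Illustrative sketch (not part of the original module): a minimal concrete
# subclass showing how the convert_* hooks are meant to be filled in. Here
# they build plain tuples, yielding an S-expression-style tree. The token
# stream and grammar must come from elsewhere; libcst wires in its own
# tokenizer and generated Python grammar, and "file_input" is the usual start
# symbol for a module in the Python grammar.
class _SExpressionParser(BaseParser[Token, TokenType, tuple]):
    def convert_nonterminal(
        self, nonterminal: str, children: Sequence[tuple]
    ) -> tuple:
        # Called once per reduced rule, even when there is only one child.
        return (nonterminal, *children)

    def convert_terminal(self, token: Token) -> tuple:
        # Called once per shifted token.
        return (token.type.name, token.string)


def _parse_to_sexpression(
    tokens: Iterable[Token], source: str, grammar: "Grammar[TokenType]"
) -> tuple:
    # One parser instance per parse: BaseParser keeps mutable stack state and
    # refuses to be reused.
    parser = _SExpressionParser(
        tokens=tokens,
        lines=source.splitlines(keepends=True),
        pgen_grammar=grammar,
        start_nonterminal="file_input",
    )
    return parser.parse()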