Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/python/parser.py: 96%

94 statements  

coverage.py v7.2.2, created at 2023-03-26 07:36 +0000

from parso.python import tree
from parso.python.token import PythonTokenTypes
from parso.parser import BaseParser


NAME = PythonTokenTypes.NAME
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT

class Parser(BaseParser):
    """
    This class is used to parse a Python file and divide it into a class
    structure of different scopes.

    :param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar.
    """
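
    # Usage sketch (not part of the original module): this parser is normally
    # reached through parso's public API rather than instantiated directly.
    # The exact tree layout shown is an assumption for illustration.
    #
    #     import parso
    #     module = parso.parse('x = 1\n')     # a tree.Module ('file_input')
    #     print(module.children[0].type)      # e.g. 'simple_stmt'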

    node_map = {
        'expr_stmt': tree.ExprStmt,
        'classdef': tree.Class,
        'funcdef': tree.Function,
        'file_input': tree.Module,
        'import_name': tree.ImportName,
        'import_from': tree.ImportFrom,
        'break_stmt': tree.KeywordStatement,
        'continue_stmt': tree.KeywordStatement,
        'return_stmt': tree.ReturnStmt,
        'raise_stmt': tree.KeywordStatement,
        'yield_expr': tree.YieldExpr,
        'del_stmt': tree.KeywordStatement,
        'pass_stmt': tree.KeywordStatement,
        'global_stmt': tree.GlobalStmt,
        'nonlocal_stmt': tree.KeywordStatement,
        'print_stmt': tree.KeywordStatement,
        'assert_stmt': tree.AssertStmt,
        'if_stmt': tree.IfStmt,
        'with_stmt': tree.WithStmt,
        'for_stmt': tree.ForStmt,
        'while_stmt': tree.WhileStmt,
        'try_stmt': tree.TryStmt,
        'sync_comp_for': tree.SyncCompFor,
        # Not sure if this is the best idea, but IMO it's the easiest way to
        # avoid extreme amounts of work around the subtle difference of 2/3
        # grammar in list comprehensions.
        'decorator': tree.Decorator,
        'lambdef': tree.Lambda,
        'lambdef_nocond': tree.Lambda,
        'namedexpr_test': tree.NamedExpr,
    }
    default_node = tree.PythonNode

    # Names/Keywords are handled separately
    _leaf_map = {
        PythonTokenTypes.STRING: tree.String,
        PythonTokenTypes.NUMBER: tree.Number,
        PythonTokenTypes.NEWLINE: tree.Newline,
        PythonTokenTypes.ENDMARKER: tree.EndMarker,
        PythonTokenTypes.FSTRING_STRING: tree.FStringString,
        PythonTokenTypes.FSTRING_START: tree.FStringStart,
        PythonTokenTypes.FSTRING_END: tree.FStringEnd,
    }

    def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
        super().__init__(pgen_grammar, start_nonterminal,
                         error_recovery=error_recovery)

        self.syntax_errors = []
        self._omit_dedent_list = []
        self._indent_counter = 0

    def parse(self, tokens):
        if self._error_recovery:
            if self._start_nonterminal != 'file_input':
                raise NotImplementedError

            tokens = self._recovery_tokenize(tokens)

        return super().parse(tokens)
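
    # Rough sketch of how `parse` is typically fed (hedged: the tokenizer
    # signature varies between parso versions, and the `version_info` keyword
    # is an assumption here):
    #
    #     from parso.python.tokenize import tokenize
    #     tokens = tokenize(source, version_info=...)  # PythonToken stream
    #     module = parser.parse(tokens)                # -> tree.Module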

    def convert_node(self, nonterminal, children):
        """
        Convert raw node information to a PythonBaseNode instance.

        This is passed to the parser driver, which calls it whenever a
        reduction of a grammar rule produces a new complete node, so that the
        tree is built strictly bottom-up.
        """
        try:
            node = self.node_map[nonterminal](children)
        except KeyError:
            if nonterminal == 'suite':
                # We don't want the INDENT/DEDENT in our parser tree. Those
                # leaves are just cancer. They are virtual leaves and not real
                # ones and therefore have pseudo start/end positions and no
                # prefixes. Just ignore them.
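                # (A suite is `NEWLINE INDENT stmt+ DEDENT`, so the slice
                # below keeps the NEWLINE and the statements only.)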
                children = [children[0]] + children[2:-1]
            node = self.default_node(nonterminal, children)
        return node

    def convert_leaf(self, type, value, prefix, start_pos):
        # print('leaf', repr(value), token.tok_name[type])
        if type == NAME:
            if value in self._pgen_grammar.reserved_syntax_strings:
                return tree.Keyword(value, start_pos, prefix)
            else:
                return tree.Name(value, start_pos, prefix)

        return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
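
    # For example: a NAME whose value is a reserved string such as 'pass'
    # becomes a tree.Keyword, NAME 'x' becomes a tree.Name, and token types
    # missing from _leaf_map (operators like '+') fall back to tree.Operator.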

    def error_recovery(self, token):
        tos_nodes = self.stack[-1].nodes
        if tos_nodes:
            last_leaf = tos_nodes[-1].get_last_leaf()
        else:
            last_leaf = None

        if self._start_nonterminal == 'file_input' and \
                (token.type == PythonTokenTypes.ENDMARKER
                 or token.type == DEDENT and not last_leaf.value.endswith('\n')
                 and not last_leaf.value.endswith('\r')):
            # In Python, statements need to end with a newline. But since it's
            # possible (and valid in Python) that there's no newline at the
            # end of a file, we have to recover even if the user doesn't want
            # error recovery.
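            # For example, a file whose last line is `x = 1` with no trailing
            # newline still parses without an error, even when
            # error_recovery=False.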
            if self.stack[-1].dfa.from_rule == 'simple_stmt':
                try:
                    plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
                except KeyError:
                    pass
                else:
                    if plan.next_dfa.is_final and not plan.dfa_pushes:
                        # We are ignoring here that the newline would be
                        # required for a simple_stmt.
                        self.stack[-1].dfa = plan.next_dfa
                        self._add_token(token)
                        return

        if not self._error_recovery:
            return super().error_recovery(token)

        def current_suite(stack):
            # For now, if we detect an error, just discard everything that is
            # not a suite or file_input.
            for until_index, stack_node in reversed(list(enumerate(stack))):
                # `suite` can sometimes be only simple_stmt, not stmt.
                if stack_node.nonterminal == 'file_input':
                    break
                elif stack_node.nonterminal == 'suite':
                    # In the case where we just have a newline we don't want
                    # to do error recovery here. In all other cases, we want
                    # to do error recovery.
                    if len(stack_node.nodes) != 1:
                        break
            return until_index

        until_index = current_suite(self.stack)

        if self._stack_removal(until_index + 1):
            self._add_token(token)
        else:
            typ, value, start_pos, prefix = token
            if typ == INDENT:
                # For every deleted INDENT we have to delete a DEDENT as well.
                # Otherwise the parser will get into trouble and DEDENT too early.
                self._omit_dedent_list.append(self._indent_counter)

            error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
            self.stack[-1].nodes.append(error_leaf)

        tos = self.stack[-1]
        if tos.nonterminal == 'suite':
            # Need at least one statement in the suite. This happened with
            # the error recovery above.
            try:
                tos.dfa = tos.dfa.arcs['stmt']
            except KeyError:
                # We're already in a final state.
                pass

    def _stack_removal(self, start_index):
        all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]

        if all_nodes:
            node = tree.PythonErrorNode(all_nodes)
            self.stack[start_index - 1].nodes.append(node)

        self.stack[start_index:] = []
        return bool(all_nodes)

    def _recovery_tokenize(self, tokens):
        for token in tokens:
            typ = token[0]
            if typ == DEDENT:
                # We need to count indents, because if we just omit any DEDENT,
                # we might omit them in the wrong place.
                o = self._omit_dedent_list
                if o and o[-1] == self._indent_counter:
                    o.pop()
                    self._indent_counter -= 1
                    continue

                self._indent_counter -= 1
            elif typ == INDENT:
                self._indent_counter += 1
            yield token
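
# A rough sketch of the recovery path as seen through the public API (hedged:
# the node type names are parso implementation details):
#
#     import parso
#     module = parso.parse('def f(:\n    pass\n')
#     # Parsing does not raise; the broken region is kept in the tree as
#     # 'error_node' / 'error_leaf' entries produced by the code above.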