Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/python/parser.py: 96%

94 statements  

coverage.py v7.2.2, created at 2023-03-26 07:36 +0000

from parso.python import tree
from parso.python.token import PythonTokenTypes
from parso.parser import BaseParser


NAME = PythonTokenTypes.NAME
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT

class Parser(BaseParser):
    """
    This class is used to parse a Python file and divide it into a class
    structure of different scopes.

    :param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar.
    """
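
    # Usage sketch (not part of the original module): this parser is normally
    # reached through parso's public API rather than instantiated directly.
    # The exact tree layout shown is an assumption for illustration.
    #
    #     import parso
    #     module = parso.parse('x = 1\n')     # a tree.Module ('file_input')
    #     print(module.children[0].type)      # e.g. 'simple_stmt'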

    node_map = {
        'expr_stmt': tree.ExprStmt,
        'classdef': tree.Class,
        'funcdef': tree.Function,
        'file_input': tree.Module,
        'import_name': tree.ImportName,
        'import_from': tree.ImportFrom,
        'break_stmt': tree.KeywordStatement,
        'continue_stmt': tree.KeywordStatement,
        'return_stmt': tree.ReturnStmt,
        'raise_stmt': tree.KeywordStatement,
        'yield_expr': tree.YieldExpr,
        'del_stmt': tree.KeywordStatement,
        'pass_stmt': tree.KeywordStatement,
        'global_stmt': tree.GlobalStmt,
        'nonlocal_stmt': tree.KeywordStatement,
        'print_stmt': tree.KeywordStatement,
        'assert_stmt': tree.AssertStmt,
        'if_stmt': tree.IfStmt,
        'with_stmt': tree.WithStmt,
        'for_stmt': tree.ForStmt,
        'while_stmt': tree.WhileStmt,
        'try_stmt': tree.TryStmt,
        'sync_comp_for': tree.SyncCompFor,
        # Not sure if this is the best idea, but IMO it's the easiest way to
        # avoid extreme amounts of work around the subtle difference of 2/3
        # grammar in list comprehensions.
        'decorator': tree.Decorator,
        'lambdef': tree.Lambda,
        'lambdef_nocond': tree.Lambda,
        'namedexpr_test': tree.NamedExpr,
    }
    default_node = tree.PythonNode

    # Names/Keywords are handled separately
    _leaf_map = {
        PythonTokenTypes.STRING: tree.String,
        PythonTokenTypes.NUMBER: tree.Number,
        PythonTokenTypes.NEWLINE: tree.Newline,
        PythonTokenTypes.ENDMARKER: tree.EndMarker,
        PythonTokenTypes.FSTRING_STRING: tree.FStringString,
        PythonTokenTypes.FSTRING_START: tree.FStringStart,
        PythonTokenTypes.FSTRING_END: tree.FStringEnd,
    }

    def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
        super().__init__(pgen_grammar, start_nonterminal,
                         error_recovery=error_recovery)

        self.syntax_errors = []
        self._omit_dedent_list = []
        self._indent_counter = 0

    def parse(self, tokens):
        if self._error_recovery:
            if self._start_nonterminal != 'file_input':
                raise NotImplementedError

            tokens = self._recovery_tokenize(tokens)

        return super().parse(tokens)
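
    # Rough sketch of how `parse` is typically fed (hedged: the tokenizer
    # signature varies between parso versions, and the `version_info` keyword
    # is an assumption here):
    #
    #     from parso.python.tokenize import tokenize
    #     tokens = tokenize(source, version_info=...)  # PythonToken stream
    #     module = parser.parse(tokens)                # -> tree.Module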

    def convert_node(self, nonterminal, children):
        """
        Convert raw node information to a PythonBaseNode instance.

        This is passed to the parser driver, which calls it whenever a
        reduction of a grammar rule produces a new complete node, so that the
        tree is built strictly bottom-up.
        """
        try:
            node = self.node_map[nonterminal](children)
        except KeyError:
            if nonterminal == 'suite':
                # We don't want the INDENT/DEDENT in our parser tree. Those
                # leaves are just cancer. They are virtual leaves and not real
                # ones and therefore have pseudo start/end positions and no
                # prefixes. Just ignore them.
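                # (A suite is `NEWLINE INDENT stmt+ DEDENT`, so the slice
                # below keeps the NEWLINE and the statements only.)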
                children = [children[0]] + children[2:-1]
            node = self.default_node(nonterminal, children)
        return node

    def convert_leaf(self, type, value, prefix, start_pos):
        # print('leaf', repr(value), token.tok_name[type])
        if type == NAME:
            if value in self._pgen_grammar.reserved_syntax_strings:
                return tree.Keyword(value, start_pos, prefix)
            else:
                return tree.Name(value, start_pos, prefix)

        return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
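
    # For example: a NAME whose value is a reserved string such as 'pass'
    # becomes a tree.Keyword, NAME 'x' becomes a tree.Name, and token types
    # missing from _leaf_map (operators like '+') fall back to tree.Operator.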

    def error_recovery(self, token):
        tos_nodes = self.stack[-1].nodes
        if tos_nodes:
            last_leaf = tos_nodes[-1].get_last_leaf()
        else:
            last_leaf = None

        if self._start_nonterminal == 'file_input' and \
                (token.type == PythonTokenTypes.ENDMARKER
                 or token.type == DEDENT and not last_leaf.value.endswith('\n')
                 and not last_leaf.value.endswith('\r')):
            # In Python, statements need to end with a newline. But since it's
            # possible (and valid in Python) that there's no newline at the
            # end of a file, we have to recover even if the user doesn't want
            # error recovery.
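            # For example, a file whose last line is `x = 1` with no trailing
            # newline still parses without an error, even when
            # error_recovery=False.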
            if self.stack[-1].dfa.from_rule == 'simple_stmt':
                try:
                    plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
                except KeyError:
                    pass
                else:
                    if plan.next_dfa.is_final and not plan.dfa_pushes:
                        # We are ignoring here that the newline would be
                        # required for a simple_stmt.
                        self.stack[-1].dfa = plan.next_dfa
                        self._add_token(token)
                        return

        if not self._error_recovery:
            return super().error_recovery(token)

        def current_suite(stack):
            # For now, if we detect an error, just discard everything that is
            # not a suite or file_input.
            for until_index, stack_node in reversed(list(enumerate(stack))):
                # `suite` can sometimes be only simple_stmt, not stmt.
                if stack_node.nonterminal == 'file_input':
                    break
                elif stack_node.nonterminal == 'suite':
                    # In the case where we just have a newline we don't want
                    # to do error recovery here. In all other cases, we want
                    # to do error recovery.
                    if len(stack_node.nodes) != 1:
                        break
            return until_index

        until_index = current_suite(self.stack)

        if self._stack_removal(until_index + 1):
            self._add_token(token)
        else:
            typ, value, start_pos, prefix = token
            if typ == INDENT:
                # For every deleted INDENT we have to delete a DEDENT as well.
                # Otherwise the parser will get into trouble and DEDENT too early.
                self._omit_dedent_list.append(self._indent_counter)

            error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
            self.stack[-1].nodes.append(error_leaf)

        tos = self.stack[-1]
        if tos.nonterminal == 'suite':
            # Need at least one statement in the suite. This happened with
            # the error recovery above.
            try:
                tos.dfa = tos.dfa.arcs['stmt']
            except KeyError:
                # We're already in a final state.
                pass

    def _stack_removal(self, start_index):
        all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]

        if all_nodes:
            node = tree.PythonErrorNode(all_nodes)
            self.stack[start_index - 1].nodes.append(node)

        self.stack[start_index:] = []
        return bool(all_nodes)

    def _recovery_tokenize(self, tokens):
        for token in tokens:
            typ = token[0]
            if typ == DEDENT:
                # We need to count indents, because if we just omit any DEDENT,
                # we might omit them in the wrong place.
                o = self._omit_dedent_list
                if o and o[-1] == self._indent_counter:
                    o.pop()
                    self._indent_counter -= 1
                    continue

                self._indent_counter -= 1
            elif typ == INDENT:
                self._indent_counter += 1
            yield token
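
# A rough sketch of the recovery path as seen through the public API (hedged:
# the node type names are parso implementation details):
#
#     import parso
#     module = parso.parse('def f(:\n    pass\n')
#     # Parsing does not raise; the broken region is kept in the tree as
#     # 'error_node' / 'error_leaf' entries produced by the code above.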