Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/parser.py: 67% (105 statements)

# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is different from pgen2, now.

9""" 

10The ``Parser`` tries to convert the available Python code in an easy to read 

11format, something like an abstract syntax tree. The classes who represent this 

12tree, are sitting in the :mod:`parso.tree` module. 

13 

14The Python module ``tokenize`` is a very important part in the ``Parser``, 

15because it splits the code into different words (tokens). Sometimes it looks a 

16bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast`` 

17module for this? Well, ``ast`` does a very good job understanding proper Python 

18code, but fails to work as soon as there's a single line of broken code. 

19 

20There's one important optimization that needs to be known: Statements are not 

21being parsed completely. ``Statement`` is just a representation of the tokens 

22within the statement. This lowers memory usage and cpu time and reduces the 

23complexity of the ``Parser`` (there's another parser sitting inside 

24``Statement``, which produces ``Array`` and ``Call``). 

25""" 

from typing import Dict, Type

from parso import tree
from parso.pgen2.generator import ReservedString

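# A quick, hedged sketch of what this module ultimately powers, via parso's
# public API (illustrative usage, not part of this module):
#
#     import parso
#     module = parso.parse('x = 1\ndef f(): pass\n')
#     print(module.children)    # the parse tree's nodes and leaves
#     print(module.get_code())  # round-trips back to the exact source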

class ParserSyntaxError(Exception):
    """
    Contains error information about the parser tree.

    May be raised as an exception.
    """
    def __init__(self, message, error_leaf):
        self.message = message
        self.error_leaf = error_leaf

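# Hedged sketch of how callers typically handle this error: with
# error_recovery=False, invalid input surfaces as ParserSyntaxError
# (illustrative, assuming a constructed parser and token stream):
#
#     try:
#         root = parser.parse(tokens)
#     except ParserSyntaxError as e:
#         print(e.message, e.error_leaf.start_pos)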

class InternalParseError(Exception):
    """
    Exception to signal the parser is stuck and error recovery didn't help.
    Basically this shouldn't happen. It's a sign that something is really
    wrong.
    """

    def __init__(self, msg, type_, value, start_pos):
        Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
                           (msg, type_.name, value, start_pos))
        self.msg = msg
        self.type = type_
        self.value = value
        self.start_pos = start_pos


class Stack(list):
    def _allowed_transition_names_and_token_types(self):
        def iterate():
            # An API just for Jedi.
            for stack_node in reversed(self):
                for transition in stack_node.dfa.transitions:
                    if isinstance(transition, ReservedString):
                        yield transition.value
                    else:
                        yield transition  # A token type

                if not stack_node.dfa.is_final:
                    break

        return list(iterate())

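# Illustrative note on the Jedi-facing helper above: with the parser stopped
# mid-statement, it yields keyword strings (from ReservedString transitions,
# e.g. 'if', 'else') mixed with plain token types (e.g. a NAME token type);
# the exact contents depend entirely on the grammar and the current stack.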

class StackNode:
    def __init__(self, dfa):
        self.dfa = dfa
        self.nodes = []

    @property
    def nonterminal(self):
        return self.dfa.from_rule

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)

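# A StackNode pairs the DFA the parser is currently in with the children
# collected so far for that nonterminal; its repr looks roughly like
# StackNode(<DFA: file_input>, [<Leaf ...>]) (illustrative output).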

def _token_to_transition(grammar, type_, value):
    # Map from token to label
    if type_.value.contains_syntax:
        # Check for reserved words (keywords)
        try:
            return grammar.reserved_syntax_strings[value]
        except KeyError:
            pass

    return type_

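# Illustrative sketch: for a NAME token with value 'if', the
# reserved_syntax_strings lookup succeeds and the keyword's ReservedString
# becomes the transition; for a NAME token with value 'foo', the lookup
# fails and the plain NAME token type is used instead.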

class BaseParser:
    """Parser engine.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See python/tokenize.py for how to get input tokens from a string.

    When a syntax error occurs, error_recovery() is called.
    """

    node_map: Dict[str, Type[tree.BaseNode]] = {}
    default_node = tree.Node

    leaf_map: Dict[str, Type[tree.Leaf]] = {}
    default_leaf = tree.Leaf

    def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
        self._pgen_grammar = pgen_grammar
        self._start_nonterminal = start_nonterminal
        self._error_recovery = error_recovery

    def parse(self, tokens):
        first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0]
        self.stack = Stack([StackNode(first_dfa)])

        for token in tokens:
            self._add_token(token)

        while True:
            tos = self.stack[-1]
            if not tos.dfa.is_final:
                # We never broke out -- EOF came too soon -- unfinished
                # statement. However, if error recovery added the token again
                # and the stack is empty, we're fine.
                raise InternalParseError(
                    "incomplete input", token.type, token.string, token.start_pos
                )

            if len(self.stack) > 1:
                self._pop()
            else:
                return self.convert_node(tos.nonterminal, tos.nodes)
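    # Minimal hedged sketch of driving parse() directly (assumes you already
    # have a pgen grammar and a finished token stream, which in parso normally
    # come from the grammar machinery; illustrative only):
    #
    #     parser = BaseParser(pgen_grammar)
    #     root = parser.parse(tokens)   # tokens must end with an ENDMARKER
    #     print(root.type)              # 'file_input'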

    def error_recovery(self, token):
        if self._error_recovery:
            raise NotImplementedError("Error Recovery is not implemented")
        else:
            type_, value, start_pos, prefix = token
            error_leaf = tree.ErrorLeaf(type_, value, start_pos, prefix)
            raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
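    # Subclasses that actually support recovery override error_recovery()
    # instead of raising; a hedged sketch of the shape such an override takes
    # (the real one in parso.python.parser.Parser is more involved):
    #
    #     class RecoveringParser(BaseParser):
    #         def error_recovery(self, token):
    #             # wrap the offending token in an error node and keep going
    #             ...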

    def convert_node(self, nonterminal, children):
        try:
            node = self.node_map[nonterminal](children)
        except KeyError:
            node = self.default_node(nonterminal, children)
        return node

    def convert_leaf(self, type_, value, prefix, start_pos):
        try:
            return self.leaf_map[type_](value, start_pos, prefix)
        except KeyError:
            return self.default_leaf(value, start_pos, prefix)
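    # These two converters are the customization hooks: subclasses map rule
    # names and token types to richer tree classes. Hedged sketch with purely
    # hypothetical classes:
    #
    #     class MyParser(BaseParser):
    #         node_map = {'if_stmt': MyIfStatement}   # hypothetical class
    #         leaf_map = {NAME: MyName}               # hypothetical class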

    def _add_token(self, token):
        """
        This is the only core function for parsing. Basically everything
        happens here. Everything is well prepared by the parser generator
        and we only apply the necessary steps here.
        """
        grammar = self._pgen_grammar
        stack = self.stack
        type_, value, start_pos, prefix = token
        transition = _token_to_transition(grammar, type_, value)

        while True:
            try:
                plan = stack[-1].dfa.transitions[transition]
                break
            except KeyError:
                if stack[-1].dfa.is_final:
                    self._pop()
                else:
                    self.error_recovery(token)
                    return
            except IndexError:
                raise InternalParseError("too much input", type_, value, start_pos)

        stack[-1].dfa = plan.next_dfa

        for push in plan.dfa_pushes:
            stack.append(StackNode(push))

        leaf = self.convert_leaf(type_, value, prefix, start_pos)
        stack[-1].nodes.append(leaf)
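    # Illustrative walk-through (hedged, names simplified): feeding the tokens
    # for ``x = 1`` pushes DFAs for nonterminals such as simple_stmt and
    # expr_stmt via plan.dfa_pushes, appends the NAME/OP/NUMBER leaves to the
    # innermost StackNode, and later _pop() calls fold the finished children
    # back into their parent nodes.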

    def _pop(self):
        tos = self.stack.pop()
        # If there's exactly one child, return that child instead of
        # creating a new node. We still create expr_stmt and
        # file_input though, because a lot of Jedi depends on its
        # logic.
        if len(tos.nodes) == 1:
            new_node = tos.nodes[0]
        else:
            new_node = self.convert_node(tos.dfa.from_rule, tos.nodes)

        self.stack[-1].nodes.append(new_node)
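    # Example of the single-child collapse (illustrative): parsing just ``1``
    # keeps the NUMBER leaf instead of wrapping it in a chain of single-child
    # nonterminal nodes, so the resulting tree stays shallow.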