Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/black/parsing.py: 48%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

129 statements  

1""" 

2Parse Python code and perform AST validation. 

3""" 

4 

5import ast 

6import sys 

7import warnings 

8from collections.abc import Collection, Iterator 

9 

10from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature 

11from black.nodes import syms 

12from blib2to3 import pygram 

13from blib2to3.pgen2 import driver 

14from blib2to3.pgen2.grammar import Grammar 

15from blib2to3.pgen2.parse import ParseError 

16from blib2to3.pgen2.tokenize import TokenError 

17from blib2to3.pytree import Leaf, Node 

18 

19 

class InvalidInput(ValueError):
    """Error signalling that the source text failed every parse attempt."""

22 

23 

def get_grammars(target_versions: set[TargetVersion]) -> list[Grammar]:
    """Return the grammars to try, in order, for *target_versions*.

    An empty set means "no preference": all three grammars are returned.
    Otherwise each grammar is included only if the feature checks below
    select it.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            pygram.python_grammar_async_keywords,  # Python 3.7-3.9
            pygram.python_grammar,  # Python 3.0-3.6
            pygram.python_grammar_soft_keywords,  # Python 3.10+
        ]

    selected: list[Grammar] = []

    # If we have to parse both, try to parse async as a keyword first
    if not (
        supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
        or supports_feature(target_versions, Feature.PATTERN_MATCHING)
    ):
        # Python 3.7-3.9
        selected.append(pygram.python_grammar_async_keywords)

    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        selected.append(pygram.python_grammar)

    if any(
        Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[version]
        for version in target_versions
    ):
        # Python 3.10+
        selected.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return selected

53 

54 

def lib2to3_parse(
    src_txt: str, target_versions: Collection[TargetVersion] = ()
) -> Node:
    """Parse *src_txt* and return the resulting lib2to3 ``Node``.

    A trailing newline is appended when missing. Every grammar returned by
    ``get_grammars`` is tried in order and the first successful parse wins.

    Raises:
        InvalidInput: if every grammar fails; the error reported is the one
            recorded under the highest ``grammar.version``.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))

    tv_str = ""
    if target_versions:
        newest_target = max(target_versions, key=lambda tv: tv.value)
        tv_str = f" for target version {newest_target.pretty()}"

    def describe_failure(
        position: tuple[int, int], kind: str, detail: object
    ) -> InvalidInput:
        # Build the message shared by ParseError and TokenError: location,
        # offending line, a caret under the column, then the error itself.
        lineno, column = position
        lines = src_txt.splitlines()
        try:
            faulty_line = lines[lineno - 1]
        except IndexError:
            faulty_line = "<line number missing in source>"
        return InvalidInput(
            f"Cannot parse{tv_str}: {lineno}:{column}\n"
            f" {faulty_line}\n"
            f" {' ' * (column - 1)}^\n"
            f"{kind}: {detail}"
        )

    failures = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, False)
        except ParseError as pe:
            failures[grammar.version] = describe_failure(
                pe.context[1], "ParseError", pe.msg
            )
        except TokenError as te:
            failures[grammar.version] = describe_failure(
                te.args[1], "TokenError", te.args[0]
            )
        else:
            # First grammar that parses cleanly wins.
            break
    else:
        # Choose the latest version when raising the actual parsing error.
        assert len(failures) >= 1
        raise failures[max(failures)] from None

    # A bare Leaf result is wrapped so callers always get a file_input Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result

116 

117 

class ASTSafetyError(Exception):
    """Error signalling that Black's output is not equivalent to the old AST."""

120 

121 

class SourceASTParseError(Exception):
    """Error signalling that ast.parse() could not parse the source file.

    This does not indicate a bug in Black: its lib2to3-based parser is more
    lenient than Python's ast.parse(), so it can accept code that ast.parse()
    rejects. In blackd, this should be reported as a 400 Bad Request.
    """

129 

130 

def _parse_single_version(
    src: str, version: tuple[int, int], *, type_comments: bool
) -> ast.AST:
    """Run ``ast.parse`` on *src* for a single feature version.

    ``SyntaxWarning`` and ``DeprecationWarning`` are ignored for the duration
    of the call. A ``SyntaxError`` from ``ast.parse`` is not caught here.
    """
    with warnings.catch_warnings():
        for category in (SyntaxWarning, DeprecationWarning):
            warnings.simplefilter("ignore", category)
        return ast.parse(
            src,
            filename="<unknown>",
            feature_version=version,
            type_comments=type_comments,
        )

141 

142 

def parse_ast(src: str) -> ast.AST:
    """Parse *src* with ``ast``, trying feature versions from newest to oldest.

    All versions are first tried with type comments enabled, then again with
    type comments disabled. The first successful parse is returned.

    Raises:
        SyntaxError: if every attempt fails; the message is that of the first
            failure seen while parsing with type comments.
    """
    # TODO: support Python 4+ ;)
    # (3, current minor) down to (3, 3), i.e. newest first.
    newest_first = [(3, minor) for minor in range(sys.version_info[1], 2, -1)]

    first_error = ""
    for version in newest_first:
        try:
            return _parse_single_version(src, version, type_comments=True)
        except SyntaxError as exc:
            # Keep only the earliest non-empty message.
            first_error = first_error or str(exc)

    # Try to parse without type comments
    for version in newest_first:
        try:
            return _parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            continue

    raise SyntaxError(first_error)

163 

164 

def _normalize(lineend: str, value: str) -> str:
    """Strip each line of *value*, rejoin with *lineend*, and strip the result.

    Per-line stripping removes leading and trailing whitespace on every line;
    the final strip removes blank lines at the start and end of the string.
    """
    trimmed_lines = map(str.strip, value.splitlines())
    return lineend.join(trimmed_lines).strip()

173 

174 

def stringify_ast(node: ast.AST) -> Iterator[str]:
    """Yield strings describing *node* so two ASTs can be compared by content."""
    # The root has no ancestors, so the walk starts with an empty parent stack.
    no_parents: list[ast.AST] = []
    return _stringify_ast(node, no_parents)

178 

179 

def _stringify_ast_with_new_parent(
    node: ast.AST, parent_stack: list[ast.AST], new_parent: ast.AST
) -> Iterator[str]:
    """Stringify *node* with *new_parent* temporarily pushed on *parent_stack*.

    The stack is shared and mutated in place: *new_parent* is appended before
    the child's lines are produced and popped once they have all been yielded.
    """
    parent_stack.append(new_parent)  # descend one level
    yield from _stringify_ast(node, parent_stack)
    parent_stack.pop()  # back to the caller's depth

186 

187 

def _stringify_ast(node: ast.AST, parent_stack: list[ast.AST]) -> Iterator[str]:
    """Yield one string per structural element of *node*, padded by depth.

    Output is an opening line with the node's class name, then for each field
    (in sorted order) a ``field=`` line followed by its contents, then a
    closing line. Padding is derived from ``len(parent_stack)``.

    Side effect: a string ``Constant`` whose ``kind`` is ``"u"`` has its
    ``kind`` reset to ``None`` on the node itself.
    """
    if (
        isinstance(node, ast.Constant)
        and isinstance(node.value, str)
        and node.kind == "u"
    ):
        # It's a quirk of history that we strip the u prefix over here. We used to
        # rewrite the AST nodes for Python version compatibility and we never copied
        # over the kind
        node.kind = None

    depth = len(parent_stack)
    outer_pad = " " * depth
    inner_pad = " " * (depth + 1)
    type_name = node.__class__.__name__

    yield f"{outer_pad}{type_name}("

    # TypeIgnore has only one field 'lineno' which breaks this comparison,
    # so none of its fields are emitted.
    if isinstance(node, ast.TypeIgnore):
        field_names: list[str] = []
    else:
        field_names = sorted(node._fields)

    missing = object()
    for field in field_names:
        value: object = getattr(node, field, missing)
        if value is missing:
            # Field is declared in _fields but not set on this node.
            continue

        yield f"{inner_pad}{field}="

        if isinstance(value, list):
            for item in value:
                # Non-AST list items produce no output.
                if not isinstance(item, ast.AST):
                    continue
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, ast.Delete)
                    and isinstance(item, ast.Tuple)
                ):
                    children: Iterator[ast.AST] = _unwrap_tuples(item)
                else:
                    children = iter((item,))
                for child in children:
                    yield from _stringify_ast_with_new_parent(
                        child, parent_stack, node
                    )
            continue

        if isinstance(value, ast.AST):
            yield from _stringify_ast_with_new_parent(value, parent_stack, node)
            continue

        normalized: object
        if (
            isinstance(node, ast.Constant)
            and field == "value"
            and isinstance(value, str)
            and depth >= 2
            # Any standalone string, ideally this would
            # exactly match black.nodes.is_docstring
            and isinstance(parent_stack[-1], ast.Expr)
        ):
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            normalized = _normalize("\n", value)
        elif field == "type_comment" and isinstance(value, str):
            # Trailing whitespace in type comments is removed.
            normalized = value.rstrip()
        else:
            normalized = value

        yield f"{inner_pad}{normalized!r}, # {value.__class__.__name__}"

    yield f"{outer_pad}) # /{type_name}"

259 

260 

def _unwrap_tuples(node: ast.Tuple) -> Iterator[ast.AST]:
    """Yield the non-tuple elements of *node*, flattening nested tuples.

    Elements come out in left-to-right source order; nested ``ast.Tuple``
    nodes are expanded in place and never yielded themselves.
    """
    # Explicit stack; elements are pushed reversed so pop() returns them
    # in their original order.
    pending = list(reversed(node.elts))
    while pending:
        current = pending.pop()
        if isinstance(current, ast.Tuple):
            pending.extend(reversed(current.elts))
        else:
            yield current