Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/black/parsing.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

131 statements  

1""" 

2Parse Python code and perform AST validation. 

3""" 

4 

5import ast 

6import sys 

7import warnings 

8from collections.abc import Collection, Iterator 

9 

10from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature 

11from black.nodes import syms 

12from blib2to3 import pygram 

13from blib2to3.pgen2 import driver 

14from blib2to3.pgen2.grammar import Grammar 

15from blib2to3.pgen2.parse import ParseError 

16from blib2to3.pgen2.tokenize import TokenError 

17from blib2to3.pytree import Leaf, Node 

18 

19 

class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts.

    Subclasses ValueError so callers catching plain ValueError still work.
    """

22 

23 

def get_grammars(target_versions: set[TargetVersion]) -> list[Grammar]:
    """Return the pgen2 grammars to attempt, in order, for *target_versions*.

    With no explicit targets, every grammar is tried.  Otherwise the list is
    narrowed by which features the requested versions support.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # Python 3.7-3.9
            pygram.python_grammar_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar,
            # Python 3.10+
            pygram.python_grammar_soft_keywords,
        ]

    grammars: list[Grammar] = []
    all_reserve_async = not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
    all_have_pattern_matching = supports_feature(
        target_versions, Feature.PATTERN_MATCHING
    )
    # If we have to parse both, try to parse async as a keyword first.
    if all_reserve_async and not all_have_pattern_matching:
        # Python 3.7-3.9
        grammars.append(pygram.python_grammar_async_keywords)
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        grammars.append(pygram.python_grammar)
    if any(Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[v] for v in target_versions):
        # Python 3.10+
        grammars.append(pygram.python_grammar_soft_keywords)

    # At least one of the branches above must have fired, because every Python
    # version has exactly one of the two 'ASYNC_*' flags.
    return grammars

53 

54 

def lib2to3_parse(
    src_txt: str, target_versions: Collection[TargetVersion] = ()
) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Each candidate grammar is tried in turn; the first successful parse wins.
    If every grammar fails, an InvalidInput for the newest grammar version
    tried is raised.
    """
    # The tokenizer expects a trailing newline.
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    if target_versions:
        newest_target = max(target_versions, key=lambda tv: tv.value)
        tv_str = f" for target version {newest_target.pretty()}"
    else:
        tv_str = ""

    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, False)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            try:
                faulty_line = src_txt.splitlines()[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse{tv_str}: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse{tv_str}: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Choose the latest version when raising the actual parsing error.
        assert errors
        raise errors[max(errors)] from None

    if isinstance(result, Leaf):
        return Node(syms.file_input, [result])
    return result

103 

104 

def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Report whether *src_txt* parses cleanly under *grammar*."""
    try:
        driver.Driver(grammar).parse_string(src_txt, False)
    except (ParseError, TokenError, IndentationError):
        return False
    return True

113 

114 

def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    return str(node)

119 

120 

class ASTSafetyError(Exception):
    """Raised when Black's generated code is not equivalent to the old AST.

    Signals that reformatting would have changed the program's meaning.
    """

123 

124 

125def _parse_single_version( 

126 src: str, version: tuple[int, int], *, type_comments: bool 

127) -> ast.AST: 

128 filename = "<unknown>" 

129 with warnings.catch_warnings(): 

130 warnings.simplefilter("ignore", SyntaxWarning) 

131 warnings.simplefilter("ignore", DeprecationWarning) 

132 return ast.parse( 

133 src, filename, feature_version=version, type_comments=type_comments 

134 ) 

135 

136 

def parse_ast(src: str) -> ast.AST:
    """Parse *src*, preferring the newest feature version that accepts it.

    Every supported (3, minor) feature version is tried from newest to
    oldest, first with type comments enabled and then without.  Raises
    SyntaxError carrying the first error seen if no attempt succeeds.
    """
    # TODO: support Python 4+ ;)
    versions = sorted(
        ((3, minor) for minor in range(3, sys.version_info[1] + 1)), reverse=True
    )

    first_error = ""
    for use_type_comments in (True, False):
        for version in versions:
            try:
                return _parse_single_version(
                    src, version, type_comments=use_type_comments
                )
            except SyntaxError as e:
                # Only the very first failure is kept for the final report.
                if not first_error:
                    first_error = str(e)

    raise SyntaxError(first_error)

157 

158 

159def _normalize(lineend: str, value: str) -> str: 

160 # To normalize, we strip any leading and trailing space from 

161 # each line... 

162 stripped: list[str] = [i.strip() for i in value.splitlines()] 

163 normalized = lineend.join(stripped) 

164 # ...and remove any blank lines at the beginning and end of 

165 # the whole string 

166 return normalized.strip() 

167 

168 

def stringify_ast(node: ast.AST) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content."""
    root_stack: list[ast.AST] = []
    return _stringify_ast(node, root_stack)

172 

173 

def _stringify_ast_with_new_parent(
    node: ast.AST, parent_stack: list[ast.AST], new_parent: ast.AST
) -> Iterator[str]:
    """Yield ``_stringify_ast(node)`` with *new_parent* temporarily on the stack.

    Pushes *new_parent* before delegating and pops it after the delegated
    generator is exhausted, restoring the shared *parent_stack*.
    NOTE(review): the pop is not inside a ``finally``, so a generator that is
    abandoned mid-iteration leaves the stack one entry deeper — presumably
    callers always drain these generators; confirm before relying on it.
    """
    parent_stack.append(new_parent)
    yield from _stringify_ast(node, parent_stack)
    parent_stack.pop()

180 

181 

def _stringify_ast(node: ast.AST, parent_stack: list[ast.AST]) -> Iterator[str]:
    """Yield one indented line per AST element for content-based comparison.

    *parent_stack* holds the chain of ancestor nodes; its length determines
    the indentation of the emitted lines.  Fields are visited in sorted name
    order so two equivalent trees always serialize identically.
    """
    if (
        isinstance(node, ast.Constant)
        and isinstance(node.value, str)
        and node.kind == "u"
    ):
        # It's a quirk of history that we strip the u prefix over here. We used to
        # rewrite the AST nodes for Python version compatibility and we never copied
        # over the kind
        node.kind = None

    # Opening line for this node, e.g. "Module(".
    yield f"{' ' * len(parent_stack)}{node.__class__.__name__}("

    for field in sorted(node._fields): # noqa: F402
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        if isinstance(node, ast.TypeIgnore):
            break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            continue

        yield f"{' ' * (len(parent_stack) + 1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, ast.Delete)
                    and isinstance(item, ast.Tuple)
                ):
                    for elt in _unwrap_tuples(item):
                        yield from _stringify_ast_with_new_parent(
                            elt, parent_stack, node
                        )

                elif isinstance(item, ast.AST):
                    yield from _stringify_ast_with_new_parent(item, parent_stack, node)

        elif isinstance(value, ast.AST):
            yield from _stringify_ast_with_new_parent(value, parent_stack, node)

        else:
            # Plain (non-AST) field value: normalize before emitting so that
            # formatting-only differences do not make the trees compare unequal.
            normalized: object
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
                and len(parent_stack) >= 2
                # Any standalone string, ideally this would
                # exactly match black.nodes.is_docstring
                and isinstance(parent_stack[-1], ast.Expr)
            ):
                # Constant strings may be indented across newlines, if they are
                # docstrings; fold spaces after newlines when comparing. Similarly,
                # trailing and leading space may be removed.
                normalized = _normalize("\n", value)
            elif field == "type_comment" and isinstance(value, str):
                # Trailing whitespace in type comments is removed.
                normalized = value.rstrip()
            else:
                normalized = value
            yield (
                f"{' ' * (len(parent_stack) + 1)}{normalized!r}, #"
                f" {value.__class__.__name__}"
            )

    # Closing line for this node, e.g. ") # /Module".
    yield f"{' ' * len(parent_stack)}) # /{node.__class__.__name__}"

253 

254 

255def _unwrap_tuples(node: ast.Tuple) -> Iterator[ast.AST]: 

256 for elt in node.elts: 

257 if isinstance(elt, ast.Tuple): 

258 yield from _unwrap_tuples(elt) 

259 else: 

260 yield elt