Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/black/parsing.py: 45%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Parse Python code and perform AST validation.
3"""
5import ast
6import sys
7import warnings
8from collections.abc import Collection, Iterator
10from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
11from black.nodes import syms
12from blib2to3 import pygram
13from blib2to3.pgen2 import driver
14from blib2to3.pgen2.grammar import Grammar
15from blib2to3.pgen2.parse import ParseError
16from blib2to3.pgen2.tokenize import TokenError
17from blib2to3.pytree import Leaf, Node
class InvalidInput(ValueError):
    """Error raised once every candidate grammar has failed to parse the input."""
def get_grammars(target_versions: set[TargetVersion]) -> list[Grammar]:
    """Return the blib2to3 grammars to try for *target_versions*, in priority order.

    With no explicit targets, all grammars are returned so any supported
    syntax can be parsed.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # Python 3.7-3.9
            pygram.python_grammar_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar,
            # Python 3.10+
            pygram.python_grammar_soft_keywords,
        ]

    selected: list[Grammar] = []
    async_as_identifier = supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
    has_pattern_matching = supports_feature(target_versions, Feature.PATTERN_MATCHING)
    # If we have to parse both, try to parse async as a keyword first
    if not async_as_identifier and not has_pattern_matching:
        # Python 3.7-3.9
        selected.append(pygram.python_grammar_async_keywords)
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        selected.append(pygram.python_grammar)
    if any(Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[v] for v in target_versions):
        # Python 3.10+
        selected.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return selected
def lib2to3_parse(
    src_txt: str, target_versions: Collection[TargetVersion] = ()
) -> Node:
    """Given a string with source, return the lib2to3 Node."""
    # The tokenizer requires a trailing newline.
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    if not target_versions:
        tv_str = ""
    else:
        newest = max(target_versions, key=lambda tv: tv.value)
        tv_str = f" for target version {newest.pretty()}"

    # Collect one error per grammar so the most relevant (newest) one can be
    # raised if every grammar fails.
    errors = {}
    for grammar in grammars:
        try:
            result = driver.Driver(grammar).parse_string(src_txt, False)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            all_lines = src_txt.splitlines()
            try:
                faulty_line = all_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse{tv_str}: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse{tv_str}: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Choose the latest version when raising the actual parsing error.
        assert len(errors) >= 1
        raise errors[max(errors)] from None

    if isinstance(result, Leaf):
        # Wrap a bare leaf so callers always get a file_input Node.
        result = Node(syms.file_input, [result])
    return result
class ASTSafetyError(Exception):
    """Signals that Black's reformatted output is not AST-equivalent to the input."""
109def _parse_single_version(
110 src: str, version: tuple[int, int], *, type_comments: bool
111) -> ast.AST:
112 filename = "<unknown>"
113 with warnings.catch_warnings():
114 warnings.simplefilter("ignore", SyntaxWarning)
115 warnings.simplefilter("ignore", DeprecationWarning)
116 return ast.parse(
117 src, filename, feature_version=version, type_comments=type_comments
118 )
def parse_ast(src: str) -> ast.AST:
    """Parse *src* into an ast.AST, preferring the newest feature version.

    A first pass honors type comments; if every version fails, a second pass
    retries without them. If nothing succeeds, the first error seen (from the
    type-comment pass) is re-raised.
    """
    # TODO: support Python 4+ ;)
    candidate_versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    first_error = ""
    for with_type_comments in (True, False):
        for candidate in sorted(candidate_versions, reverse=True):
            try:
                return _parse_single_version(
                    src, candidate, type_comments=with_type_comments
                )
            except SyntaxError as e:
                # Only the very first failure (type comments enabled) is kept
                # for the final error message.
                if with_type_comments and not first_error:
                    first_error = str(e)

    raise SyntaxError(first_error)
143def _normalize(lineend: str, value: str) -> str:
144 # To normalize, we strip any leading and trailing space from
145 # each line...
146 stripped: list[str] = [i.strip() for i in value.splitlines()]
147 normalized = lineend.join(stripped)
148 # ...and remove any blank lines at the beginning and end of
149 # the whole string
150 return normalized.strip()
def stringify_ast(node: ast.AST) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content."""
    # Public entry point: starts the walk with an empty parent stack.
    yield from _stringify_ast(node, [])
def _stringify_ast_with_new_parent(
    node: ast.AST, parent_stack: list[ast.AST], new_parent: ast.AST
) -> Iterator[str]:
    """Yield _stringify_ast output for *node* with *new_parent* pushed on the stack.

    parent_stack is shared and mutated in place. The try/finally guarantees
    the push is undone even if the consumer abandons or closes the generator
    mid-iteration (or _stringify_ast raises), so the stack cannot be left
    corrupted for subsequent fields.
    """
    parent_stack.append(new_parent)
    try:
        yield from _stringify_ast(node, parent_stack)
    finally:
        parent_stack.pop()
def _stringify_ast(node: ast.AST, parent_stack: list[ast.AST]) -> Iterator[str]:
    """Yield indented text lines describing *node* and, recursively, its children.

    ``parent_stack`` holds the chain of ancestor AST nodes; its length drives
    the indentation depth and is consulted to detect docstring positions.
    """
    if (
        isinstance(node, ast.Constant)
        and isinstance(node.value, str)
        and node.kind == "u"
    ):
        # It's a quirk of history that we strip the u prefix over here. We used to
        # rewrite the AST nodes for Python version compatibility and we never copied
        # over the kind
        node.kind = None

    # Opening line for this node, e.g. "  Call(".
    yield f"{' ' * len(parent_stack)}{node.__class__.__name__}("

    # Fields are visited in sorted order so output is deterministic.
    for field in sorted(node._fields):
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        if isinstance(node, ast.TypeIgnore):
            break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            # Optional fields may be absent; simply skip them.
            continue

        yield f"{' ' * (len(parent_stack) + 1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, ast.Delete)
                    and isinstance(item, ast.Tuple)
                ):
                    # Flatten the tuple so both parenthesized and bare forms
                    # stringify identically.
                    for elt in _unwrap_tuples(item):
                        yield from _stringify_ast_with_new_parent(
                            elt, parent_stack, node
                        )

                elif isinstance(item, ast.AST):
                    yield from _stringify_ast_with_new_parent(item, parent_stack, node)

        elif isinstance(value, ast.AST):
            yield from _stringify_ast_with_new_parent(value, parent_stack, node)

        else:
            # Non-AST leaf value: normalize certain strings before printing.
            normalized: object
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
                and len(parent_stack) >= 2
                # Any standalone string, ideally this would
                # exactly match black.nodes.is_docstring
                and isinstance(parent_stack[-1], ast.Expr)
            ):
                # Constant strings may be indented across newlines, if they are
                # docstrings; fold spaces after newlines when comparing. Similarly,
                # trailing and leading space may be removed.
                normalized = _normalize("\n", value)
            elif field == "type_comment" and isinstance(value, str):
                # Trailing whitespace in type comments is removed.
                normalized = value.rstrip()
            else:
                normalized = value
            yield (
                f"{' ' * (len(parent_stack) + 1)}{normalized!r}, #"
                f" {value.__class__.__name__}"
            )

    # Closing line mirrors the opener, e.g. "  ) # /Call".
    yield f"{' ' * len(parent_stack)}) # /{node.__class__.__name__}"
239def _unwrap_tuples(node: ast.Tuple) -> Iterator[ast.AST]:
240 for elt in node.elts:
241 if isinstance(elt, ast.Tuple):
242 yield from _unwrap_tuples(elt)
243 else:
244 yield elt