# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ast
import collections
import io
import sys
import token
import tokenize
from abc import ABCMeta
from ast import Module, expr, AST
from functools import lru_cache
from typing import (
  Callable,
  Dict,
  Iterable,
  Iterator,
  List,
  Optional,
  Tuple,
  Union,
  cast,
  Any,
  TYPE_CHECKING,
  Type,
)

if TYPE_CHECKING:  # pragma: no cover
  from .astroid_compat import NodeNG

  # Type class used to expand out the definition of AST to include fields added by this library
  # It's not actually used for anything other than type checking though!
  class EnhancedAST(AST):
    # Additional attributes set by mark_tokens
    first_token = None  # type: Token
    last_token = None  # type: Token
    lineno = 0  # type: int
    end_lineno = 0  # type: int
    end_col_offset = 0  # type: int

  AstNode = Union[EnhancedAST, NodeNG]

  TokenInfo = tokenize.TokenInfo


def token_repr(tok_type, string):
  # type: (int, Optional[str]) -> str
  """Returns a human-friendly representation of a token with the given type and string."""
  # repr() prefixes unicode with 'u' on Python2 but not Python3; strip it out for consistency.
  return '%s:%s' % (token.tok_name[tok_type], repr(string).lstrip('u'))
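
# Illustrative doctest-style example (added for clarity; not part of the
# original module), showing the formatting produced by token_repr():
#   >>> token_repr(token.OP, '(')
#   "OP:'('"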


class Token(collections.namedtuple('Token', 'type string start end line index startpos endpos')):
  """
  Token is an 8-tuple containing the same 5 fields as the tokens produced by the tokenize
  module, and 3 additional ones useful for this module:

  - [0] .type     Token type (see token.py)
  - [1] .string   Token (a string)
  - [2] .start    Starting (row, column) indices of the token (a 2-tuple of ints)
  - [3] .end      Ending (row, column) indices of the token (a 2-tuple of ints)
  - [4] .line     Original line (string)
  - [5] .index    Index of the token in the list of tokens that it belongs to.
  - [6] .startpos Starting character offset into the input text.
  - [7] .endpos   Ending character offset into the input text.
  """
  def __str__(self):
    # type: () -> str
    return token_repr(self.type, self.string)
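
# Illustrative example (not part of the original module): construct a Token by
# hand and show its str() form, which delegates to token_repr():
#   >>> tok = Token(token.NAME, 'x', (1, 0), (1, 1), 'x = 1\n', 0, 0, 1)
#   >>> str(tok)
#   "NAME:'x'"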


def match_token(token, tok_type, tok_str=None):
  # type: (Token, int, Optional[str]) -> bool
  """Returns true if token is of the given type and, if a string is given, has that string."""
  return token.type == tok_type and (tok_str is None or token.string == tok_str)


def expect_token(token, tok_type, tok_str=None):
  # type: (Token, int, Optional[str]) -> None
  """
  Verifies that the given token is of the expected type. If tok_str is given, the token string
  is verified too. If the token doesn't match, raises an informative ValueError.
  """
  if not match_token(token, tok_type, tok_str):
    raise ValueError("Expected token %s, got %s on line %s col %s" % (
      token_repr(tok_type, tok_str), str(token),
      token.start[0], token.start[1] + 1))
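
# Illustrative example (not part of the original module):
#   >>> tok = Token(token.OP, '(', (1, 3), (1, 4), 'foo(x)\n', 2, 3, 4)
#   >>> match_token(tok, token.OP, '(')
#   True
#   >>> expect_token(tok, token.NAME)  # raises an informative ValueError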


def is_non_coding_token(token_type):
  # type: (int) -> bool
  """
  These are considered non-coding tokens, as they don't affect the syntax tree.
  """
  return token_type in (token.NL, token.COMMENT, token.ENCODING)


def generate_tokens(text):
  # type: (str) -> Iterator[TokenInfo]
  """
  Generates standard library tokens for the given code.
  """
  # tokenize.generate_tokens is technically an undocumented API for Python3, but allows us to use
  # the same API as for Python2. See https://stackoverflow.com/a/4952291/328565.
  # FIXME: Remove cast once https://github.com/python/typeshed/issues/7003 gets fixed
  return tokenize.generate_tokens(cast(Callable[[], str], io.StringIO(text).readline))
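
# Illustrative example (not part of the original module):
#   >>> [tok.string for tok in generate_tokens("x = 1\n")]
#   ['x', '=', '1', '\n', '']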


def iter_children_func(node):
  # type: (AST) -> Callable
  """
  Returns a function which yields all direct children of an AST node,
  skipping children that are singleton nodes.
  The function depends on whether ``node`` is from ``ast`` or from the ``astroid`` module.
  """
  return iter_children_astroid if hasattr(node, 'get_children') else iter_children_ast


def iter_children_astroid(node, include_joined_str=False):
  # type: (NodeNG, bool) -> Union[Iterator, List]
  if not include_joined_str and is_joined_str(node):
    return []

  return node.get_children()


SINGLETONS = {c for n, c in ast.__dict__.items() if isinstance(c, type) and
              issubclass(c, (ast.expr_context, ast.boolop, ast.operator, ast.unaryop, ast.cmpop))}


def iter_children_ast(node, include_joined_str=False):
  # type: (AST, bool) -> Iterator[Union[AST, expr]]
  if not include_joined_str and is_joined_str(node):
    return

  if isinstance(node, ast.Dict):
    # override the iteration order: instead of <all keys>, <all values>,
    # yield keys and values in source order (key1, value1, key2, value2, ...)
    for (key, value) in zip(node.keys, node.values):
      if key is not None:
        yield key
      yield value
    return

  for child in ast.iter_child_nodes(node):
    # Skip singleton children; they don't reflect particular positions in the code and break the
    # assumptions about the tree consisting of distinct nodes. Note that collecting classes
    # beforehand and checking them in a set is faster than using isinstance each time.
    if child.__class__ not in SINGLETONS:
      yield child
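
# Illustrative example (not part of the original module): dict children come out
# in source order, and a None key (from `**` unpacking) is skipped:
#   >>> node = ast.parse("{'a': 1, **rest}").body[0].value
#   >>> [type(c).__name__ for c in iter_children_ast(node)]
#   ['Constant', 'Constant', 'Name']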


stmt_class_names = {n for n, c in ast.__dict__.items()
                    if isinstance(c, type) and issubclass(c, ast.stmt)}
expr_class_names = ({n for n, c in ast.__dict__.items()
                     if isinstance(c, type) and issubclass(c, ast.expr)} |
                    {'AssignName', 'DelName', 'Const', 'AssignAttr', 'DelAttr'})


# These feel hacky compared to isinstance() but allow us to work with both ast and astroid nodes
# in the same way, and without even importing astroid.
def is_expr(node):
  # type: (AstNode) -> bool
  """Returns whether node is an expression node."""
  return node.__class__.__name__ in expr_class_names


def is_stmt(node):
  # type: (AstNode) -> bool
  """Returns whether node is a statement node."""
  return node.__class__.__name__ in stmt_class_names


def is_module(node):
  # type: (AstNode) -> bool
  """Returns whether node is a module node."""
  return node.__class__.__name__ == 'Module'


def is_joined_str(node):
  # type: (AstNode) -> bool
  """Returns whether node is a JoinedStr node, used to represent f-strings."""
  # At the moment, nodes below JoinedStr have wrong line/col info, and trying to process them only
  # leads to errors.
  return node.__class__.__name__ == 'JoinedStr'


def is_expr_stmt(node):
  # type: (AstNode) -> bool
  """Returns whether node is an `Expr` node, which is a statement that is an expression."""
  return node.__class__.__name__ == 'Expr'


CONSTANT_CLASSES: Tuple[Type, ...] = (ast.Constant,)
try:
  from astroid.nodes import Const
  CONSTANT_CLASSES += (Const,)
except ImportError:  # pragma: no cover
  # astroid is not available
  pass


def is_constant(node):
  # type: (AstNode) -> bool
  """Returns whether node is a Constant node."""
  return isinstance(node, CONSTANT_CLASSES)


def is_ellipsis(node):
  # type: (AstNode) -> bool
  """Returns whether node is an Ellipsis node."""
  return is_constant(node) and node.value is Ellipsis  # type: ignore


def is_starred(node):
  # type: (AstNode) -> bool
  """Returns whether node is a starred expression node."""
  return node.__class__.__name__ == 'Starred'


def is_slice(node):
  # type: (AstNode) -> bool
  """Returns whether node represents a slice, e.g. `1:2` in `x[1:2]`"""
  # Before 3.9, a tuple containing a slice is an ExtSlice,
  # but this was removed in https://bugs.python.org/issue34822
  return (
    node.__class__.__name__ in ('Slice', 'ExtSlice')
    or (
      node.__class__.__name__ == 'Tuple'
      and any(map(is_slice, cast(ast.Tuple, node).elts))
    )
  )


def is_empty_astroid_slice(node):
  # type: (AstNode) -> bool
  return (
    node.__class__.__name__ == "Slice"
    and not isinstance(node, ast.AST)
    and node.lower is node.upper is node.step is None
  )


# Sentinel value used by visit_tree().
_PREVISIT = object()


def visit_tree(node, previsit, postvisit):
  # type: (Module, Callable[[AstNode, Optional[Token]], Tuple[Optional[Token], Optional[Token]]], Optional[Callable[[AstNode, Optional[Token], Optional[Token]], None]]) -> None
  """
  Scans the tree under the node depth-first using an explicit stack. It avoids implicit recursion
  via the function call stack to avoid hitting 'maximum recursion depth exceeded' error.

  It calls ``previsit()`` and ``postvisit()`` as follows:

  * ``previsit(node, par_value)`` - should return ``(par_value, value)``
    ``par_value`` is as returned from ``previsit()`` of the parent.

  * ``postvisit(node, par_value, value)`` - should return ``value``
    ``par_value`` is as returned from ``previsit()`` of the parent, and ``value`` is as
    returned from ``previsit()`` of this node itself. The return ``value`` is ignored except
    the one for the root node, which is returned from the overall ``visit_tree()`` call.

  For the initial node, ``par_value`` is None. ``postvisit`` may be None.
  """
  if not postvisit:
    postvisit = lambda node, pvalue, value: None

  iter_children = iter_children_func(node)
  done = set()
  ret = None
  stack = [(node, None, _PREVISIT)]  # type: List[Tuple[AstNode, Optional[Token], Union[Optional[Token], object]]]
  while stack:
    current, par_value, value = stack.pop()
    if value is _PREVISIT:
      assert current not in done  # protect against infinite loop in case of a bad tree.
      done.add(current)

      pvalue, post_value = previsit(current, par_value)
      stack.append((current, par_value, post_value))

      # Insert all children in reverse order (so that first child ends up on top of the stack).
      ins = len(stack)
      for n in iter_children(current):
        stack.insert(ins, (n, pvalue, _PREVISIT))
    else:
      ret = postvisit(current, par_value, cast(Optional[Token], value))
  return ret
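
# Illustrative sketch (not part of the original module): thread a depth value
# from parent to child via par_value; previsit returns (value-for-children,
# value-passed-to-postvisit):
#   depths = []
#   def previsit(node, par_value):
#     depth = 0 if par_value is None else par_value + 1
#     depths.append((type(node).__name__, depth))
#     return (depth, None)
#   visit_tree(ast.parse("x = 1"), previsit, None)
#   # depths == [('Module', 0), ('Assign', 1), ('Name', 2), ('Constant', 2)]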


def walk(node, include_joined_str=False):
  # type: (AST, bool) -> Iterator[Union[Module, AstNode]]
  """
  Recursively yield all descendant nodes in the tree starting at ``node`` (including ``node``
  itself), using depth-first pre-order traversal (yielding parents before their children).

  This is similar to ``ast.walk()``, but with a different order, and it works for both ``ast`` and
  ``astroid`` trees. Also, like ``iter_children()``, it skips singleton nodes generated by ``ast``.

  By default, ``JoinedStr`` (f-string) nodes and their contents are skipped
  because they previously couldn't be handled. Set ``include_joined_str`` to True to include them.
  """
  iter_children = iter_children_func(node)
  done = set()
  stack = [node]
  while stack:
    current = stack.pop()
    assert current not in done  # protect against infinite loop in case of a bad tree.
    done.add(current)

    yield current

    # Insert all children in reverse order (so that first child ends up on top of the stack).
    # This is faster than building a list and reversing it.
    ins = len(stack)
    for c in iter_children(current, include_joined_str):
      stack.insert(ins, c)
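
# Illustrative example (not part of the original module): pre-order traversal,
# with singleton context nodes (e.g. Store) omitted:
#   >>> [type(n).__name__ for n in walk(ast.parse("x = 1"))]
#   ['Module', 'Assign', 'Name', 'Constant']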


def replace(text, replacements):
  # type: (str, List[Tuple[int, int, str]]) -> str
  """
  Replaces multiple slices of text with new values. This is a convenience method for making code
  modifications of ranges e.g. as identified by ``ASTTokens.get_text_range(node)``. Replacements is
  an iterable of ``(start, end, new_text)`` tuples.

  For example, ``replace("this is a test", [(0, 4, "X"), (8, 9, "THE")])`` produces
  ``"X is THE test"``.
  """
  p = 0
  parts = []
  for (start, end, new_text) in sorted(replacements):
    parts.append(text[p:start])
    parts.append(new_text)
    p = end
  parts.append(text[p:])
  return ''.join(parts)


class NodeMethods:
  """
  Helper to get `visit_{node_type}` methods given a node's class and cache the results.
  """
  def __init__(self):
    # type: () -> None
    self._cache = {}  # type: Dict[Union[ABCMeta, type], Callable[[AstNode, Token, Token], Tuple[Token, Token]]]

  def get(self, obj, cls):
    # type: (Any, Union[ABCMeta, type]) -> Callable
    """
    Using the lowercase name of the class as node_type, returns `obj.visit_{node_type}`,
    or `obj.visit_default` if the type-specific method is not found.
    """
    method = self._cache.get(cls)
    if not method:
      name = "visit_" + cls.__name__.lower()
      method = getattr(obj, name, obj.visit_default)
      self._cache[cls] = method
    return method
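
# Illustrative sketch (not part of the original module; MyVisitor is a
# hypothetical class, named only for this example):
#   class MyVisitor:
#     def visit_name(self, node, first, last): ...
#     def visit_default(self, node, first, last): ...
#   methods = NodeMethods()
#   methods.get(MyVisitor(), ast.Name)    # -> bound visit_name method
#   methods.get(MyVisitor(), ast.Assign)  # -> visit_default (no visit_assign defined)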


def patched_generate_tokens(original_tokens):
  # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo]
  """
  Fixes tokens yielded by `tokenize.generate_tokens` to handle more non-ASCII characters in identifiers.
  Workaround for https://github.com/python/cpython/issues/68382.
  Should only be used when tokenizing a string that is known to be valid syntax,
  because it assumes that error tokens are not actually errors.
  Combines groups of consecutive NAME, NUMBER, and/or ERRORTOKEN tokens into a single NAME token.
  """
  group = []  # type: List[tokenize.TokenInfo]
  for tok in original_tokens:
    if (
      tok.type in (tokenize.NAME, tokenize.ERRORTOKEN, tokenize.NUMBER)
      # Only combine tokens if they have no whitespace in between
      and (not group or group[-1].end == tok.start)
    ):
      group.append(tok)
    else:
      for combined_token in combine_tokens(group):
        yield combined_token
      group = []
      yield tok
  for combined_token in combine_tokens(group):
    yield combined_token


def combine_tokens(group):
  # type: (List[tokenize.TokenInfo]) -> List[tokenize.TokenInfo]
  if not any(tok.type == tokenize.ERRORTOKEN for tok in group) or len({tok.line for tok in group}) != 1:
    return group
  return [
    tokenize.TokenInfo(
      type=tokenize.NAME,
      string="".join(t.string for t in group),
      start=group[0].start,
      end=group[-1].end,
      line=group[0].line,
    )
  ]
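
# Illustrative mechanics (not part of the original module; the tokens below are
# synthetic, standing in for an identifier the stdlib tokenizer split incorrectly):
#   line = 'foo\u0300bar = 1\n'
#   group = [
#     tokenize.TokenInfo(tokenize.NAME, 'foo', (1, 0), (1, 3), line),
#     tokenize.TokenInfo(tokenize.ERRORTOKEN, '\u0300', (1, 3), (1, 4), line),
#     tokenize.TokenInfo(tokenize.NAME, 'bar', (1, 4), (1, 7), line),
#   ]
#   combine_tokens(group)  # -> [one NAME token 'foo\u0300bar' spanning (1, 0)-(1, 7)]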


def last_stmt(node):
  # type: (AstNode) -> AstNode
  """
  If the given AST node contains multiple statements, return the last one.
  Otherwise, just return the node.
  """
  child_stmts = [
    child for child in iter_children_func(node)(node)
    if is_stmt(child) or type(child).__name__ in (
      "excepthandler",
      "ExceptHandler",
      "match_case",
      "MatchCase",
      "TryExcept",
      "TryFinally",
    )
  ]
  if child_stmts:
    return last_stmt(child_stmts[-1])
  return node
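
# Illustrative example (not part of the original module):
#   >>> tree = ast.parse("if x:\n  a = 1\n  b = 2")
#   >>> last_stmt(tree).lineno  # the `b = 2` Assign node
#   3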


@lru_cache(maxsize=None)
def fstring_positions_work():
  # type: () -> bool
  """
  The positions attached to nodes inside f-string FormattedValues have some bugs
  that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
  This checks for those bugs more concretely without relying on the Python version.
  Specifically this checks:
   - Values with a format spec or conversion
   - Repeated (i.e. identical-looking) expressions
   - f-strings implicitly concatenated over multiple lines
   - Multiline, triple-quoted f-strings
  """
  source = """(
    f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
    f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
    f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
    f'''
    {s} {t}
    {u} {v}
    '''
  )"""
  tree = ast.parse(source)
  name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
  name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
  positions_are_unique = len(set(name_positions)) == len(name_positions)
  correct_source_segments = all(
    ast.get_source_segment(source, node) == node.id
    for node in name_nodes
  )
  return positions_are_unique and correct_source_segments


def annotate_fstring_nodes(tree):
  # type: (ast.AST) -> None
  """
  Add a special attribute `_broken_positions` to nodes inside f-strings
  if the lineno/col_offset cannot be trusted.
  """
  if sys.version_info >= (3, 12):
    # f-strings were weirdly implemented until https://peps.python.org/pep-0701/
    # In Python 3.12, inner nodes have sensible positions.
    return
  for joinedstr in walk(tree, include_joined_str=True):
    if not isinstance(joinedstr, ast.JoinedStr):
      continue
    for part in joinedstr.values:
      # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
      setattr(part, '_broken_positions', True)  # use setattr for mypy

      if isinstance(part, ast.FormattedValue):
        if not fstring_positions_work():
          for child in walk(part.value):
            setattr(child, '_broken_positions', True)

        if part.format_spec:  # this is another JoinedStr
          # Again, the standard positions span the full f-string.
          setattr(part.format_spec, '_broken_positions', True)
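
# Illustrative usage (not part of the original module; a no-op on Python >= 3.12):
#   tree = ast.parse('f"{x:{y}}"')
#   annotate_fstring_nodes(tree)
#   # On older versions, nodes inside the f-string now carry `_broken_positions`.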