Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/asttokens/util.py: 50%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

164 statements  

1# Copyright 2016 Grist Labs, Inc. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# https://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15import ast 

16import collections 

17import io 

18import sys 

19import token 

20import tokenize 

21from abc import ABCMeta 

22from ast import Module, expr, AST 

23from functools import lru_cache 

24from typing import ( 

25 Callable, 

26 Dict, 

27 Iterable, 

28 Iterator, 

29 List, 

30 Optional, 

31 Tuple, 

32 Union, 

33 cast, 

34 Any, 

35 TYPE_CHECKING, 

36 Type, 

37) 

38 

39if TYPE_CHECKING: # pragma: no cover 

40 from .astroid_compat import NodeNG 

41 

42 # Type class used to expand out the definition of AST to include fields added by this library 

43 # It's not actually used for anything other than type checking though! 

44 class EnhancedAST(AST): 

45 # Additional attributes set by mark_tokens 

46 first_token = None # type: Token 

47 last_token = None # type: Token 

48 lineno = 0 # type: int 

49 end_lineno = 0 # type : int 

50 end_col_offset = 0 # type : int 

51 

52 AstNode = Union[EnhancedAST, NodeNG] 

53 

54 TokenInfo = tokenize.TokenInfo 

55 

56 

57def token_repr(tok_type, string): 

58 # type: (int, Optional[str]) -> str 

59 """Returns a human-friendly representation of a token with the given type and string.""" 

60 # repr() prefixes unicode with 'u' on Python2 but not Python3; strip it out for consistency. 

61 return '%s:%s' % (token.tok_name[tok_type], repr(string).lstrip('u')) 

62 

63 

64class Token(collections.namedtuple('Token', 'type string start end line index startpos endpos')): 

65 """ 

66 TokenInfo is an 8-tuple containing the same 5 fields as the tokens produced by the tokenize 

67 module, and 3 additional ones useful for this module: 

68 

69 - [0] .type Token type (see token.py) 

70 - [1] .string Token (a string) 

71 - [2] .start Starting (row, column) indices of the token (a 2-tuple of ints) 

72 - [3] .end Ending (row, column) indices of the token (a 2-tuple of ints) 

73 - [4] .line Original line (string) 

74 - [5] .index Index of the token in the list of tokens that it belongs to. 

75 - [6] .startpos Starting character offset into the input text. 

76 - [7] .endpos Ending character offset into the input text. 

77 """ 

78 def __str__(self): 

79 # type: () -> str 

80 return token_repr(self.type, self.string) 

81 

82 

83def match_token(token, tok_type, tok_str=None): 

84 # type: (Token, int, Optional[str]) -> bool 

85 """Returns true if token is of the given type and, if a string is given, has that string.""" 

86 return token.type == tok_type and (tok_str is None or token.string == tok_str) 

87 

88 

89def expect_token(token, tok_type, tok_str=None): 

90 # type: (Token, int, Optional[str]) -> None 

91 """ 

92 Verifies that the given token is of the expected type. If tok_str is given, the token string 

93 is verified too. If the token doesn't match, raises an informative ValueError. 

94 """ 

95 if not match_token(token, tok_type, tok_str): 

96 raise ValueError("Expected token %s, got %s on line %s col %s" % ( 

97 token_repr(tok_type, tok_str), str(token), 

98 token.start[0], token.start[1] + 1)) 

99 

100 

101def is_non_coding_token(token_type): 

102 # type: (int) -> bool 

103 """ 

104 These are considered non-coding tokens, as they don't affect the syntax tree. 

105 """ 

106 return token_type in (token.NL, token.COMMENT, token.ENCODING) 

107 

108 

109def generate_tokens(text): 

110 # type: (str) -> Iterator[TokenInfo] 

111 """ 

112 Generates standard library tokens for the given code. 

113 """ 

114 # tokenize.generate_tokens is technically an undocumented API for Python3, but allows us to use the same API as for 

115 # Python2. See https://stackoverflow.com/a/4952291/328565. 

116 # FIXME: Remove cast once https://github.com/python/typeshed/issues/7003 gets fixed 

117 return tokenize.generate_tokens(cast(Callable[[], str], io.StringIO(text).readline)) 

118 

119 

120def iter_children_func(node): 

121 # type: (AST) -> Callable 

122 """ 

123 Returns a function which yields all direct children of a AST node, 

124 skipping children that are singleton nodes. 

125 The function depends on whether ``node`` is from ``ast`` or from the ``astroid`` module. 

126 """ 

127 return iter_children_astroid if hasattr(node, 'get_children') else iter_children_ast 

128 

129 

130def iter_children_astroid(node, include_joined_str=False): 

131 # type: (NodeNG, bool) -> Union[Iterator, List] 

132 if not include_joined_str and is_joined_str(node): 

133 return [] 

134 

135 return node.get_children() 

136 

137 

138SINGLETONS = {c for n, c in ast.__dict__.items() if isinstance(c, type) and 

139 issubclass(c, (ast.expr_context, ast.boolop, ast.operator, ast.unaryop, ast.cmpop))} 

140 

141 

142def iter_children_ast(node, include_joined_str=False): 

143 # type: (AST, bool) -> Iterator[Union[AST, expr]] 

144 if not include_joined_str and is_joined_str(node): 

145 return 

146 

147 if isinstance(node, ast.Dict): 

148 # override the iteration order: instead of <all keys>, <all values>, 

149 # yield keys and values in source order (key1, value1, key2, value2, ...) 

150 for (key, value) in zip(node.keys, node.values): 

151 if key is not None: 

152 yield key 

153 yield value 

154 return 

155 

156 for child in ast.iter_child_nodes(node): 

157 # Skip singleton children; they don't reflect particular positions in the code and break the 

158 # assumptions about the tree consisting of distinct nodes. Note that collecting classes 

159 # beforehand and checking them in a set is faster than using isinstance each time. 

160 if child.__class__ not in SINGLETONS: 

161 yield child 

162 

163 

164stmt_class_names = {n for n, c in ast.__dict__.items() 

165 if isinstance(c, type) and issubclass(c, ast.stmt)} 

166expr_class_names = ({n for n, c in ast.__dict__.items() 

167 if isinstance(c, type) and issubclass(c, ast.expr)} | 

168 {'AssignName', 'DelName', 'Const', 'AssignAttr', 'DelAttr'}) 

169 

170# These feel hacky compared to isinstance() but allow us to work with both ast and astroid nodes 

171# in the same way, and without even importing astroid. 

172def is_expr(node): 

173 # type: (AstNode) -> bool 

174 """Returns whether node is an expression node.""" 

175 return node.__class__.__name__ in expr_class_names 

176 

177def is_stmt(node): 

178 # type: (AstNode) -> bool 

179 """Returns whether node is a statement node.""" 

180 return node.__class__.__name__ in stmt_class_names 

181 

182def is_module(node): 

183 # type: (AstNode) -> bool 

184 """Returns whether node is a module node.""" 

185 return node.__class__.__name__ == 'Module' 

186 

187def is_joined_str(node): 

188 # type: (AstNode) -> bool 

189 """Returns whether node is a JoinedStr node, used to represent f-strings.""" 

190 # At the moment, nodes below JoinedStr have wrong line/col info, and trying to process them only 

191 # leads to errors. 

192 return node.__class__.__name__ == 'JoinedStr' 

193 

194 

195def is_expr_stmt(node): 

196 # type: (AstNode) -> bool 

197 """Returns whether node is an `Expr` node, which is a statement that is an expression.""" 

198 return node.__class__.__name__ == 'Expr' 

199 

200 

201 

202CONSTANT_CLASSES: Tuple[Type, ...] = (ast.Constant,) 

203try: 

204 from astroid.nodes import Const 

205 CONSTANT_CLASSES += (Const,) 

206except ImportError: # pragma: no cover 

207 # astroid is not available 

208 pass 

209 

210def is_constant(node): 

211 # type: (AstNode) -> bool 

212 """Returns whether node is a Constant node.""" 

213 return isinstance(node, CONSTANT_CLASSES) 

214 

215 

216def is_ellipsis(node): 

217 # type: (AstNode) -> bool 

218 """Returns whether node is an Ellipsis node.""" 

219 return is_constant(node) and node.value is Ellipsis # type: ignore 

220 

221 

222def is_starred(node): 

223 # type: (AstNode) -> bool 

224 """Returns whether node is a starred expression node.""" 

225 return node.__class__.__name__ == 'Starred' 

226 

227 

228def is_slice(node): 

229 # type: (AstNode) -> bool 

230 """Returns whether node represents a slice, e.g. `1:2` in `x[1:2]`""" 

231 # Before 3.9, a tuple containing a slice is an ExtSlice, 

232 # but this was removed in https://bugs.python.org/issue34822 

233 return ( 

234 node.__class__.__name__ in ('Slice', 'ExtSlice') 

235 or ( 

236 node.__class__.__name__ == 'Tuple' 

237 and any(map(is_slice, cast(ast.Tuple, node).elts)) 

238 ) 

239 ) 

240 

241 

242def is_empty_astroid_slice(node): 

243 # type: (AstNode) -> bool 

244 return ( 

245 node.__class__.__name__ == "Slice" 

246 and not isinstance(node, ast.AST) 

247 and node.lower is node.upper is node.step is None 

248 ) 

249 

250 

251# Sentinel value used by visit_tree(). 

252_PREVISIT = object() 

253 

254def visit_tree(node, previsit, postvisit): 

255 # type: (Module, Callable[[AstNode, Optional[Token]], Tuple[Optional[Token], Optional[Token]]], Optional[Callable[[AstNode, Optional[Token], Optional[Token]], None]]) -> None 

256 """ 

257 Scans the tree under the node depth-first using an explicit stack. It avoids implicit recursion 

258 via the function call stack to avoid hitting 'maximum recursion depth exceeded' error. 

259 

260 It calls ``previsit()`` and ``postvisit()`` as follows: 

261 

262 * ``previsit(node, par_value)`` - should return ``(par_value, value)`` 

263 ``par_value`` is as returned from ``previsit()`` of the parent. 

264 

265 * ``postvisit(node, par_value, value)`` - should return ``value`` 

266 ``par_value`` is as returned from ``previsit()`` of the parent, and ``value`` is as 

267 returned from ``previsit()`` of this node itself. The return ``value`` is ignored except 

268 the one for the root node, which is returned from the overall ``visit_tree()`` call. 

269 

270 For the initial node, ``par_value`` is None. ``postvisit`` may be None. 

271 """ 

272 if not postvisit: 

273 postvisit = lambda node, pvalue, value: None 

274 

275 iter_children = iter_children_func(node) 

276 done = set() 

277 ret = None 

278 stack = [(node, None, _PREVISIT)] # type: List[Tuple[AstNode, Optional[Token], Union[Optional[Token], object]]] 

279 while stack: 

280 current, par_value, value = stack.pop() 

281 if value is _PREVISIT: 

282 assert current not in done # protect againt infinite loop in case of a bad tree. 

283 done.add(current) 

284 

285 pvalue, post_value = previsit(current, par_value) 

286 stack.append((current, par_value, post_value)) 

287 

288 # Insert all children in reverse order (so that first child ends up on top of the stack). 

289 ins = len(stack) 

290 for n in iter_children(current): 

291 stack.insert(ins, (n, pvalue, _PREVISIT)) 

292 else: 

293 ret = postvisit(current, par_value, cast(Optional[Token], value)) 

294 return ret 

295 

296 

297def walk(node, include_joined_str=False): 

298 # type: (AST, bool) -> Iterator[Union[Module, AstNode]] 

299 """ 

300 Recursively yield all descendant nodes in the tree starting at ``node`` (including ``node`` 

301 itself), using depth-first pre-order traversal (yieling parents before their children). 

302 

303 This is similar to ``ast.walk()``, but with a different order, and it works for both ``ast`` and 

304 ``astroid`` trees. Also, as ``iter_children()``, it skips singleton nodes generated by ``ast``. 

305 

306 By default, ``JoinedStr`` (f-string) nodes and their contents are skipped 

307 because they previously couldn't be handled. Set ``include_joined_str`` to True to include them. 

308 """ 

309 iter_children = iter_children_func(node) 

310 done = set() 

311 stack = [node] 

312 while stack: 

313 current = stack.pop() 

314 assert current not in done # protect againt infinite loop in case of a bad tree. 

315 done.add(current) 

316 

317 yield current 

318 

319 # Insert all children in reverse order (so that first child ends up on top of the stack). 

320 # This is faster than building a list and reversing it. 

321 ins = len(stack) 

322 for c in iter_children(current, include_joined_str): 

323 stack.insert(ins, c) 

324 

325 

326def replace(text, replacements): 

327 # type: (str, List[Tuple[int, int, str]]) -> str 

328 """ 

329 Replaces multiple slices of text with new values. This is a convenience method for making code 

330 modifications of ranges e.g. as identified by ``ASTTokens.get_text_range(node)``. Replacements is 

331 an iterable of ``(start, end, new_text)`` tuples. 

332 

333 For example, ``replace("this is a test", [(0, 4, "X"), (8, 9, "THE")])`` produces 

334 ``"X is THE test"``. 

335 """ 

336 p = 0 

337 parts = [] 

338 for (start, end, new_text) in sorted(replacements): 

339 parts.append(text[p:start]) 

340 parts.append(new_text) 

341 p = end 

342 parts.append(text[p:]) 

343 return ''.join(parts) 

344 

345 

346class NodeMethods: 

347 """ 

348 Helper to get `visit_{node_type}` methods given a node's class and cache the results. 

349 """ 

350 def __init__(self): 

351 # type: () -> None 

352 self._cache = {} # type: Dict[Union[ABCMeta, type], Callable[[AstNode, Token, Token], Tuple[Token, Token]]] 

353 

354 def get(self, obj, cls): 

355 # type: (Any, Union[ABCMeta, type]) -> Callable 

356 """ 

357 Using the lowercase name of the class as node_type, returns `obj.visit_{node_type}`, 

358 or `obj.visit_default` if the type-specific method is not found. 

359 """ 

360 method = self._cache.get(cls) 

361 if not method: 

362 name = "visit_" + cls.__name__.lower() 

363 method = getattr(obj, name, obj.visit_default) 

364 self._cache[cls] = method 

365 return method 

366 

367 

368def patched_generate_tokens(original_tokens): 

369 # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo] 

370 """ 

371 Fixes tokens yielded by `tokenize.generate_tokens` to handle more non-ASCII characters in identifiers. 

372 Workaround for https://github.com/python/cpython/issues/68382. 

373 Should only be used when tokenizing a string that is known to be valid syntax, 

374 because it assumes that error tokens are not actually errors. 

375 Combines groups of consecutive NAME, NUMBER, and/or ERRORTOKEN tokens into a single NAME token. 

376 """ 

377 group = [] # type: List[tokenize.TokenInfo] 

378 for tok in original_tokens: 

379 if ( 

380 tok.type in (tokenize.NAME, tokenize.ERRORTOKEN, tokenize.NUMBER) 

381 # Only combine tokens if they have no whitespace in between 

382 and (not group or group[-1].end == tok.start) 

383 ): 

384 group.append(tok) 

385 else: 

386 for combined_token in combine_tokens(group): 

387 yield combined_token 

388 group = [] 

389 yield tok 

390 for combined_token in combine_tokens(group): 

391 yield combined_token 

392 

393def combine_tokens(group): 

394 # type: (List[tokenize.TokenInfo]) -> List[tokenize.TokenInfo] 

395 if not any(tok.type == tokenize.ERRORTOKEN for tok in group) or len({tok.line for tok in group}) != 1: 

396 return group 

397 return [ 

398 tokenize.TokenInfo( 

399 type=tokenize.NAME, 

400 string="".join(t.string for t in group), 

401 start=group[0].start, 

402 end=group[-1].end, 

403 line=group[0].line, 

404 ) 

405 ] 

406 

407 

408def last_stmt(node): 

409 # type: (AstNode) -> AstNode  

410 """ 

411 If the given AST node contains multiple statements, return the last one. 

412 Otherwise, just return the node. 

413 """ 

414 child_stmts = [ 

415 child for child in iter_children_func(node)(node) 

416 if is_stmt(child) or type(child).__name__ in ( 

417 "excepthandler", 

418 "ExceptHandler", 

419 "match_case", 

420 "MatchCase", 

421 "TryExcept", 

422 "TryFinally", 

423 ) 

424 ] 

425 if child_stmts: 

426 return last_stmt(child_stmts[-1]) 

427 return node 

428 

429 

430 

431@lru_cache(maxsize=None) 

432def fstring_positions_work(): 

433 # type: () -> bool 

434 """ 

435 The positions attached to nodes inside f-string FormattedValues have some bugs 

436 that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729. 

437 This checks for those bugs more concretely without relying on the Python version. 

438 Specifically this checks: 

439 - Values with a format spec or conversion 

440 - Repeated (i.e. identical-looking) expressions 

441 - f-strings implicitly concatenated over multiple lines. 

442 - Multiline, triple-quoted f-strings. 

443 """ 

444 source = """( 

445 f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}" 

446 f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}" 

447 f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}" 

448 f''' 

449 {s} {t} 

450 {u} {v} 

451 ''' 

452 )""" 

453 tree = ast.parse(source) 

454 name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)] 

455 name_positions = [(node.lineno, node.col_offset) for node in name_nodes] 

456 positions_are_unique = len(set(name_positions)) == len(name_positions) 

457 correct_source_segments = all( 

458 ast.get_source_segment(source, node) == node.id 

459 for node in name_nodes 

460 ) 

461 return positions_are_unique and correct_source_segments 

462 

463def annotate_fstring_nodes(tree): 

464 # type: (ast.AST) -> None 

465 """ 

466 Add a special attribute `_broken_positions` to nodes inside f-strings 

467 if the lineno/col_offset cannot be trusted. 

468 """ 

469 if sys.version_info >= (3, 12): 

470 # f-strings were weirdly implemented until https://peps.python.org/pep-0701/ 

471 # In Python 3.12, inner nodes have sensible positions. 

472 return 

473 for joinedstr in walk(tree, include_joined_str=True): 

474 if not isinstance(joinedstr, ast.JoinedStr): 

475 continue 

476 for part in joinedstr.values: 

477 # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird. 

478 setattr(part, '_broken_positions', True) # use setattr for mypy 

479 

480 if isinstance(part, ast.FormattedValue): 

481 if not fstring_positions_work(): 

482 for child in walk(part.value): 

483 setattr(child, '_broken_positions', True) 

484 

485 if part.format_spec: # this is another JoinedStr 

486 # Again, the standard positions span the full f-string. 

487 setattr(part.format_spec, '_broken_positions', True)