Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/asttokens/util.py: 30%



# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ast
import collections
import io
import sys
import token
import tokenize
from abc import ABCMeta
from ast import Module, expr, AST
from functools import lru_cache
from typing import (
  Callable,
  Dict,
  Iterable,
  Iterator,
  List,
  Optional,
  Tuple,
  Union,
  cast,
  Any,
  TYPE_CHECKING,
  Type,
)

if TYPE_CHECKING:  # pragma: no cover
  from .astroid_compat import NodeNG

  # Type class used to expand out the definition of AST to include fields added by this library
  # It's not actually used for anything other than type checking though!
  class EnhancedAST(AST):
    # Additional attributes set by mark_tokens
    first_token = None  # type: Token
    last_token = None  # type: Token
    lineno = 0  # type: int

  AstNode = Union[EnhancedAST, NodeNG]

  TokenInfo = tokenize.TokenInfo


def token_repr(tok_type, string):
  # type: (int, Optional[str]) -> str
  """Returns a human-friendly representation of a token with the given type and string."""
  # repr() prefixes unicode with 'u' on Python2 but not Python3; strip it out for consistency.
  return '%s:%s' % (token.tok_name[tok_type], repr(string).lstrip('u'))


class Token(collections.namedtuple('Token', 'type string start end line index startpos endpos')):
  """
  Token is an 8-tuple containing the same 5 fields as the tokens produced by the tokenize
  module, and 3 additional ones useful for this module:

  - [0] .type     Token type (see token.py)
  - [1] .string   Token (a string)
  - [2] .start    Starting (row, column) indices of the token (a 2-tuple of ints)
  - [3] .end      Ending (row, column) indices of the token (a 2-tuple of ints)
  - [4] .line     Original line (string)
  - [5] .index    Index of the token in the list of tokens that it belongs to.
  - [6] .startpos Starting character offset into the input text.
  - [7] .endpos   Ending character offset into the input text.
  """
  def __str__(self):
    # type: () -> str
    return token_repr(self.type, self.string)


def match_token(token, tok_type, tok_str=None):
  # type: (Token, int, Optional[str]) -> bool
  """Returns true if token is of the given type and, if a string is given, has that string."""
  return token.type == tok_type and (tok_str is None or token.string == tok_str)


def expect_token(token, tok_type, tok_str=None):
  # type: (Token, int, Optional[str]) -> None
  """
  Verifies that the given token is of the expected type. If tok_str is given, the token string
  is verified too. If the token doesn't match, raises an informative ValueError.
  """
  if not match_token(token, tok_type, tok_str):
    raise ValueError("Expected token %s, got %s on line %s col %s" % (
      token_repr(tok_type, tok_str), str(token),
      token.start[0], token.start[1] + 1))


def is_non_coding_token(token_type):
  # type: (int) -> bool
  """
  These are considered non-coding tokens, as they don't affect the syntax tree.
  """
  return token_type in (token.NL, token.COMMENT, token.ENCODING)


def generate_tokens(text):
  # type: (str) -> Iterator[TokenInfo]
  """
  Generates standard library tokens for the given code.
  """
  # tokenize.generate_tokens is technically an undocumented API for Python3, but allows us to use the same API as for
  # Python2. See http://stackoverflow.com/a/4952291/328565.
  # FIXME: Remove cast once https://github.com/python/typeshed/issues/7003 gets fixed
  return tokenize.generate_tokens(cast(Callable[[], str], io.StringIO(text).readline))
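
# Illustrative sketch, not part of the original asttokens module: one way generate_tokens()
# and the token helpers above might be used together. The tokens here are plain
# tokenize.TokenInfo tuples, which the helpers accept duck-typed; the helper name
# `_demo_tokens` is hypothetical.
def _demo_tokens():
  # type: () -> None
  toks = list(generate_tokens("x = 1\n"))
  expect_token(toks[0], token.NAME, "x")        # raises ValueError on a mismatch
  assert match_token(toks[1], token.OP, "=")
  assert not is_non_coding_token(toks[0].type)  # NAME is a coding token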


def iter_children_func(node):
  # type: (AST) -> Callable
  """
  Returns a function which yields all direct children of an AST node,
  skipping children that are singleton nodes.
  The function depends on whether ``node`` is from ``ast`` or from the ``astroid`` module.
  """
  return iter_children_astroid if hasattr(node, 'get_children') else iter_children_ast


def iter_children_astroid(node, include_joined_str=False):
  # type: (NodeNG, bool) -> Union[Iterator, List]
  if not include_joined_str and is_joined_str(node):
    return []

  return node.get_children()


SINGLETONS = {c for n, c in ast.__dict__.items() if isinstance(c, type) and
              issubclass(c, (ast.expr_context, ast.boolop, ast.operator, ast.unaryop, ast.cmpop))}


def iter_children_ast(node, include_joined_str=False):
  # type: (AST, bool) -> Iterator[Union[AST, expr]]
  if not include_joined_str and is_joined_str(node):
    return

  if isinstance(node, ast.Dict):
    # override the iteration order: instead of <all keys>, <all values>,
    # yield keys and values in source order (key1, value1, key2, value2, ...)
    for (key, value) in zip(node.keys, node.values):
      if key is not None:
        yield key
      yield value
    return

  for child in ast.iter_child_nodes(node):
    # Skip singleton children; they don't reflect particular positions in the code and break the
    # assumptions about the tree consisting of distinct nodes. Note that collecting classes
    # beforehand and checking them in a set is faster than using isinstance each time.
    if child.__class__ not in SINGLETONS:
      yield child
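
# Illustrative sketch, not part of the original asttokens module: iter_children_ast() yields
# dict keys and values interleaved in source order, unlike ast.iter_child_nodes(), which
# yields all keys before all values. The helper name `_demo_dict_children` is hypothetical.
def _demo_dict_children():
  # type: () -> None
  node = ast.parse("{'a': 1, 'b': 2}", mode="eval").body  # an ast.Dict
  values = [getattr(c, 'value', None) for c in iter_children_ast(node)]
  assert values == ['a', 1, 'b', 2]   # key1, value1, key2, value2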


stmt_class_names = {n for n, c in ast.__dict__.items()
                    if isinstance(c, type) and issubclass(c, ast.stmt)}
expr_class_names = ({n for n, c in ast.__dict__.items()
                     if isinstance(c, type) and issubclass(c, ast.expr)} |
                    {'AssignName', 'DelName', 'Const', 'AssignAttr', 'DelAttr'})

# These feel hacky compared to isinstance() but allow us to work with both ast and astroid nodes
# in the same way, and without even importing astroid.
def is_expr(node):
  # type: (AstNode) -> bool
  """Returns whether node is an expression node."""
  return node.__class__.__name__ in expr_class_names

def is_stmt(node):
  # type: (AstNode) -> bool
  """Returns whether node is a statement node."""
  return node.__class__.__name__ in stmt_class_names

def is_module(node):
  # type: (AstNode) -> bool
  """Returns whether node is a module node."""
  return node.__class__.__name__ == 'Module'

def is_joined_str(node):
  # type: (AstNode) -> bool
  """Returns whether node is a JoinedStr node, used to represent f-strings."""
  # At the moment, nodes below JoinedStr have wrong line/col info, and trying to process them only
  # leads to errors.
  return node.__class__.__name__ == 'JoinedStr'


def is_expr_stmt(node):
  # type: (AstNode) -> bool
  """Returns whether node is an `Expr` node, which is a statement that is an expression."""
  return node.__class__.__name__ == 'Expr'


CONSTANT_CLASSES: Tuple[Type, ...] = (ast.Constant,)
try:
  from astroid import Const
  CONSTANT_CLASSES += (Const,)
except ImportError:  # pragma: no cover
  # astroid is not available
  pass

def is_constant(node):
  # type: (AstNode) -> bool
  """Returns whether node is a Constant node."""
  return isinstance(node, CONSTANT_CLASSES)


def is_ellipsis(node):
  # type: (AstNode) -> bool
  """Returns whether node is an Ellipsis node."""
  return is_constant(node) and node.value is Ellipsis  # type: ignore


def is_starred(node):
  # type: (AstNode) -> bool
  """Returns whether node is a starred expression node."""
  return node.__class__.__name__ == 'Starred'


def is_slice(node):
  # type: (AstNode) -> bool
  """Returns whether node represents a slice, e.g. `1:2` in `x[1:2]`"""
  # Before 3.9, a tuple containing a slice is an ExtSlice,
  # but this was removed in https://bugs.python.org/issue34822
  return (
    node.__class__.__name__ in ('Slice', 'ExtSlice')
    or (
      node.__class__.__name__ == 'Tuple'
      and any(map(is_slice, cast(ast.Tuple, node).elts))
    )
  )
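
# Illustrative sketch, not part of the original asttokens module: the subscript `1:2, 3`
# parses as an ExtSlice before Python 3.9 and as a Tuple containing a Slice on 3.9+; is_slice()
# reports True either way. The helper name `_demo_is_slice` is hypothetical.
def _demo_is_slice():
  # type: () -> None
  sub = ast.parse("x[1:2, 3]", mode="eval").body   # an ast.Subscript
  assert is_slice(sub.slice)
  assert not is_slice(ast.parse("x[3]", mode="eval").body.slice)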


def is_empty_astroid_slice(node):
  # type: (AstNode) -> bool
  return (
    node.__class__.__name__ == "Slice"
    and not isinstance(node, ast.AST)
    and node.lower is node.upper is node.step is None
  )


# Sentinel value used by visit_tree().
_PREVISIT = object()

def visit_tree(node, previsit, postvisit):
  # type: (Module, Callable[[AstNode, Optional[Token]], Tuple[Optional[Token], Optional[Token]]], Optional[Callable[[AstNode, Optional[Token], Optional[Token]], None]]) -> None
  """
  Scans the tree under the node depth-first using an explicit stack. It avoids implicit recursion
  via the function call stack to avoid hitting the 'maximum recursion depth exceeded' error.

  It calls ``previsit()`` and ``postvisit()`` as follows:

  * ``previsit(node, par_value)`` - should return ``(par_value, value)``
        ``par_value`` is as returned from ``previsit()`` of the parent.

  * ``postvisit(node, par_value, value)`` - should return ``value``
        ``par_value`` is as returned from ``previsit()`` of the parent, and ``value`` is as
        returned from ``previsit()`` of this node itself. The return ``value`` is ignored except
        the one for the root node, which is returned from the overall ``visit_tree()`` call.

  For the initial node, ``par_value`` is None. ``postvisit`` may be None.
  """
  if not postvisit:
    postvisit = lambda node, pvalue, value: None

  iter_children = iter_children_func(node)
  done = set()
  ret = None
  stack = [(node, None, _PREVISIT)]  # type: List[Tuple[AstNode, Optional[Token], Union[Optional[Token], object]]]
  while stack:
    current, par_value, value = stack.pop()
    if value is _PREVISIT:
      assert current not in done  # protect against infinite loop in case of a bad tree.
      done.add(current)

      pvalue, post_value = previsit(current, par_value)
      stack.append((current, par_value, post_value))

      # Insert all children in reverse order (so that first child ends up on top of the stack).
      ins = len(stack)
      for n in iter_children(current):
        stack.insert(ins, (n, pvalue, _PREVISIT))
    else:
      ret = postvisit(current, par_value, cast(Optional[Token], value))
  return ret
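
# Illustrative usage sketch, not part of the original asttokens module: per the docstring above,
# previsit() threads a per-node value down the tree, and the root's postvisit() result is what
# visit_tree() returns. Here that is used to compute the maximum node depth. The helper name
# `_demo_max_depth` is hypothetical.
def _demo_max_depth(source):
  # type: (str) -> int
  deepest = [0]

  def previsit(node, par_depth):
    depth = 0 if par_depth is None else par_depth + 1
    deepest[0] = max(deepest[0], depth)
    return (depth, None)

  def postvisit(node, par_depth, value):
    # Only the root's return value matters; it becomes visit_tree()'s result.
    return deepest[0]

  return visit_tree(ast.parse(source), previsit, postvisit)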


def walk(node, include_joined_str=False):
  # type: (AST, bool) -> Iterator[Union[Module, AstNode]]
  """
  Recursively yield all descendant nodes in the tree starting at ``node`` (including ``node``
  itself), using depth-first pre-order traversal (yielding parents before their children).

  This is similar to ``ast.walk()``, but with a different order, and it works for both ``ast`` and
  ``astroid`` trees. Also, like ``iter_children()``, it skips singleton nodes generated by ``ast``.

  By default, ``JoinedStr`` (f-string) nodes and their contents are skipped
  because they previously couldn't be handled. Set ``include_joined_str`` to True to include them.
  """
  iter_children = iter_children_func(node)
  done = set()
  stack = [node]
  while stack:
    current = stack.pop()
    assert current not in done  # protect against infinite loop in case of a bad tree.
    done.add(current)

    yield current

    # Insert all children in reverse order (so that first child ends up on top of the stack).
    # This is faster than building a list and reversing it.
    ins = len(stack)
    for c in iter_children(current, include_joined_str):
      stack.insert(ins, c)
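
# Illustrative sketch, not part of the original asttokens module: walk() yields the root first
# (pre-order) and skips singleton context nodes such as Load/Store, whereas ast.walk() yields
# them and traverses breadth-first. The helper name `_demo_walk_names` is hypothetical.
def _demo_walk_names(source):
  # type: (str) -> List[str]
  return [type(n).__name__ for n in walk(ast.parse(source))]

# e.g. _demo_walk_names("x = 1") -> ['Module', 'Assign', 'Name', 'Constant']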


def replace(text, replacements):
  # type: (str, List[Tuple[int, int, str]]) -> str
  """
  Replaces multiple slices of text with new values. This is a convenience method for making code
  modifications of ranges e.g. as identified by ``ASTTokens.get_text_range(node)``. Replacements is
  an iterable of ``(start, end, new_text)`` tuples.

  For example, ``replace("this is a test", [(0, 4, "X"), (8, 9, "THE")])`` produces
  ``"X is THE test"``.
  """
  p = 0
  parts = []
  for (start, end, new_text) in sorted(replacements):
    parts.append(text[p:start])
    parts.append(new_text)
    p = end
  parts.append(text[p:])
  return ''.join(parts)


class NodeMethods:
  """
  Helper to get `visit_{node_type}` methods given a node's class and cache the results.
  """
  def __init__(self):
    # type: () -> None
    self._cache = {}  # type: Dict[Union[ABCMeta, type], Callable[[AstNode, Token, Token], Tuple[Token, Token]]]

  def get(self, obj, cls):
    # type: (Any, Union[ABCMeta, type]) -> Callable
    """
    Using the lowercase name of the class as node_type, returns `obj.visit_{node_type}`,
    or `obj.visit_default` if the type-specific method is not found.
    """
    method = self._cache.get(cls)
    if not method:
      name = "visit_" + cls.__name__.lower()
      method = getattr(obj, name, obj.visit_default)
      self._cache[cls] = method
    return method
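
# Illustrative sketch, not part of the original asttokens module: NodeMethods dispatches to a
# `visit_<classname>` method on an arbitrary visitor object and falls back to `visit_default`.
# The class `_DemoVisitor` and helper `_demo_node_methods` are hypothetical.
class _DemoVisitor:
  def visit_name(self, node, first_token, last_token):
    return (first_token, last_token)

  def visit_default(self, node, first_token, last_token):
    return (first_token, last_token)

def _demo_node_methods():
  # type: () -> None
  methods = NodeMethods()
  visitor = _DemoVisitor()
  assert methods.get(visitor, ast.Name).__name__ == 'visit_name'
  assert methods.get(visitor, ast.BinOp).__name__ == 'visit_default'  # no visit_binop defined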


def patched_generate_tokens(original_tokens):
  # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo]
  """
  Fixes tokens yielded by `tokenize.generate_tokens` to handle more non-ASCII characters in identifiers.
  Workaround for https://github.com/python/cpython/issues/68382.
  Should only be used when tokenizing a string that is known to be valid syntax,
  because it assumes that error tokens are not actually errors.
  Combines groups of consecutive NAME, NUMBER, and/or ERRORTOKEN tokens into a single NAME token.
  """
  group = []  # type: List[tokenize.TokenInfo]
  for tok in original_tokens:
    if (
        tok.type in (tokenize.NAME, tokenize.ERRORTOKEN, tokenize.NUMBER)
        # Only combine tokens if they have no whitespace in between
        and (not group or group[-1].end == tok.start)
    ):
      group.append(tok)
    else:
      for combined_token in combine_tokens(group):
        yield combined_token
      group = []
      yield tok
  for combined_token in combine_tokens(group):
    yield combined_token

def combine_tokens(group):
  # type: (List[tokenize.TokenInfo]) -> List[tokenize.TokenInfo]
  if not any(tok.type == tokenize.ERRORTOKEN for tok in group) or len({tok.line for tok in group}) != 1:
    return group
  return [
    tokenize.TokenInfo(
      type=tokenize.NAME,
      string="".join(t.string for t in group),
      start=group[0].start,
      end=group[-1].end,
      line=group[0].line,
    )
  ]
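
# Illustrative sketch, not part of the original asttokens module: for ordinary source text the
# patched generator is a pass-through; it only merges adjacent NAME/NUMBER/ERRORTOKEN runs with
# no whitespace between them into a single NAME token. The helper name `_demo_patched_tokens`
# is hypothetical.
def _demo_patched_tokens():
  # type: () -> None
  src = "value = 42\n"
  plain = list(generate_tokens(src))
  patched = list(patched_generate_tokens(generate_tokens(src)))
  assert plain == patched   # nothing to combine in plain ASCII code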


def last_stmt(node):
  # type: (ast.AST) -> ast.AST
  """
  If the given AST node contains multiple statements, return the last one.
  Otherwise, just return the node.
  """
  child_stmts = [
    child for child in iter_children_func(node)(node)
    if is_stmt(child) or type(child).__name__ in (
      "excepthandler",
      "ExceptHandler",
      "match_case",
      "MatchCase",
      "TryExcept",
      "TryFinally",
    )
  ]
  if child_stmts:
    return last_stmt(child_stmts[-1])
  return node
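
# Illustrative sketch, not part of the original asttokens module: last_stmt() descends into
# compound statements, so for a function whose body ends with a loop it returns the last
# statement inside that loop. The helper name `_demo_last_stmt` is hypothetical.
def _demo_last_stmt():
  # type: () -> None
  tree = ast.parse("def f():\n  x = 1\n  for i in y:\n    print(i)\n")
  assert isinstance(last_stmt(tree), ast.Expr)   # the `print(i)` statement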


@lru_cache(maxsize=None)
def fstring_positions_work():
  # type: () -> bool
  """
  The positions attached to nodes inside f-string FormattedValues have some bugs
  that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
  This checks for those bugs more concretely without relying on the Python version.
  Specifically this checks:
   - Values with a format spec or conversion
   - Repeated (i.e. identical-looking) expressions
   - f-strings implicitly concatenated over multiple lines.
   - Multiline, triple-quoted f-strings.
  """
  source = """(
    f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
    f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
    f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
    f'''
    {s} {t}
    {u} {v}
    '''
  )"""
  tree = ast.parse(source)
  name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
  name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
  positions_are_unique = len(set(name_positions)) == len(name_positions)
  correct_source_segments = all(
    ast.get_source_segment(source, node) == node.id
    for node in name_nodes
  )
  return positions_are_unique and correct_source_segments

def annotate_fstring_nodes(tree):
  # type: (ast.AST) -> None
  """
  Add a special attribute `_broken_positions` to nodes inside f-strings
  if the lineno/col_offset cannot be trusted.
  """
  if sys.version_info >= (3, 12):
    # f-strings were weirdly implemented until https://peps.python.org/pep-0701/
    # In Python 3.12, inner nodes have sensible positions.
    return
  for joinedstr in walk(tree, include_joined_str=True):
    if not isinstance(joinedstr, ast.JoinedStr):
      continue
    for part in joinedstr.values:
      # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
      setattr(part, '_broken_positions', True)  # use setattr for mypy

      if isinstance(part, ast.FormattedValue):
        if not fstring_positions_work():
          for child in walk(part.value):
            setattr(child, '_broken_positions', True)

        if part.format_spec:  # this is another JoinedStr
          # Again, the standard positions span the full f-string.
          setattr(part.format_spec, '_broken_positions', True)
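
# Illustrative sketch, not part of the original asttokens module: after annotate_fstring_nodes(),
# nodes inside f-strings whose lineno/col_offset cannot be trusted (on Python < 3.12) carry the
# `_broken_positions` flag. The helper name `_demo_fstring_annotation` is hypothetical.
def _demo_fstring_annotation():
  # type: () -> None
  tree = ast.parse("x = f'{y}'")
  annotate_fstring_nodes(tree)
  flagged = [n for n in walk(tree, include_joined_str=True)
             if getattr(n, '_broken_positions', False)]
  # On Python 3.12+ nothing is flagged; earlier versions flag at least the FormattedValue.
  assert flagged or sys.version_info >= (3, 12)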