Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/asttokens/util.py: 62%

169 statements  

coverage.py v7.3.2, created at 2023-12-08 07:18 +0000

# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ast
import collections
import io
import sys
import token
import tokenize
from abc import ABCMeta
from ast import Module, expr, AST
from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union, cast, Any, TYPE_CHECKING

from six import iteritems


if TYPE_CHECKING:  # pragma: no cover
  from .astroid_compat import NodeNG

  # Type class used to expand out the definition of AST to include fields added by this library
  # It's not actually used for anything other than type checking though!
  class EnhancedAST(AST):
    # Additional attributes set by mark_tokens
    first_token = None  # type: Token
    last_token = None  # type: Token
    lineno = 0  # type: int

  AstNode = Union[EnhancedAST, NodeNG]

  if sys.version_info[0] == 2:
    TokenInfo = Tuple[int, str, Tuple[int, int], Tuple[int, int], str]
  else:
    TokenInfo = tokenize.TokenInfo


def token_repr(tok_type, string):
  # type: (int, Optional[str]) -> str
  """Returns a human-friendly representation of a token with the given type and string."""
  # repr() prefixes unicode with 'u' on Python2 but not Python3; strip it out for consistency.
  return '%s:%s' % (token.tok_name[tok_type], repr(string).lstrip('u'))


class Token(collections.namedtuple('Token', 'type string start end line index startpos endpos')):
  """
  Token is an 8-tuple containing the same 5 fields as the tokens produced by the tokenize
  module, and 3 additional ones useful for this module:

  - [0] .type     Token type (see token.py)
  - [1] .string   Token (a string)
  - [2] .start    Starting (row, column) indices of the token (a 2-tuple of ints)
  - [3] .end      Ending (row, column) indices of the token (a 2-tuple of ints)
  - [4] .line     Original line (string)
  - [5] .index    Index of the token in the list of tokens that it belongs to.
  - [6] .startpos Starting character offset into the input text.
  - [7] .endpos   Ending character offset into the input text.
  """
  def __str__(self):
    # type: () -> str
    return token_repr(self.type, self.string)


if sys.version_info >= (3, 6):
  AstConstant = ast.Constant
else:
  class AstConstant:
    value = object()


def match_token(token, tok_type, tok_str=None):
  # type: (Token, int, Optional[str]) -> bool
  """Returns true if token is of the given type and, if a string is given, has that string."""
  return token.type == tok_type and (tok_str is None or token.string == tok_str)


def expect_token(token, tok_type, tok_str=None):
  # type: (Token, int, Optional[str]) -> None
  """
  Verifies that the given token is of the expected type. If tok_str is given, the token string
  is verified too. If the token doesn't match, raises an informative ValueError.
  """
  if not match_token(token, tok_type, tok_str):
    raise ValueError("Expected token %s, got %s on line %s col %s" % (
      token_repr(tok_type, tok_str), str(token),
      token.start[0], token.start[1] + 1))
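

# Illustrative sketch, not part of the library: building a Token by hand and checking it with
# match_token()/expect_token(). The positions below are made up for demonstration.
def _demo_token_matching():
  # type: () -> str
  tok = Token(token.NAME, 'x', (1, 0), (1, 1), 'x = 1\n', index=0, startpos=0, endpos=1)
  assert match_token(tok, token.NAME, 'x')
  expect_token(tok, token.NAME)  # passes silently; a mismatch would raise ValueError
  return str(tok)  # -> "NAME:'x'"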


# These were previously defined in tokenize.py and distinguishable by being greater than
# token.N_TOKENS. As of python3.7, they are in token.py, and we check for them explicitly.
if sys.version_info >= (3, 7):
  def is_non_coding_token(token_type):
    # type: (int) -> bool
    """
    These are considered non-coding tokens, as they don't affect the syntax tree.
    """
    return token_type in (token.NL, token.COMMENT, token.ENCODING)
else:
  def is_non_coding_token(token_type):
    # type: (int) -> bool
    """
    These are considered non-coding tokens, as they don't affect the syntax tree.
    """
    return token_type >= token.N_TOKENS


def generate_tokens(text):
  # type: (str) -> Iterator[TokenInfo]
  """
  Generates standard library tokens for the given code.
  """
  # tokenize.generate_tokens is technically an undocumented API for Python3, but allows us to use the same API as for
  # Python2. See http://stackoverflow.com/a/4952291/328565.
  # FIXME: Remove cast once https://github.com/python/typeshed/issues/7003 gets fixed
  return tokenize.generate_tokens(cast(Callable[[], str], io.StringIO(text).readline))
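

# Illustrative sketch, not part of the library: generate_tokens() yields plain
# tokenize.TokenInfo tuples for a source string.
def _demo_generate_tokens():
  # type: () -> List[str]
  toks = list(generate_tokens("a = 1\n"))
  return [token.tok_name[t.type] for t in toks]
  # e.g. ['NAME', 'OP', 'NUMBER', 'NEWLINE', 'ENDMARKER']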


def iter_children_func(node):
  # type: (AST) -> Callable
  """
  Returns a function which yields all direct children of an AST node,
  skipping children that are singleton nodes.
  The function depends on whether ``node`` is from ``ast`` or from the ``astroid`` module.
  """
  return iter_children_astroid if hasattr(node, 'get_children') else iter_children_ast


def iter_children_astroid(node, include_joined_str=False):
  # type: (NodeNG, bool) -> Union[Iterator, List]
  if not include_joined_str and is_joined_str(node):
    return []

  return node.get_children()


SINGLETONS = {c for n, c in iteritems(ast.__dict__) if isinstance(c, type) and
              issubclass(c, (ast.expr_context, ast.boolop, ast.operator, ast.unaryop, ast.cmpop))}


def iter_children_ast(node, include_joined_str=False):
  # type: (AST, bool) -> Iterator[Union[AST, expr]]
  if not include_joined_str and is_joined_str(node):
    return

  if isinstance(node, ast.Dict):
    # override the iteration order: instead of <all keys>, <all values>,
    # yield keys and values in source order (key1, value1, key2, value2, ...)
    for (key, value) in zip(node.keys, node.values):
      if key is not None:
        yield key
      yield value
    return

  for child in ast.iter_child_nodes(node):
    # Skip singleton children; they don't reflect particular positions in the code and break the
    # assumptions about the tree consisting of distinct nodes. Note that collecting classes
    # beforehand and checking them in a set is faster than using isinstance each time.
    if child.__class__ not in SINGLETONS:
      yield child
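

# Illustrative sketch, not part of the library: iter_children_func() picks the right helper, and
# for ast.Dict the children come out interleaved in source order (key1, value1, key2, value2).
def _demo_iter_children_dict():
  # type: () -> List[Any]
  node = ast.parse("{'a': 1, 'b': 2}", mode="eval").body  # an ast.Dict
  children = list(iter_children_func(node)(node))
  return [getattr(c, 'value', None) for c in children]  # -> ['a', 1, 'b', 2] on Python 3.8+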


stmt_class_names = {n for n, c in iteritems(ast.__dict__)
                    if isinstance(c, type) and issubclass(c, ast.stmt)}
expr_class_names = ({n for n, c in iteritems(ast.__dict__)
                     if isinstance(c, type) and issubclass(c, ast.expr)} |
                    {'AssignName', 'DelName', 'Const', 'AssignAttr', 'DelAttr'})

# These feel hacky compared to isinstance() but allow us to work with both ast and astroid nodes
# in the same way, and without even importing astroid.
def is_expr(node):
  # type: (AstNode) -> bool
  """Returns whether node is an expression node."""
  return node.__class__.__name__ in expr_class_names

def is_stmt(node):
  # type: (AstNode) -> bool
  """Returns whether node is a statement node."""
  return node.__class__.__name__ in stmt_class_names

def is_module(node):
  # type: (AstNode) -> bool
  """Returns whether node is a module node."""
  return node.__class__.__name__ == 'Module'

def is_joined_str(node):
  # type: (AstNode) -> bool
  """Returns whether node is a JoinedStr node, used to represent f-strings."""
  # At the moment, nodes below JoinedStr have wrong line/col info, and trying to process them only
  # leads to errors.
  return node.__class__.__name__ == 'JoinedStr'


def is_starred(node):
  # type: (AstNode) -> bool
  """Returns whether node is a starred expression node."""
  return node.__class__.__name__ == 'Starred'


def is_slice(node):
  # type: (AstNode) -> bool
  """Returns whether node represents a slice, e.g. `1:2` in `x[1:2]`"""
  # Before 3.9, a tuple containing a slice is an ExtSlice,
  # but this was removed in https://bugs.python.org/issue34822
  return (
    node.__class__.__name__ in ('Slice', 'ExtSlice')
    or (
      node.__class__.__name__ == 'Tuple'
      and any(map(is_slice, cast(ast.Tuple, node).elts))
    )
  )


def is_empty_astroid_slice(node):
  # type: (AstNode) -> bool
  return (
    node.__class__.__name__ == "Slice"
    and not isinstance(node, ast.AST)
    and node.lower is node.upper is node.step is None
  )


# Sentinel value used by visit_tree().
_PREVISIT = object()

def visit_tree(node, previsit, postvisit):
  # type: (Module, Callable[[AstNode, Optional[Token]], Tuple[Optional[Token], Optional[Token]]], Optional[Callable[[AstNode, Optional[Token], Optional[Token]], None]]) -> None
  """
  Scans the tree under the node depth-first using an explicit stack. It avoids implicit recursion
  via the function call stack, so it does not hit the 'maximum recursion depth exceeded' error.

  It calls ``previsit()`` and ``postvisit()`` as follows:

  * ``previsit(node, par_value)`` - should return ``(par_value, value)``
        ``par_value`` is as returned from ``previsit()`` of the parent.

  * ``postvisit(node, par_value, value)`` - should return ``value``
        ``par_value`` is as returned from ``previsit()`` of the parent, and ``value`` is as
        returned from ``previsit()`` of this node itself. The return ``value`` is ignored except
        the one for the root node, which is returned from the overall ``visit_tree()`` call.

  For the initial node, ``par_value`` is None. ``postvisit`` may be None.
  """
  if not postvisit:
    postvisit = lambda node, pvalue, value: None

  iter_children = iter_children_func(node)
  done = set()
  ret = None
  stack = [(node, None, _PREVISIT)]  # type: List[Tuple[AstNode, Optional[Token], Union[Optional[Token], object]]]
  while stack:
    current, par_value, value = stack.pop()
    if value is _PREVISIT:
      assert current not in done  # protect against infinite loop in case of a bad tree.
      done.add(current)

      pvalue, post_value = previsit(current, par_value)
      stack.append((current, par_value, post_value))

      # Insert all children in reverse order (so that first child ends up on top of the stack).
      ins = len(stack)
      for n in iter_children(current):
        stack.insert(ins, (n, pvalue, _PREVISIT))
    else:
      ret = postvisit(current, par_value, cast(Optional[Token], value))
  return ret
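

# Illustrative sketch, not part of the library: a previsit/postvisit pair that just records
# the traversal order. Real callers in this package pass and return token-related values.
def _demo_visit_tree():
  # type: () -> List[str]
  visited = []  # type: List[str]
  def previsit(node, par_value):
    visited.append(type(node).__name__)
    return (None, None)
  def postvisit(node, par_value, value):
    return None
  visit_tree(ast.parse("x = y + 1"), previsit, postvisit)
  return visited  # parents before children, e.g. ['Module', 'Assign', 'Name', 'BinOp', ...]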


def walk(node, include_joined_str=False):
  # type: (AST, bool) -> Iterator[Union[Module, AstNode]]
  """
  Recursively yield all descendant nodes in the tree starting at ``node`` (including ``node``
  itself), using depth-first pre-order traversal (yielding parents before their children).

  This is similar to ``ast.walk()``, but with a different order, and it works for both ``ast`` and
  ``astroid`` trees. Also, like ``iter_children()``, it skips singleton nodes generated by ``ast``.

  By default, ``JoinedStr`` (f-string) nodes and their contents are skipped
  because they previously couldn't be handled. Set ``include_joined_str`` to True to include them.
  """
  iter_children = iter_children_func(node)
  done = set()
  stack = [node]
  while stack:
    current = stack.pop()
    assert current not in done  # protect against infinite loop in case of a bad tree.
    done.add(current)

    yield current

    # Insert all children in reverse order (so that first child ends up on top of the stack).
    # This is faster than building a list and reversing it.
    ins = len(stack)
    for c in iter_children(current, include_joined_str):
      stack.insert(ins, c)
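

# Illustrative sketch, not part of the library: walk() yields parents before their children
# and skips singleton context/operator nodes.
def _demo_walk_order():
  # type: () -> List[str]
  tree = ast.parse("x = [1, 2]")
  return [type(n).__name__ for n in walk(tree)]
  # e.g. ['Module', 'Assign', 'Name', 'List', 'Constant', 'Constant'] on Python 3.8+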


def replace(text, replacements):
  # type: (str, List[Tuple[int, int, str]]) -> str
  """
  Replaces multiple slices of text with new values. This is a convenience method for making code
  modifications of ranges e.g. as identified by ``ASTTokens.get_text_range(node)``. Replacements is
  an iterable of ``(start, end, new_text)`` tuples.

  For example, ``replace("this is a test", [(0, 4, "X"), (8, 9, "THE")])`` produces
  ``"X is THE test"``.
  """
  p = 0
  parts = []
  for (start, end, new_text) in sorted(replacements):
    parts.append(text[p:start])
    parts.append(new_text)
    p = end
  parts.append(text[p:])
  return ''.join(parts)
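

# Illustrative sketch, not part of the library: in real use the (start, end) offsets typically
# come from ASTTokens.get_text_range(); here they are computed with str.index() for brevity.
def _demo_replace():
  # type: () -> str
  src = "value = spam + spam"
  start = src.index("spam")
  return replace(src, [(start, start + len("spam"), "eggs")])  # -> "value = eggs + spam"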


class NodeMethods(object):
  """
  Helper to get `visit_{node_type}` methods given a node's class and cache the results.
  """
  def __init__(self):
    # type: () -> None
    self._cache = {}  # type: Dict[Union[ABCMeta, type], Callable[[AstNode, Token, Token], Tuple[Token, Token]]]

  def get(self, obj, cls):
    # type: (Any, Union[ABCMeta, type]) -> Callable
    """
    Using the lowercase name of the class as node_type, returns `obj.visit_{node_type}`,
    or `obj.visit_default` if the type-specific method is not found.
    """
    method = self._cache.get(cls)
    if not method:
      name = "visit_" + cls.__name__.lower()
      method = getattr(obj, name, obj.visit_default)
      self._cache[cls] = method
    return method
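

# Illustrative sketch, not part of the library: NodeMethods dispatches on the lowercased class
# name. ``_DemoVisitor`` is a hypothetical visitor, not an asttokens API.
class _DemoVisitor(object):
  def visit_name(self, node, first_token, last_token):
    return (first_token, last_token)

  def visit_default(self, node, first_token, last_token):
    return (first_token, last_token)


def _demo_node_methods():
  # type: () -> Tuple[str, str]
  methods = NodeMethods()
  visitor = _DemoVisitor()
  for_name = methods.get(visitor, ast.Name)    # resolves to visit_name
  for_binop = methods.get(visitor, ast.BinOp)  # falls back to visit_default
  return (for_name.__name__, for_binop.__name__)  # -> ('visit_name', 'visit_default')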


if sys.version_info[0] == 2:
  # Python 2 doesn't support non-ASCII identifiers, and making the real patched_generate_tokens support Python 2
  # means working with raw tuples instead of tokenize.TokenInfo namedtuples.
  def patched_generate_tokens(original_tokens):
    # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo]
    return iter(original_tokens)
else:
  def patched_generate_tokens(original_tokens):
    # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo]
    """
    Fixes tokens yielded by `tokenize.generate_tokens` to handle more non-ASCII characters in identifiers.
    Workaround for https://github.com/python/cpython/issues/68382.
    Should only be used when tokenizing a string that is known to be valid syntax,
    because it assumes that error tokens are not actually errors.
    Combines groups of consecutive NAME, NUMBER, and/or ERRORTOKEN tokens into a single NAME token.
    """
    group = []  # type: List[tokenize.TokenInfo]
    for tok in original_tokens:
      if (
          tok.type in (tokenize.NAME, tokenize.ERRORTOKEN, tokenize.NUMBER)
          # Only combine tokens if they have no whitespace in between
          and (not group or group[-1].end == tok.start)
      ):
        group.append(tok)
      else:
        for combined_token in combine_tokens(group):
          yield combined_token
        group = []
        yield tok
    for combined_token in combine_tokens(group):
      yield combined_token

  def combine_tokens(group):
    # type: (List[tokenize.TokenInfo]) -> List[tokenize.TokenInfo]
    if not any(tok.type == tokenize.ERRORTOKEN for tok in group) or len({tok.line for tok in group}) != 1:
      return group
    return [
      tokenize.TokenInfo(
        type=tokenize.NAME,
        string="".join(t.string for t in group),
        start=group[0].start,
        end=group[-1].end,
        line=group[0].line,
      )
    ]
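

# Illustrative sketch, not part of the library (assumes Python 3): hand-made tokens simulating
# a non-ASCII identifier that the tokenizer split up; patched_generate_tokens() merges them
# back into a single NAME token.
def _demo_patched_generate_tokens():
  # type: () -> List[str]
  line = "weird·name = 1\n"
  raw = [
    tokenize.TokenInfo(tokenize.NAME, "weird", (1, 0), (1, 5), line),
    tokenize.TokenInfo(tokenize.ERRORTOKEN, "·", (1, 5), (1, 6), line),
    tokenize.TokenInfo(tokenize.NAME, "name", (1, 6), (1, 10), line),
  ]
  return [t.string for t in patched_generate_tokens(iter(raw))]  # -> ['weird·name']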


def last_stmt(node):
  # type: (ast.AST) -> ast.AST
  """
  If the given AST node contains multiple statements, return the last one.
  Otherwise, just return the node.
  """
  child_stmts = [
    child for child in iter_children_func(node)(node)
    if is_stmt(child) or type(child).__name__ in (
      "excepthandler",
      "ExceptHandler",
      "match_case",
      "MatchCase",
      "TryExcept",
      "TryFinally",
    )
  ]
  if child_stmts:
    return last_stmt(child_stmts[-1])
  return node
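

# Illustrative sketch, not part of the library: last_stmt() descends into compound statements
# and returns the deepest trailing statement node.
def _demo_last_stmt():
  # type: () -> str
  tree = ast.parse("def f():\n  x = 1\n  if x:\n    return x")
  return type(last_stmt(tree)).__name__  # -> 'Return'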


if sys.version_info[:2] >= (3, 8):
  from functools import lru_cache

  @lru_cache(maxsize=None)
  def fstring_positions_work():
    # type: () -> bool
    """
    The positions attached to nodes inside f-string FormattedValues have some bugs
    that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
    This checks for those bugs more concretely without relying on the Python version.
    Specifically this checks:
     - Values with a format spec or conversion
     - Repeated (i.e. identical-looking) expressions
     - f-strings implicitly concatenated over multiple lines.
     - Multiline, triple-quoted f-strings.
    """
    source = """(
      f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
      f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
      f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
      f'''
      {s} {t}
      {u} {v}
      '''
    )"""
    tree = ast.parse(source)
    name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
    name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
    positions_are_unique = len(set(name_positions)) == len(name_positions)
    correct_source_segments = all(
      ast.get_source_segment(source, node) == node.id
      for node in name_nodes
    )
    return positions_are_unique and correct_source_segments

  def annotate_fstring_nodes(tree):
    # type: (ast.AST) -> None
    """
    Add a special attribute `_broken_positions` to nodes inside f-strings
    if the lineno/col_offset cannot be trusted.
    """
    if sys.version_info >= (3, 12):
      # f-strings were weirdly implemented until https://peps.python.org/pep-0701/
      # In Python 3.12, inner nodes have sensible positions.
      return
    for joinedstr in walk(tree, include_joined_str=True):
      if not isinstance(joinedstr, ast.JoinedStr):
        continue
      for part in joinedstr.values:
        # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
        setattr(part, '_broken_positions', True)  # use setattr for mypy

        if isinstance(part, ast.FormattedValue):
          if not fstring_positions_work():
            for child in walk(part.value):
              setattr(child, '_broken_positions', True)

          if part.format_spec:  # this is another JoinedStr
            # Again, the standard positions span the full f-string.
            setattr(part.format_spec, '_broken_positions', True)

else:
  def fstring_positions_work():
    # type: () -> bool
    return False

  def annotate_fstring_nodes(_tree):
    # type: (ast.AST) -> None
    pass
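

# Illustrative sketch, not part of the library: after annotate_fstring_nodes(), nodes whose
# positions cannot be trusted carry a `_broken_positions` attribute (no marking on 3.12+).
def _demo_annotate_fstring_nodes():
  # type: () -> List[str]
  tree = ast.parse('f"{x:{width}}"')
  annotate_fstring_nodes(tree)
  return sorted({type(n).__name__ for n in ast.walk(tree)
                 if getattr(n, '_broken_positions', False)})
  # e.g. ['FormattedValue', 'JoinedStr', ...] before Python 3.12; [] on 3.12+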