Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/asttokens/util.py: 29%

168 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 06:07 +0000

1# Copyright 2016 Grist Labs, Inc. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15import ast 

16import collections 

17import io 

18import sys 

19import token 

20import tokenize 

21from abc import ABCMeta 

22from ast import Module, expr, AST 

23from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union, cast, Any, TYPE_CHECKING 

24 

25from six import iteritems 

26 

27 

if TYPE_CHECKING:  # pragma: no cover
  from .astroid_compat import NodeNG

  # Type class used to expand out the definition of AST to include fields added by this library
  # It's not actually used for anything other than type checking though!
  class EnhancedAST(AST):
    # Additional attributes set by mark_tokens
    first_token = None  # type: Token
    last_token = None  # type: Token
    lineno = 0  # type: int

  # Either flavor of node accepted by the helpers in this module: stdlib ast or astroid.
  AstNode = Union[EnhancedAST, NodeNG]

  # Python 2's tokenize yields plain 5-tuples; Python 3 provides a namedtuple type.
  if sys.version_info[0] == 2:
    TokenInfo = Tuple[int, str, Tuple[int, int], Tuple[int, int], str]
  else:
    TokenInfo = tokenize.TokenInfo

45 

46 

def token_repr(tok_type, string):
  # type: (int, Optional[str]) -> str
  """Returns a human-friendly representation of a token with the given type and string."""
  # repr() prefixes unicode with 'u' on Python2 but not Python3; strip it out for consistency.
  text = repr(string).lstrip('u')
  return '%s:%s' % (token.tok_name[tok_type], text)

52 

53 

class Token(collections.namedtuple('Token', 'type string start end line index startpos endpos')):
  """
  An 8-tuple carrying the same 5 fields as the tokens produced by the tokenize
  module, plus 3 additional ones useful for this module:

  - [0] .type      Token type (see token.py)
  - [1] .string    Token (a string)
  - [2] .start     Starting (row, column) indices of the token (a 2-tuple of ints)
  - [3] .end       Ending (row, column) indices of the token (a 2-tuple of ints)
  - [4] .line      Original line (string)
  - [5] .index     Index of the token in the list of tokens that it belongs to.
  - [6] .startpos  Starting character offset into the input text.
  - [7] .endpos    Ending character offset into the input text.
  """
  def __str__(self):
    # type: () -> str
    return token_repr(self.type, self.string)

71 

72 

# `ast.Constant` exists starting in Python 3.6; on older versions provide a dummy
# stand-in so attribute/identity checks against AstConstant are safe everywhere.
if sys.version_info >= (3, 6):
  AstConstant = ast.Constant
else:
  class AstConstant:
    # Sentinel that can never compare equal to a real node's value.
    value = object()

78 

79 

def match_token(token, tok_type, tok_str=None):
  # type: (Token, int, Optional[str]) -> bool
  """Returns true if token is of the given type and, if a string is given, has that string."""
  if token.type != tok_type:
    return False
  return tok_str is None or token.string == tok_str

84 

85 

def expect_token(token, tok_type, tok_str=None):
  # type: (Token, int, Optional[str]) -> None
  """
  Verifies that the given token is of the expected type. If tok_str is given, the token string
  is verified too. If the token doesn't match, raises an informative ValueError.
  """
  if match_token(token, tok_type, tok_str):
    return
  raise ValueError("Expected token %s, got %s on line %s col %s" % (
    token_repr(tok_type, tok_str), str(token),
    token.start[0], token.start[1] + 1))

96 

# These were previously defined in tokenize.py and distinguishable by being greater than
# token.N_TOKEN. As of python3.7, they are in token.py, and we check for them explicitly.
if sys.version_info >= (3, 7):
  def is_non_coding_token(token_type):
    # type: (int) -> bool
    """
    These are considered non-coding tokens, as they don't affect the syntax tree.
    """
    return token_type in {token.NL, token.COMMENT, token.ENCODING}
else:
  def is_non_coding_token(token_type):
    # type: (int) -> bool
    """
    These are considered non-coding tokens, as they don't affect the syntax tree.
    """
    # Pre-3.7, all pseudo-tokens were numbered above token.N_TOKENS.
    return token_type >= token.N_TOKENS

113 

114 

def generate_tokens(text):
  # type: (str) -> Iterator[TokenInfo]
  """
  Generates standard library tokens for the given code.
  """
  # tokenize.generate_tokens is technically an undocumented API for Python3, but allows us to use the same API as for
  # Python2. See http://stackoverflow.com/a/4952291/328565.
  # FIXME: Remove cast once https://github.com/python/typeshed/issues/7003 gets fixed
  readline = io.StringIO(text).readline
  return tokenize.generate_tokens(cast(Callable[[], str], readline))

124 

125 

def iter_children_func(node):
  # type: (AST) -> Callable
  """
  Returns a function which yields all direct children of a AST node,
  skipping children that are singleton nodes.
  The function depends on whether ``node`` is from ``ast`` or from the ``astroid`` module.
  """
  # Only astroid nodes expose get_children(); stdlib ast nodes do not.
  if hasattr(node, 'get_children'):
    return iter_children_astroid
  return iter_children_ast

134 

135 

def iter_children_astroid(node):
  # type: (NodeNG) -> Union[Iterator, List]
  """Yields the direct children of an astroid node, or nothing for f-string nodes."""
  # Don't attempt to process children of JoinedStr nodes, which we can't fully handle yet.
  return [] if is_joined_str(node) else node.get_children()

143 

144 

# AST classes whose instances are shared singletons (operators, expression contexts, etc.);
# they carry no position info of their own, so tree traversals skip them (see iter_children_ast).
SINGLETONS = {c for n, c in iteritems(ast.__dict__) if isinstance(c, type) and
              issubclass(c, (ast.expr_context, ast.boolop, ast.operator, ast.unaryop, ast.cmpop))}

147 

def iter_children_ast(node):
  # type: (AST) -> Iterator[Union[AST, expr]]
  """Yields the direct children of an ``ast`` node, skipping singletons and f-string internals."""
  # Don't attempt to process children of JoinedStr nodes, which we can't fully handle yet.
  if is_joined_str(node):
    return

  if isinstance(node, ast.Dict):
    # override the iteration order: instead of <all keys>, <all values>,
    # yield keys and values in source order (key1, value1, key2, value2, ...)
    for k, v in zip(node.keys, node.values):
      # A None key corresponds to a `**mapping` entry, which has no key node to yield.
      if k is not None:
        yield k
      yield v
    return

  # Skip singleton children; they don't reflect particular positions in the code and break the
  # assumptions about the tree consisting of distinct nodes. Note that collecting classes
  # beforehand and checking them in a set is faster than using isinstance each time.
  for c in ast.iter_child_nodes(node):
    if c.__class__ not in SINGLETONS:
      yield c

169 

170 

# Names of all statement node classes in `ast` (e.g. 'Assign', 'Return', ...).
stmt_class_names = {n for n, c in iteritems(ast.__dict__)
                    if isinstance(c, type) and issubclass(c, ast.stmt)}
# Names of all expression node classes in `ast`, plus astroid-only expression node
# names that have no `ast` equivalent.
expr_class_names = ({n for n, c in iteritems(ast.__dict__)
                     if isinstance(c, type) and issubclass(c, ast.expr)} |
                    {'AssignName', 'DelName', 'Const', 'AssignAttr', 'DelAttr'})

176 

# These feel hacky compared to isinstance() but allow us to work with both ast and astroid nodes
# in the same way, and without even importing astroid.
def is_expr(node):
  # type: (AstNode) -> bool
  """Returns whether node is an expression node."""
  return type(node).__name__ in expr_class_names

183 

def is_stmt(node):
  # type: (AstNode) -> bool
  """Returns whether node is a statement node."""
  return type(node).__name__ in stmt_class_names

188 

def is_module(node):
  # type: (AstNode) -> bool
  """Returns whether node is a module node."""
  return type(node).__name__ == 'Module'

193 

def is_joined_str(node):
  # type: (AstNode) -> bool
  """Returns whether node is a JoinedStr node, used to represent f-strings."""
  # At the moment, nodes below JoinedStr have wrong line/col info, and trying to process them only
  # leads to errors.
  return type(node).__name__ == 'JoinedStr'

200 

201 

def is_starred(node):
  # type: (AstNode) -> bool
  """Returns whether node is a starred expression node."""
  return type(node).__name__ == 'Starred'

206 

207 

def is_slice(node):
  # type: (AstNode) -> bool
  """Returns whether node represents a slice, e.g. `1:2` in `x[1:2]`"""
  # Before 3.9, a tuple containing a slice is an ExtSlice,
  # but this was removed in https://bugs.python.org/issue34822
  name = node.__class__.__name__
  if name in ('Slice', 'ExtSlice'):
    return True
  return name == 'Tuple' and any(map(is_slice, cast(ast.Tuple, node).elts))

220 

221 

def is_empty_astroid_slice(node):
  # type: (AstNode) -> bool
  """Returns whether node is an astroid Slice with no lower, upper, or step (i.e. `x[:]`)."""
  # The isinstance check excludes stdlib ast.Slice nodes; only astroid slices qualify.
  if node.__class__.__name__ != "Slice" or isinstance(node, ast.AST):
    return False
  return node.lower is None and node.upper is None and node.step is None

229 

230 

# Sentinel value used by visit_tree(): marks stack entries whose previsit (and children)
# still need processing, as opposed to entries that are awaiting their postvisit call.
_PREVISIT = object()

233 

def visit_tree(node, previsit, postvisit):
  # type: (Module, Callable[[AstNode, Optional[Token]], Tuple[Optional[Token], Optional[Token]]], Optional[Callable[[AstNode, Optional[Token], Optional[Token]], None]]) -> None
  """
  Scans the tree under the node depth-first using an explicit stack. It avoids implicit recursion
  via the function call stack to avoid hitting 'maximum recursion depth exceeded' error.

  It calls ``previsit()`` and ``postvisit()`` as follows:

  * ``previsit(node, par_value)`` - should return ``(par_value, value)``
    ``par_value`` is as returned from ``previsit()`` of the parent.

  * ``postvisit(node, par_value, value)`` - should return ``value``
    ``par_value`` is as returned from ``previsit()`` of the parent, and ``value`` is as
    returned from ``previsit()`` of this node itself. The return ``value`` is ignored except
    the one for the root node, which is returned from the overall ``visit_tree()`` call.

  For the initial node, ``par_value`` is None. ``postvisit`` may be None.
  """
  if not postvisit:
    # Default no-op postvisit, so the loop below never has to check for None.
    postvisit = lambda node, pvalue, value: None

  iter_children = iter_children_func(node)
  done = set()
  ret = None
  stack = [(node, None, _PREVISIT)] # type: List[Tuple[AstNode, Optional[Token], Union[Optional[Token], object]]]
  while stack:
    current, par_value, value = stack.pop()
    if value is _PREVISIT:
      assert current not in done    # protect against infinite loop in case of a bad tree.
      done.add(current)

      pvalue, post_value = previsit(current, par_value)
      # Re-push the current node (now carrying post_value instead of the _PREVISIT
      # sentinel) so it gets postvisited after all of its children are processed.
      stack.append((current, par_value, post_value))

      # Insert all children in reverse order (so that first child ends up on top of the stack).
      ins = len(stack)
      for n in iter_children(current):
        stack.insert(ins, (n, pvalue, _PREVISIT))
    else:
      # Children are done; since the root is postvisited last, the final `ret` is the
      # root's postvisit value, which is what gets returned below.
      ret = postvisit(current, par_value, cast(Optional[Token], value))
  return ret

275 

276 

277 

def walk(node):
  # type: (AST) -> Iterator[Union[Module, AstNode]]
  """
  Recursively yield all descendant nodes in the tree starting at ``node`` (including ``node``
  itself), using depth-first pre-order traversal (yielding parents before their children).

  This is similar to ``ast.walk()``, but with a different order, and it works for both ``ast`` and
  ``astroid`` trees. Also, as ``iter_children()``, it skips singleton nodes generated by ``ast``.
  """
  children_of = iter_children_func(node)
  seen = set()
  pending = [node]
  while pending:
    current = pending.pop()
    assert current not in seen    # protect against infinite loop in case of a bad tree.
    seen.add(current)

    yield current

    # Insert all children in reverse order (so that first child ends up on top of the stack).
    # This is faster than building a list and reversing it.
    slot = len(pending)
    for child in children_of(current):
      pending.insert(slot, child)

302 

303 

def replace(text, replacements):
  # type: (str, List[Tuple[int, int, str]]) -> str
  """
  Replaces multiple slices of text with new values. This is a convenience method for making code
  modifications of ranges e.g. as identified by ``ASTTokens.get_text_range(node)``. Replacements is
  an iterable of ``(start, end, new_text)`` tuples.

  For example, ``replace("this is a test", [(0, 4, "X"), (8, 9, "THE")])`` produces
  ``"X is THE test"``.
  """
  pieces = []
  cursor = 0
  # Apply replacements left-to-right, keeping the untouched text between them.
  for start, end, new_text in sorted(replacements):
    pieces.append(text[cursor:start])
    pieces.append(new_text)
    cursor = end
  pieces.append(text[cursor:])
  return ''.join(pieces)

322 

323 

class NodeMethods(object):
  """
  Helper to get `visit_{node_type}` methods given a node's class and cache the results.
  """
  def __init__(self):
    # type: () -> None
    # Maps a node class to the resolved visitor method, so getattr runs once per class.
    self._cache = {} # type: Dict[Union[ABCMeta, type], Callable[[AstNode, Token, Token], Tuple[Token, Token]]]

  def get(self, obj, cls):
    # type: (Any, Union[ABCMeta, type]) -> Callable
    """
    Using the lowercase name of the class as node_type, returns `obj.visit_{node_type}`,
    or `obj.visit_default` if the type-specific method is not found.
    """
    try:
      return self._cache[cls]
    except KeyError:
      method = getattr(obj, "visit_" + cls.__name__.lower(), obj.visit_default)
      self._cache[cls] = method
      return method

344 

345 

if sys.version_info[0] == 2:
  # Python 2 doesn't support non-ASCII identifiers, and making the real patched_generate_tokens support Python 2
  # means working with raw tuples instead of tokenize.TokenInfo namedtuples.
  def patched_generate_tokens(original_tokens):
    # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo]
    return iter(original_tokens)
else:
  def patched_generate_tokens(original_tokens):
    # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo]
    """
    Fixes tokens yielded by `tokenize.generate_tokens` to handle more non-ASCII characters in identifiers.
    Workaround for https://github.com/python/cpython/issues/68382.
    Should only be used when tokenizing a string that is known to be valid syntax,
    because it assumes that error tokens are not actually errors.
    Combines groups of consecutive NAME, NUMBER, and/or ERRORTOKEN tokens into a single NAME token.
    """
    pending = [] # type: List[tokenize.TokenInfo]
    for tok in original_tokens:
      mergeable = tok.type in (tokenize.NAME, tokenize.ERRORTOKEN, tokenize.NUMBER)
      # Only combine tokens if they have no whitespace in between
      adjacent = not pending or pending[-1].end == tok.start
      if mergeable and adjacent:
        pending.append(tok)
      else:
        # Current token breaks the run: flush the pending group, then emit it as-is.
        for merged in combine_tokens(pending):
          yield merged
        pending = []
        yield tok
    # Flush whatever run was still open when the input ended.
    for merged in combine_tokens(pending):
      yield merged

  def combine_tokens(group):
    # type: (List[tokenize.TokenInfo]) -> List[tokenize.TokenInfo]
    """Merges a run of adjacent tokens into one NAME token when it contains an ERRORTOKEN."""
    has_error = any(tok.type == tokenize.ERRORTOKEN for tok in group)
    spans_one_line = len({tok.line for tok in group}) == 1
    if not (has_error and spans_one_line):
      # Nothing to fix (or a multi-line run we can't safely merge): pass through unchanged.
      return group
    return [
      tokenize.TokenInfo(
        type=tokenize.NAME,
        string="".join(t.string for t in group),
        start=group[0].start,
        end=group[-1].end,
        line=group[0].line,
      )
    ]

391 

392 

def last_stmt(node):
  # type: (ast.AST) -> ast.AST
  """
  If the given AST node contains multiple statements, return the last one.
  Otherwise, just return the node.
  """
  # ast.match_case only exists on 3.10+; fall back to an empty tuple, which
  # isinstance simply ignores, on older versions.
  stmt_types = (ast.stmt, ast.excepthandler, getattr(ast, "match_case", ()))
  last_child = None
  for child in ast.iter_child_nodes(node):
    if isinstance(child, stmt_types):
      last_child = child
  if last_child is None:
    return node
  # Recurse: the last statement may itself contain nested statements.
  return last_stmt(last_child)

406 

407 

if sys.version_info[:2] >= (3, 8):
  from functools import lru_cache

  @lru_cache(maxsize=None)
  def fstring_positions_work():
    # type: () -> bool
    """
    The positions attached to nodes inside f-string FormattedValues have some bugs
    that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
    This checks for those bugs more concretely without relying on the Python version.
    Specifically this checks:
     - Values with a format spec or conversion
     - Repeated (i.e. identical-looking) expressions
     - Multiline f-strings implicitly concatenated.
    """
    source = """(
      f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
      f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
      f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
    )"""
    tree = ast.parse(source)
    name_nodes = [n for n in ast.walk(tree) if isinstance(n, ast.Name)]
    seen_positions = [(n.lineno, n.col_offset) for n in name_nodes]
    # Buggy interpreters report the same position for distinct names.
    if len(set(seen_positions)) != len(seen_positions):
      return False
    # And each reported position must actually cover the name's own text.
    return all(
      ast.get_source_segment(source, n) == n.id
      for n in name_nodes
    )

  def annotate_fstring_nodes(tree):
    # type: (ast.AST) -> None
    """
    Add a special attribute `_broken_positions` to nodes inside f-strings
    if the lineno/col_offset cannot be trusted.
    """
    for node in walk(tree):
      if not isinstance(node, ast.JoinedStr):
        continue
      for part in node.values:
        # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
        setattr(part, '_broken_positions', True)  # use setattr for mypy

        if not isinstance(part, ast.FormattedValue):
          continue
        if not fstring_positions_work():
          for inner in walk(part.value):
            setattr(inner, '_broken_positions', True)

        if part.format_spec:  # this is another JoinedStr
          # Again, the standard positions span the full f-string.
          setattr(part.format_spec, '_broken_positions', True)
          # Recursively annotate the nested JoinedStr in the same way. While this is
          # usually automatic for other nodes, the children of f-strings are
          # explicitly excluded in iter_children_ast, so the outer walk skips them.
          annotate_fstring_nodes(part.format_spec)
else:
  def fstring_positions_work():
    # type: () -> bool
    return False

  def annotate_fstring_nodes(_tree):
    # type: (ast.AST) -> None
    pass