# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ast
import numbers
import sys
import token
from ast import Module
from typing import Callable, List, Union, cast, Optional, Tuple, TYPE_CHECKING

import six

from . import util
from .asttokens import ASTTokens
from .util import AstConstant
from .astroid_compat import astroid_node_classes as nc, BaseContainer as AstroidBaseContainer

if TYPE_CHECKING:
  from .util import AstNode


# Mapping of matching braces. To find a token here, look up token[:2].
_matching_pairs_left = {
  (token.OP, '('): (token.OP, ')'),
  (token.OP, '['): (token.OP, ']'),
  (token.OP, '{'): (token.OP, '}'),
}

_matching_pairs_right = {
  (token.OP, ')'): (token.OP, '('),
  (token.OP, ']'): (token.OP, '['),
  (token.OP, '}'): (token.OP, '{'),
}
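
# For example (illustrative), tok[:2] for an opening paren is (token.OP, '('), and looking it
# up in _matching_pairs_left yields the matching (token.OP, ')') we expect to see later.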



class MarkTokens(object):
  """
  Helper that visits all nodes in the AST tree and assigns .first_token and .last_token attributes
  to each of them. This is the heart of the token-marking logic.
  """
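
  # A rough usage sketch (illustrative): MarkTokens is normally driven by the public
  # ASTTokens API rather than instantiated directly, e.g.
  #
  #     import asttokens
  #     atok = asttokens.ASTTokens("a = (1 + 2)", parse=True)  # runs MarkTokens internally
  #     assign = atok.tree.body[0]
  #     assign.first_token.string, assign.last_token.string    # 'a' and ')'
  #     atok.get_text(assign)                                  # 'a = (1 + 2)'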

  def __init__(self, code):
    # type: (ASTTokens) -> None
    self._code = code
    self._methods = util.NodeMethods()
    self._iter_children = None  # type: Optional[Callable]

  def visit_tree(self, node):
    # type: (Module) -> None
    self._iter_children = util.iter_children_func(node)
    util.visit_tree(node, self._visit_before_children, self._visit_after_children)

  def _visit_before_children(self, node, parent_token):
    # type: (AstNode, Optional[util.Token]) -> Tuple[Optional[util.Token], Optional[util.Token]]
    col = getattr(node, 'col_offset', None)
    token = self._code.get_token_from_utf8(node.lineno, col) if col is not None else None

    if not token and util.is_module(node):
      # We'll assume that a Module node starts at the start of the source code.
      token = self._code.get_token(1, 0)

    # Use our own token, or our parent's if we don't have one, to pass to child calls as
    # parent_token argument. The second value becomes the token argument of _visit_after_children.
    return (token or parent_token, token)

  def _visit_after_children(self, node, parent_token, token):
    # type: (AstNode, Optional[util.Token], Optional[util.Token]) -> None
    # This processes the node generically first, after all children have been processed.

    # Get the first and last tokens that belong to children. Note how this doesn't assume that we
    # iterate through children in order that corresponds to occurrence in source code. This
    # assumption can fail (e.g. with return annotations).
    first = token
    last = None
    for child in cast(Callable, self._iter_children)(node):
      # astroid slices have especially wrong positions, we don't want them to corrupt their parents.
      if util.is_empty_astroid_slice(child):
        continue
      if not first or child.first_token.index < first.index:
        first = child.first_token
      if not last or child.last_token.index > last.index:
        last = child.last_token

    # If we don't have a first token from _visit_before_children, and there were no children, then
    # use the parent's token as the first token.
    first = first or parent_token

    # If no children, set last token to the first one.
    last = last or first

    # Statements continue to before NEWLINE. This helps cover a few different cases at once.
    if util.is_stmt(node):
      last = self._find_last_in_stmt(cast(util.Token, last))

    # Capture any unmatched brackets.
    first, last = self._expand_to_matching_pairs(cast(util.Token, first), cast(util.Token, last), node)

    # Give a chance to node-specific methods to adjust.
    nfirst, nlast = self._methods.get(self, node.__class__)(node, first, last)

    if (nfirst, nlast) != (first, last):
      # If anything changed, expand again to capture any unmatched brackets.
      nfirst, nlast = self._expand_to_matching_pairs(nfirst, nlast, node)

    node.first_token = nfirst
    node.last_token = nlast

  def _find_last_in_stmt(self, start_token):
    # type: (util.Token) -> util.Token
    t = start_token
    while (not util.match_token(t, token.NEWLINE) and
           not util.match_token(t, token.OP, ';') and
           not token.ISEOF(t.type)):
      t = self._code.next_token(t, include_extra=True)
    return self._code.prev_token(t)
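
  # For example, given the source `x = foo(1); y = 2` (illustrative), _find_last_in_stmt stops
  # at the ';', so the first Assign statement ends at the ')' just before it.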

  def _expand_to_matching_pairs(self, first_token, last_token, node):
    # type: (util.Token, util.Token, AstNode) -> Tuple[util.Token, util.Token]
    """
    Scan tokens in [first_token, last_token] range that are between node's children, and for any
    unmatched brackets, adjust first/last tokens to include the closing pair.
    """
    # We look for opening parens/braces among non-child tokens (i.e. tokens between our actual
    # child nodes). If we find any closing ones, we match them to the opens.
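    # For instance, in `d[k]` (illustrative) the child nodes only cover `d` and `k`; the scan
    # below sees the unmatched '[' between them, so last_token gets extended to the matching ']'.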

    to_match_right = []  # type: List[Tuple[int, str]]
    to_match_left = []
    for tok in self._code.token_range(first_token, last_token):
      tok_info = tok[:2]
      if to_match_right and tok_info == to_match_right[-1]:
        to_match_right.pop()
      elif tok_info in _matching_pairs_left:
        to_match_right.append(_matching_pairs_left[tok_info])
      elif tok_info in _matching_pairs_right:
        to_match_left.append(_matching_pairs_right[tok_info])

    # Once done, extend `last_token` to match any unclosed parens/braces.
    for match in reversed(to_match_right):
      last = self._code.next_token(last_token)
      # Allow for trailing commas or colons (allowed in subscripts) before the closing delimiter
      while any(util.match_token(last, token.OP, x) for x in (',', ':')):
        last = self._code.next_token(last)
      # Now check for the actual closing delimiter.
      if util.match_token(last, *match):
        last_token = last

    # And extend `first_token` to match any unclosed opening parens/braces.
    for match in to_match_left:
      first = self._code.prev_token(first_token)
      if util.match_token(first, *match):
        first_token = first

    return (first_token, last_token)

  #----------------------------------------------------------------------
  # Node visitors. Each takes a preliminary first and last tokens, and returns the adjusted pair
  # that will actually be assigned.

  def visit_default(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # pylint: disable=no-self-use
    # By default, we don't need to adjust the token we computed earlier.
    return (first_token, last_token)

  def handle_comp(self, open_brace, node, first_token, last_token):
    # type: (str, AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # For list/set/dict comprehensions, we only get the token of the first child, so adjust it to
    # include the opening brace (the closing brace will be matched automatically).
    before = self._code.prev_token(first_token)
    util.expect_token(before, token.OP, open_brace)
    return (before, last_token)
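
  # E.g. on interpreters where a comprehension's position points at its first child (see the
  # version checks below), `[x for x in y]` initially starts at `x` (illustrative); handle_comp
  # steps back to the opening '[' and the closing ']' is then matched automatically.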

  # Python 3.8 fixed the starting position of list comprehensions:
  # https://bugs.python.org/issue31241
  if sys.version_info < (3, 8):
    def visit_listcomp(self, node, first_token, last_token):
      # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
      return self.handle_comp('[', node, first_token, last_token)

  if six.PY2:
    # We shouldn't do this on PY3 because its SetComp/DictComp already have a correct start.
    def visit_setcomp(self, node, first_token, last_token):
      # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
      return self.handle_comp('{', node, first_token, last_token)

    def visit_dictcomp(self, node, first_token, last_token):
      # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
      return self.handle_comp('{', node, first_token, last_token)

  def visit_comprehension(self,
                          node,  # type: AstNode
                          first_token,  # type: util.Token
                          last_token,  # type: util.Token
                          ):
    # type: (...) -> Tuple[util.Token, util.Token]
    # The 'comprehension' node starts with 'for' but we only get the first child; we search
    # backwards to find the 'for' keyword.
    first = self._code.find_token(first_token, token.NAME, 'for', reverse=True)
    return (first, last_token)

  def visit_if(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    while first_token.string not in ('if', 'elif'):
      first_token = self._code.prev_token(first_token)
    return first_token, last_token

  def handle_attr(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Attribute node has ".attr" (2 tokens) after the last child.
    dot = self._code.find_token(last_token, token.OP, '.')
    name = self._code.next_token(dot)
    util.expect_token(name, token.NAME)
    return (first_token, name)
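
  # E.g. in `a.b.c` (illustrative), the inner Attribute ends at the NAME 'b', and the outer
  # Attribute is then extended past the second '.' to end at 'c'.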

  visit_attribute = handle_attr
  visit_assignattr = handle_attr
  visit_delattr = handle_attr

  def handle_def(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # With astroid, nodes that start with a doc-string can have an empty body, in which case we
    # need to adjust the last token to include the doc string.
    if not node.body and (getattr(node, 'doc_node', None) or getattr(node, 'doc', None)):  # type: ignore[union-attr]
      last_token = self._code.find_token(last_token, token.STRING)

    # Include @ from decorator
    if first_token.index > 0:
      prev = self._code.prev_token(first_token)
      if util.match_token(prev, token.OP, '@'):
        first_token = prev
    return (first_token, last_token)

  visit_classdef = handle_def
  visit_functiondef = handle_def

  def handle_following_brackets(self, node, last_token, opening_bracket):
    # type: (AstNode, util.Token, str) -> util.Token
    # This is for calls and subscripts, which have a pair of brackets
    # at the end which may contain no nodes, e.g. foo() or bar[:].
    # We look for the opening bracket and then let the matching pair be found automatically.
    # Remember that last_token is at the end of all children,
    # so we are not worried about encountering a bracket that belongs to a child.
    first_child = next(cast(Callable, self._iter_children)(node))
    call_start = self._code.find_token(first_child.last_token, token.OP, opening_bracket)
    if call_start.index > last_token.index:
      last_token = call_start
    return last_token

  def visit_call(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    last_token = self.handle_following_brackets(node, last_token, '(')

    # Handling a python bug with decorators with empty parens, e.g.
    # @deco()
    # def ...
    if util.match_token(first_token, token.OP, '@'):
      first_token = self._code.next_token(first_token)
    return (first_token, last_token)

  def visit_matchclass(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    last_token = self.handle_following_brackets(node, last_token, '(')
    return (first_token, last_token)

  def visit_subscript(self,
                      node,  # type: AstNode
                      first_token,  # type: util.Token
                      last_token,  # type: util.Token
                      ):
    # type: (...) -> Tuple[util.Token, util.Token]
    last_token = self.handle_following_brackets(node, last_token, '[')
    return (first_token, last_token)

  def visit_slice(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Consume `:` tokens to the left and right. In Python 3.9, Slice nodes are
    # given a col_offset (and end_col_offset), so this will always start inside
    # the slice, even if it is the empty slice. However, in 3.8 and below, this
    # will only expand to the full slice if the slice contains a node with a
    # col_offset. So x[:] will only get the correct tokens in 3.9, but x[1:] and
    # x[:1] will, even on earlier versions of Python.
    while True:
      prev = self._code.prev_token(first_token)
      if prev.string != ':':
        break
      first_token = prev
    while True:
      next_ = self._code.next_token(last_token)
      if next_.string != ':':
        break
      last_token = next_
    return (first_token, last_token)

  def handle_bare_tuple(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # A bare tuple doesn't include parens; if there is a trailing comma, make it part of the tuple.
    maybe_comma = self._code.next_token(last_token)
    if util.match_token(maybe_comma, token.OP, ','):
      last_token = maybe_comma
    return (first_token, last_token)
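
  # E.g. for `x = 1, 2,` (illustrative), the trailing comma becomes part of the Tuple, whose
  # text is then '1, 2,'.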

  if sys.version_info >= (3, 8):
    # In Python 3.8 parsed tuples include parentheses when present.
    def handle_tuple_nonempty(self, node, first_token, last_token):
      # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
      assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
      # It's a bare tuple if the first token belongs to the first child. The first child may
      # include extraneous parentheses (which don't create new nodes), so account for those too.
      child = node.elts[0]
      if TYPE_CHECKING:
        child = cast(AstNode, child)
      child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True)
      if first_token == child_first:
        return self.handle_bare_tuple(node, first_token, last_token)
      return (first_token, last_token)
  else:
    # Before Python 3.8, parsed tuples do not include parens.
    def handle_tuple_nonempty(self, node, first_token, last_token):
      # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
      (first_token, last_token) = self.handle_bare_tuple(node, first_token, last_token)
      return self._gobble_parens(first_token, last_token, False)

  def visit_tuple(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
    if not node.elts:
      # An empty tuple is just "()", and we need no further info.
      return (first_token, last_token)
    return self.handle_tuple_nonempty(node, first_token, last_token)

  def _gobble_parens(self, first_token, last_token, include_all=False):
    # type: (util.Token, util.Token, bool) -> Tuple[util.Token, util.Token]
    # Expands a range of tokens to include one or all pairs of surrounding parentheses, and
    # returns (first, last) tokens that include these parens.
    while first_token.index > 0:
      prev = self._code.prev_token(first_token)
      next = self._code.next_token(last_token)
      if util.match_token(prev, token.OP, '(') and util.match_token(next, token.OP, ')'):
        first_token, last_token = prev, next
        if include_all:
          continue
      break
    return (first_token, last_token)
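
  # E.g. given first/last tokens at `1` and `2` inside `((1, 2))` (illustrative),
  # include_all=True absorbs both pairs of parentheses, while include_all=False stops after
  # the innermost pair.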

  def visit_str(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    return self.handle_str(first_token, last_token)

  def visit_joinedstr(self,
                      node,  # type: AstNode
                      first_token,  # type: util.Token
                      last_token,  # type: util.Token
                      ):
    # type: (...) -> Tuple[util.Token, util.Token]
    if sys.version_info < (3, 12):
      # Older versions don't tokenize the contents of f-strings
      return self.handle_str(first_token, last_token)

    last = first_token
    while True:
      if util.match_token(last, getattr(token, "FSTRING_START")):
        # Python 3.12+ has tokens for the start (e.g. `f"`) and end (`"`)
        # of the f-string. We can't just look for the next FSTRING_END
        # because f-strings can be nested, e.g. f"{f'{x}'}", so we need
        # to treat this like matching balanced parentheses.
        count = 1
        while count > 0:
          last = self._code.next_token(last)
          # mypy complains about token.FSTRING_START and token.FSTRING_END.
          if util.match_token(last, getattr(token, "FSTRING_START")):
            count += 1
          elif util.match_token(last, getattr(token, "FSTRING_END")):
            count -= 1
        last_token = last
        last = self._code.next_token(last_token)
      elif util.match_token(last, token.STRING):
        # Similar to handle_str, we also need to handle adjacent strings.
        last_token = last
        last = self._code.next_token(last_token)
      else:
        break
    return (first_token, last_token)

  def visit_bytes(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    return self.handle_str(first_token, last_token)

  def handle_str(self, first_token, last_token):
    # type: (util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Multiple adjacent STRING tokens form a single string.
    last = self._code.next_token(last_token)
    while util.match_token(last, token.STRING):
      last_token = last
      last = self._code.next_token(last_token)
    return (first_token, last_token)
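
  # E.g. the implicitly concatenated literal `'hello' 'world'` (illustrative) is a single
  # Str/Constant node spanning two STRING tokens; handle_str walks forward so last_token
  # ends at `'world'`.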

  def handle_num(self,
                 node,  # type: AstNode
                 value,  # type: Union[complex, int, numbers.Number]
                 first_token,  # type: util.Token
                 last_token,  # type: util.Token
                 ):
    # type: (...) -> Tuple[util.Token, util.Token]
    # A constant like '-1' gets turned into two tokens; this will skip the '-'.
    while util.match_token(last_token, token.OP):
      last_token = self._code.next_token(last_token)

    if isinstance(value, complex):
      # A complex number like -2j cannot be compared directly to 0
      # A complex number like 1-2j is expressed as a binary operation
      # so we don't need to worry about it
      value = value.imag

    # This makes sure that the - is included
    if value < 0 and first_token.type == token.NUMBER:  # type: ignore[operator]
      first_token = self._code.prev_token(first_token)
    return (first_token, last_token)

  def visit_num(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    return self.handle_num(node, cast(ast.Num, node).n, first_token, last_token)

  # In Astroid, the Num and Str nodes are replaced by Const.
  def visit_const(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    assert isinstance(node, AstConstant) or isinstance(node, nc.Const)
    if isinstance(node.value, numbers.Number):
      return self.handle_num(node, node.value, first_token, last_token)
    elif isinstance(node.value, (six.text_type, six.binary_type)):
      return self.visit_str(node, first_token, last_token)
    return (first_token, last_token)

  # In Python >= 3.6, there is a similar class 'Constant' for literals
  # In 3.8 it became the type produced by ast.parse
  # https://bugs.python.org/issue32892
  visit_constant = visit_const

  def visit_keyword(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Until python 3.9 (https://bugs.python.org/issue40141),
    # ast.keyword nodes didn't have line info. Astroid has lineno None.
    assert isinstance(node, ast.keyword) or isinstance(node, nc.Keyword)
    if node.arg is not None and getattr(node, 'lineno', None) is None:
      equals = self._code.find_token(first_token, token.OP, '=', reverse=True)
      name = self._code.prev_token(equals)
      util.expect_token(name, token.NAME, node.arg)
      first_token = name
    return (first_token, last_token)

  def visit_starred(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Astroid has 'Starred' nodes (for "foo(*bar)" type args), but they need to be adjusted.
    if not util.match_token(first_token, token.OP, '*'):
      star = self._code.prev_token(first_token)
      if util.match_token(star, token.OP, '*'):
        first_token = star
    return (first_token, last_token)

  def visit_assignname(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Astroid may turn 'except' clause into AssignName, but we need to adjust it.
    if util.match_token(first_token, token.NAME, 'except'):
      colon = self._code.find_token(last_token, token.OP, ':')
      first_token = last_token = self._code.prev_token(colon)
    return (first_token, last_token)

  if six.PY2:
    # No need for this on Python3, which already handles 'with' nodes correctly.
    def visit_with(self, node, first_token, last_token):
      # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
      first = self._code.find_token(first_token, token.NAME, 'with', reverse=True)
      return (first, last_token)

  # Async nodes should typically start with the word 'async'
  # but Python < 3.7 doesn't put the col_offset there
  # AsyncFunctionDef is slightly different because it might have
  # decorators before that, which visit_functiondef handles
  def handle_async(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    if not first_token.string == 'async':
      first_token = self._code.prev_token(first_token)
    return (first_token, last_token)

  visit_asyncfor = handle_async
  visit_asyncwith = handle_async

  def visit_asyncfunctiondef(self,
                             node,  # type: AstNode
                             first_token,  # type: util.Token
                             last_token,  # type: util.Token
                             ):
    # type: (...) -> Tuple[util.Token, util.Token]
    if util.match_token(first_token, token.NAME, 'def'):
      # Include the 'async' token
      first_token = self._code.prev_token(first_token)
    return self.visit_functiondef(node, first_token, last_token)