Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/python/tokenize.py: 96%
427 statements
coverage.py v7.2.2, created at 2023-03-26 07:36 +0000

# -*- coding: utf-8 -*-
"""
This tokenizer has been copied from the ``tokenize.py`` standard library
tokenizer. The reason was simple: The standard library tokenizer fails
if the indentation is not right. To make it possible to do error recovery the
tokenizer needed to be rewritten.

Basically this is a stripped down version of the standard library module, so
you can read the documentation there. Additionally we included some speed and
memory optimizations here.
"""
from __future__ import absolute_import

import sys
import re
import itertools as _itertools
from codecs import BOM_UTF8
from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \
    Pattern, Set

from parso.python.token import PythonTokenTypes
from parso.utils import split_lines, PythonVersionInfo, parse_version_string


# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
MAX_UNICODE = '\U0010ffff'

STRING = PythonTokenTypes.STRING
NAME = PythonTokenTypes.NAME
NUMBER = PythonTokenTypes.NUMBER
OP = PythonTokenTypes.OP
NEWLINE = PythonTokenTypes.NEWLINE
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ENDMARKER = PythonTokenTypes.ENDMARKER
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
FSTRING_END = PythonTokenTypes.FSTRING_END


class TokenCollection(NamedTuple):
    pseudo_token: Pattern
    single_quoted: Set[str]
    triple_quoted: Set[str]
    endpats: Dict[str, Pattern]
    whitespace: Pattern
    fstring_pattern_map: Dict[str, str]
    always_break_tokens: Tuple[str]


BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

_token_collection_cache: Dict[PythonVersionInfo, TokenCollection] = {}


def group(*choices, capture=False, **kwargs):
    assert not kwargs

    start = '('
    if not capture:
        start += '?:'
    return start + '|'.join(choices) + ')'


def maybe(*choices):
    return group(*choices) + '?'
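# Illustration (not part of the original module): a quick sketch of what the
# two regex helpers above produce, assuming the definitions given here.
#
#     >>> group('a', 'bc')                # non-capturing by default
#     '(?:a|bc)'
#     >>> group('a', 'bc', capture=True)  # capturing variant
#     '(a|bc)'
#     >>> maybe('x')                      # same alternation, made optional
#     '(?:x)?'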

# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes(*, include_fstring=False, only_fstring=False):
    def different_case_versions(prefix):
        for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
            yield ''.join(s)
    # The valid string prefixes. Only contain the lower case versions,
    # and don't contain any permutations (include 'fr', but not
    # 'rf'). The various permutations will be generated.
    valid_string_prefixes = ['b', 'r', 'u', 'br']

    result = {''}
    if include_fstring:
        f = ['f', 'fr']
        if only_fstring:
            valid_string_prefixes = f
            result = set()
        else:
            valid_string_prefixes += f
    elif only_fstring:
        return set()

    # if we add binary f-strings, add: ['fb', 'fbr']
    for prefix in valid_string_prefixes:
        for t in _itertools.permutations(prefix):
            # create a list with upper and lower versions of each
            # character
            result.update(different_case_versions(t))
    return result
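# Illustration (not part of the original module): with the defaults above,
# _all_string_prefixes() returns the empty prefix plus every casing of the
# single-letter prefixes and every casing/ordering of 'br':
#
#     >>> _all_string_prefixes() == {
#     ...     '', 'b', 'B', 'r', 'R', 'u', 'U',
#     ...     'br', 'bR', 'Br', 'BR', 'rb', 'rB', 'Rb', 'RB'}
#     True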

def _compile(expr):
    return re.compile(expr, re.UNICODE)


def _get_token_collection(version_info):
    try:
        return _token_collection_cache[tuple(version_info)]
    except KeyError:
        _token_collection_cache[tuple(version_info)] = result = \
            _create_token_collection(version_info)
        return result


unicode_character_name = r'[A-Za-z0-9\-]+(?: [A-Za-z0-9\-]+)*'
fstring_string_single_line = _compile(
    r'(?:\{\{|\}\}|\\N\{' + unicode_character_name
    + r'\}|\\(?:\r\n?|\n)|\\[^\r\nN]|[^{}\r\n\\])+'
)
fstring_string_multi_line = _compile(
    r'(?:\{\{|\}\}|\\N\{' + unicode_character_name + r'\}|\\[^N]|[^{}\\])+'
)
fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
fstring_format_spec_multi_line = _compile(r'[^{}]+')

def _create_token_collection(version_info):
    # Note: we use unicode matching for names ("\w") but ascii matching for
    # number literals.
    Whitespace = r'[ \f\t]*'
    whitespace = _compile(Whitespace)
    Comment = r'#[^\r\n]*'
    Name = '([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'

    Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
    Binnumber = r'0[bB](?:_?[01])+'
    Octnumber = r'0[oO](?:_?[0-7])+'
    Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
    Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
    Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
                       r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
    Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
    Floatnumber = group(Pointfloat, Expfloat)
    Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
    Number = group(Imagnumber, Floatnumber, Intnumber)

    # Note that since _all_string_prefixes includes the empty string,
    # StringPrefix can be the empty string (making it optional).
    possible_prefixes = _all_string_prefixes()
    StringPrefix = group(*possible_prefixes)
    StringPrefixWithF = group(*_all_string_prefixes(include_fstring=True))
    fstring_prefixes = _all_string_prefixes(include_fstring=True, only_fstring=True)
    FStringStart = group(*fstring_prefixes)

    # Tail end of ' string.
    Single = r"(?:\\.|[^'\\])*'"
    # Tail end of " string.
    Double = r'(?:\\.|[^"\\])*"'
    # Tail end of ''' string.
    Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
    # Tail end of """ string.
    Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
    Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')

    # Because of leftmost-then-longest match semantics, be sure to put the
    # longest operators first (e.g., if = came before ==, == would get
    # recognized as two instances of =).
    Operator = group(r"\*\*=?", r">>=?", r"<<=?",
                     r"//=?", r"->",
                     r"[+\-*/%&@`|^!=<>]=?",
                     r"~")

    Bracket = '[][(){}]'

    special_args = [r'\.\.\.', r'\r\n?', r'\n', r'[;.,@]']
    if version_info >= (3, 8):
        special_args.insert(0, ":=?")
    else:
        special_args.insert(0, ":")
    Special = group(*special_args)

    Funny = group(Operator, Bracket, Special)

    # First (or only) line of ' or " string.
    ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*"
                    + group("'", r'\\(?:\r\n?|\n)'),
                    StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*'
                    + group('"', r'\\(?:\r\n?|\n)'))
    pseudo_extra_pool = [Comment, Triple]
    all_quotes = '"', "'", '"""', "'''"
    if fstring_prefixes:
        pseudo_extra_pool.append(FStringStart + group(*all_quotes))

    PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
    PseudoToken = group(Whitespace, capture=True) + \
        group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)

    # For a given string prefix plus quotes, endpats maps it to a regex
    # to match the remainder of that string. _prefix can be empty, for
    # a normal single or triple quoted string (with no prefix).
    endpats = {}
    for _prefix in possible_prefixes:
        endpats[_prefix + "'"] = _compile(Single)
        endpats[_prefix + '"'] = _compile(Double)
        endpats[_prefix + "'''"] = _compile(Single3)
        endpats[_prefix + '"""'] = _compile(Double3)

    # A set of all of the single and triple quoted string prefixes,
    # including the opening quotes.
    single_quoted = set()
    triple_quoted = set()
    fstring_pattern_map = {}
    for t in possible_prefixes:
        for quote in '"', "'":
            single_quoted.add(t + quote)

        for quote in '"""', "'''":
            triple_quoted.add(t + quote)

    for t in fstring_prefixes:
        for quote in all_quotes:
            fstring_pattern_map[t + quote] = quote

    ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
                           'finally', 'while', 'with', 'return', 'continue',
                           'break', 'del', 'pass', 'global', 'assert', 'nonlocal')
    pseudo_token_compiled = _compile(PseudoToken)
    return TokenCollection(
        pseudo_token_compiled, single_quoted, triple_quoted, endpats,
        whitespace, fstring_pattern_map, set(ALWAYS_BREAK_TOKENS)
    )

class Token(NamedTuple):
    type: PythonTokenTypes
    string: str
    start_pos: Tuple[int, int]
    prefix: str

    @property
    def end_pos(self) -> Tuple[int, int]:
        lines = split_lines(self.string)
        if len(lines) > 1:
            return self.start_pos[0] + len(lines) - 1, 0
        else:
            return self.start_pos[0], self.start_pos[1] + len(self.string)
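# Illustration (not part of the original module): for a single-line token the
# end position is simply the start column plus the string length, e.g.
#
#     >>> Token(NAME, 'foo', (1, 4), '').end_pos
#     (1, 7)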

class PythonToken(Token):
    def __repr__(self):
        return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
                self._replace(type=self.type.name))


class FStringNode:
    def __init__(self, quote):
        self.quote = quote
        self.parentheses_count = 0
        self.previous_lines = ''
        self.last_string_start_pos = None
        # In the syntax there can be multiple format_spec's nested:
        # {x:{y:3}}
        self.format_spec_count = 0

    def open_parentheses(self, character):
        self.parentheses_count += 1

    def close_parentheses(self, character):
        self.parentheses_count -= 1
        if self.parentheses_count == 0:
            # No parentheses means that the format spec is also finished.
            self.format_spec_count = 0

    def allow_multiline(self):
        return len(self.quote) == 3

    def is_in_expr(self):
        return self.parentheses_count > self.format_spec_count

    def is_in_format_spec(self):
        return not self.is_in_expr() and self.format_spec_count
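# Illustration (not part of the original module): a sketch of how the two
# counters above interact for something like f'{x:{y}}'. Every '(', '[' or '{'
# inside the f-string bumps parentheses_count; a ':' seen at bracket depth one
# starts a format spec and bumps format_spec_count (see tokenize_lines below).
# The node is "in an expression" only while parentheses_count exceeds
# format_spec_count, and closing the last bracket resets the format spec count.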

def _close_fstring_if_necessary(fstring_stack, string, line_nr, column, additional_prefix):
    for fstring_stack_index, node in enumerate(fstring_stack):
        lstripped_string = string.lstrip()
        len_lstrip = len(string) - len(lstripped_string)
        if lstripped_string.startswith(node.quote):
            token = PythonToken(
                FSTRING_END,
                node.quote,
                (line_nr, column + len_lstrip),
                prefix=additional_prefix+string[:len_lstrip],
            )
            additional_prefix = ''
            assert not node.previous_lines
            del fstring_stack[fstring_stack_index:]
            return token, '', len(node.quote) + len_lstrip
    return None, additional_prefix, 0
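# Illustration (not part of the original module): the helper above returns a
# 3-tuple. If the upcoming text starts (after leading whitespace) with the
# quote of any open f-string node, it yields back an FSTRING_END token, an
# empty additional prefix and the number of characters consumed; otherwise it
# returns (None, additional_prefix, 0) and tokenization continues normally.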

def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
    tos = fstring_stack[-1]
    allow_multiline = tos.allow_multiline()
    if tos.is_in_format_spec():
        if allow_multiline:
            regex = fstring_format_spec_multi_line
        else:
            regex = fstring_format_spec_single_line
    else:
        if allow_multiline:
            regex = fstring_string_multi_line
        else:
            regex = fstring_string_single_line

    match = regex.match(line, pos)
    if match is None:
        return tos.previous_lines, pos

    if not tos.previous_lines:
        tos.last_string_start_pos = (lnum, pos)

    string = match.group(0)
    for fstring_stack_node in fstring_stack:
        end_match = endpats[fstring_stack_node.quote].match(string)
        if end_match is not None:
            string = end_match.group(0)[:-len(fstring_stack_node.quote)]

    new_pos = pos
    new_pos += len(string)
    # even if allow_multiline is False, we still need to check for trailing
    # newlines, because a single-line f-string can contain line continuations
    if string.endswith('\n') or string.endswith('\r'):
        tos.previous_lines += string
        string = ''
    else:
        string = tos.previous_lines + string

    return string, new_pos


def tokenize(
    code: str, *, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
) -> Iterator[PythonToken]:
    """Generate tokens from the source code (string)."""
    lines = split_lines(code, keepends=True)
    return tokenize_lines(lines, version_info=version_info, start_pos=start_pos)
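# Illustration (not part of the original module): typical use of the public
# entry point above. The token stream below is a sketch of what this tokenizer
# emits for a trivial snippet.
#
#     >>> for t in tokenize('x = 1\n', version_info=parse_version_string('3.8')):
#     ...     print(t.type.name, repr(t.string))
#     NAME 'x'
#     OP '='
#     NUMBER '1'
#     NEWLINE '\n'
#     ENDMARKER ''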

def _print_tokens(func):
    """
    A small helper function to help debug the tokenize_lines function.
    """
    def wrapper(*args, **kwargs):
        for token in func(*args, **kwargs):
            print(token)  # This print is intentional for debugging!
            yield token

    return wrapper

# @_print_tokens
def tokenize_lines(
    lines: Iterable[str],
    *,
    version_info: PythonVersionInfo,
    indents: List[int] = None,
    start_pos: Tuple[int, int] = (1, 0),
    is_first_token=True,
) -> Iterator[PythonToken]:
    """
    A heavily modified Python standard library tokenizer.

    In addition to the default information, this tokenizer also yields the
    prefix of each token. This idea comes from lib2to3. The prefix contains
    all information that is irrelevant for the parser, like newlines in
    parentheses or comments.
    """
    def dedent_if_necessary(start):
        while start < indents[-1]:
            if start > indents[-2]:
                yield PythonToken(ERROR_DEDENT, '', (lnum, start), '')
                indents[-1] = start
                break
            indents.pop()
            yield PythonToken(DEDENT, '', spos, '')

    pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
        fstring_pattern_map, always_break_tokens, = \
        _get_token_collection(version_info)
    paren_level = 0  # count parentheses
    if indents is None:
        indents = [0]
    max_ = 0
    numchars = '0123456789'
    contstr = ''
    contline: str
    contstr_start: Tuple[int, int]
    endprog: Pattern
    # We start with a newline. This makes indent at the first position
    # possible. It's not valid Python, but still better than an INDENT in the
    # second line (and not in the first). This makes quite a few things in
    # Jedi's fast parser possible.
    new_line = True
    prefix = ''  # Should never be required, but here for safety
    additional_prefix = ''
    lnum = start_pos[0] - 1
    fstring_stack: List[FStringNode] = []
    for line in lines:  # loop over lines in stream
        lnum += 1
        pos = 0
        max_ = len(line)
        if is_first_token:
            if line.startswith(BOM_UTF8_STRING):
                additional_prefix = BOM_UTF8_STRING
                line = line[1:]
                max_ = len(line)

            # Fake that the part before was already parsed.
            line = '^' * start_pos[1] + line
            pos = start_pos[1]
            max_ += start_pos[1]

            is_first_token = False

        if contstr:  # continued string
            endmatch = endprog.match(line)  # noqa: F821
            if endmatch:
                pos = endmatch.end(0)
                yield PythonToken(
                    STRING, contstr + line[:pos],
                    contstr_start, prefix)  # noqa: F821
                contstr = ''
                contline = ''
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        while pos < max_:
            if fstring_stack:
                tos = fstring_stack[-1]
                if not tos.is_in_expr():
                    string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
                    if string:
                        yield PythonToken(
                            FSTRING_STRING, string,
                            tos.last_string_start_pos,
                            # Never has a prefix because it can start anywhere and
                            # include whitespace.
                            prefix=''
                        )
                        tos.previous_lines = ''
                        continue
                    if pos == max_:
                        break

                rest = line[pos:]
                fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary(
                    fstring_stack,
                    rest,
                    lnum,
                    pos,
                    additional_prefix,
                )
                pos += quote_length
                if fstring_end_token is not None:
                    yield fstring_end_token
                    continue

            # in an f-string, match until the end of the string
            if fstring_stack:
                string_line = line
                for fstring_stack_node in fstring_stack:
                    quote = fstring_stack_node.quote
                    end_match = endpats[quote].match(line, pos)
                    if end_match is not None:
                        end_match_string = end_match.group(0)
                        if len(end_match_string) - len(quote) + pos < len(string_line):
                            string_line = line[:pos] + end_match_string[:-len(quote)]
                pseudomatch = pseudo_token.match(string_line, pos)
            else:
                pseudomatch = pseudo_token.match(line, pos)

            if pseudomatch:
                prefix = additional_prefix + pseudomatch.group(1)
                additional_prefix = ''
                start, pos = pseudomatch.span(2)
                spos = (lnum, start)
                token = pseudomatch.group(2)
                if token == '':
                    assert prefix
                    additional_prefix = prefix
                    # This means that we have a line with whitespace/comments at
                    # the end, which just results in an endmarker.
                    break
                initial = token[0]
            else:
                match = whitespace.match(line, pos)
                initial = line[match.end()]
                start = match.end()
                spos = (lnum, start)

            if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None):
                new_line = False
                if paren_level == 0 and not fstring_stack:
                    indent_start = start
                    if indent_start > indents[-1]:
                        yield PythonToken(INDENT, '', spos, '')
                        indents.append(indent_start)
                    yield from dedent_if_necessary(indent_start)

            if not pseudomatch:  # scan for tokens
                match = whitespace.match(line, pos)
                if new_line and paren_level == 0 and not fstring_stack:
                    yield from dedent_if_necessary(match.end())
                pos = match.end()
                new_line = False
                yield PythonToken(
                    ERRORTOKEN, line[pos], (lnum, pos),
                    additional_prefix + match.group(0)
                )
                additional_prefix = ''
                pos += 1
                continue

            if (initial in numchars  # ordinary number
                    or (initial == '.' and token != '.' and token != '...')):
                yield PythonToken(NUMBER, token, spos, prefix)
            elif pseudomatch.group(3) is not None:  # ordinary name
                if token in always_break_tokens and (fstring_stack or paren_level):
                    fstring_stack[:] = []
                    paren_level = 0
                    # We only want to dedent if the token is on a new line.
                    m = re.match(r'[ \f\t]*$', line[:start])
                    if m is not None:
                        yield from dedent_if_necessary(m.end())
                if token.isidentifier():
                    yield PythonToken(NAME, token, spos, prefix)
                else:
                    yield from _split_illegal_unicode_name(token, spos, prefix)
            elif initial in '\r\n':
                if any(not f.allow_multiline() for f in fstring_stack):
                    fstring_stack.clear()

                if not new_line and paren_level == 0 and not fstring_stack:
                    yield PythonToken(NEWLINE, token, spos, prefix)
                else:
                    additional_prefix = prefix + token
                new_line = True
            elif initial == '#':  # Comments
                assert not token.endswith("\n") and not token.endswith("\r")
                if fstring_stack and fstring_stack[-1].is_in_expr():
                    # `#` is not allowed in f-string expressions
                    yield PythonToken(ERRORTOKEN, initial, spos, prefix)
                    pos = start + 1
                else:
                    additional_prefix = prefix + token
            elif token in triple_quoted:
                endprog = endpats[token]
                endmatch = endprog.match(line, pos)
                if endmatch:  # all on one line
                    pos = endmatch.end(0)
                    token = line[start:pos]
                    yield PythonToken(STRING, token, spos, prefix)
                else:
                    contstr_start = spos  # multiple lines
                    contstr = line[start:]
                    contline = line
                    break

            # Check up to the first 3 chars of the token to see if
            # they're in the single_quoted set. If so, they start
            # a string.
            # We're using the first 3, because we're looking for
            # "rb'" (for example) at the start of the token. If
            # we switch to longer prefixes, this needs to be
            # adjusted.
            # Note that initial == token[:1].
            # Also note that single quote checking must come after
            # triple quote checking (above).
            elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                if token[-1] in '\r\n':  # continued string
                    # This means that a single quoted string ends with a
                    # backslash and is continued.
                    contstr_start = lnum, start
                    endprog = (endpats.get(initial) or endpats.get(token[1])
                               or endpats.get(token[2]))
                    contstr = line[start:]
                    contline = line
                    break
                else:  # ordinary string
                    yield PythonToken(STRING, token, spos, prefix)
            elif token in fstring_pattern_map:  # The start of an fstring.
                fstring_stack.append(FStringNode(fstring_pattern_map[token]))
                yield PythonToken(FSTRING_START, token, spos, prefix)
            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
                additional_prefix += prefix + line[start:]
                break
            else:
                if token in '([{':
                    if fstring_stack:
                        fstring_stack[-1].open_parentheses(token)
                    else:
                        paren_level += 1
                elif token in ')]}':
                    if fstring_stack:
                        fstring_stack[-1].close_parentheses(token)
                    else:
                        if paren_level:
                            paren_level -= 1
                elif token.startswith(':') and fstring_stack \
                        and fstring_stack[-1].parentheses_count \
                        - fstring_stack[-1].format_spec_count == 1:
                    # `:` and `:=` both count
                    fstring_stack[-1].format_spec_count += 1
                    token = ':'
                    pos = start + 1

                yield PythonToken(OP, token, spos, prefix)

    if contstr:
        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
        if contstr.endswith('\n') or contstr.endswith('\r'):
            new_line = True

    if fstring_stack:
        tos = fstring_stack[-1]
        if tos.previous_lines:
            yield PythonToken(
                FSTRING_STRING, tos.previous_lines,
                tos.last_string_start_pos,
                # Never has a prefix because it can start anywhere and
                # include whitespace.
                prefix=''
            )

    end_pos = lnum, max_
    # As the last position we just take the maximally possible position. We
    # remove -1 for the last new line.
    for indent in indents[1:]:
        indents.pop()
        yield PythonToken(DEDENT, '', end_pos, '')
    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)

def _split_illegal_unicode_name(token, start_pos, prefix):
    def create_token():
        return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix)

    found = ''
    is_illegal = False
    pos = start_pos
    for i, char in enumerate(token):
        if is_illegal:
            if char.isidentifier():
                yield create_token()
                found = char
                is_illegal = False
                prefix = ''
                pos = start_pos[0], start_pos[1] + i
            else:
                found += char
        else:
            new_found = found + char
            if new_found.isidentifier():
                found = new_found
            else:
                if found:
                    yield create_token()
                    prefix = ''
                    pos = start_pos[0], start_pos[1] + i
                found = char
                is_illegal = True

    if found:
        yield create_token()
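# Illustration (not part of the original module): the helper above splits a
# name containing characters that are not valid in identifiers into
# alternating NAME/ERRORTOKEN pieces, roughly like this:
#
#     >>> [(t.type.name, t.string)
#     ...  for t in _split_illegal_unicode_name('a€b', (1, 0), '')]
#     [('NAME', 'a'), ('ERRORTOKEN', '€'), ('NAME', 'b')]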

if __name__ == "__main__":
    path = sys.argv[1]
    with open(path) as f:
        code = f.read()

    for token in tokenize(code, version_info=parse_version_string('3.10')):
        print(token)