Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/IPython/core/inputtransformer2.py: 23%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

439 statements  

1"""Input transformer machinery to support IPython special syntax. 

2 

3This includes the machinery to recognise and transform ``%magic`` commands, 

4``!system`` commands, ``help?`` querying, prompt stripping, and so forth. 

5 

6Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were 

7deprecated in 7.0 and removed in 9.0 

8""" 

9 

10# Copyright (c) IPython Development Team. 

11# Distributed under the terms of the Modified BSD License. 

12 

13import ast 

14from codeop import CommandCompiler, Compile 

15import re 

16import sys 

17import tokenize 

18from typing import List, Tuple, Optional, Any 

19import warnings 

20from textwrap import dedent 

21 

22from IPython.utils import tokenutil 

23 

24_indent_re = re.compile(r'^[ \t]+') 

25 

def leading_empty_lines(lines):
    """Drop blank and whitespace-only lines from the start of *lines*.

    Returns the input unchanged when it is empty or consists entirely of
    blank lines.
    """
    if not lines:
        return lines
    first_code = next(
        (i for i, ln in enumerate(lines) if ln and not ln.isspace()),
        None,
    )
    return lines if first_code is None else lines[first_code:]

38 

def leading_indent(lines):
    """Remove leading indentation.

    Strips the minimum common leading indentation shared by all lines.
    """
    if not lines:
        return lines
    text = "".join(lines)
    return dedent(text).splitlines(keepends=True)

47 

class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation,
        e.g. ``...``)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts (``>>>``), where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
    doctest : bool, keyword-only
        When True, additionally handle (possibly indented) doctest-style
        prompts, while preserving prompt look-alikes that appear inside
        triple-quoted string literals in the same block.

    Notes
    -----

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None, *, doctest=False):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re
        self.doctest = doctest
        if doctest:
            # Doctest/xdoctest prompts may be indented (e.g. "    >>>").
            # We only treat "..." as a continuation prompt when the same pasted
            # block contains at least one ">>>" line, to avoid ambiguity with the
            # Python Ellipsis literal.
            self._doctest_initial_re = re.compile(r'^\s*>>>')
            self._doctest_ps1_re = re.compile(r'^\s*>>>\s?')
            self._doctest_ps2_re = re.compile(r'^\s*\.\.\.\s?')

            # Very small state machine to detect triple-quoted strings in the
            # *same* input block (e.g. user typed """ then pasted doctest).
            # We preserve literal >>> / ... inside triple-quoted strings.
            self._triple_quote_re = re.compile(r"(?<!\\)(\"\"\"|''')")

    def _triple_quote_mask(self, lines: List[str]) -> List[bool]:
        """
        Return a boolean mask: True if the corresponding line is considered
        inside a triple-quoted string literal.

        This is intentionally heuristic (fast + good enough for paste handling).
        """
        mask: List[bool] = []
        in_triple: str | None = None  # either ''' or """
        for line in lines:
            # A line is "inside" if a triple quote was open when it started.
            mask.append(in_triple is not None)
            # Toggle state for each occurrence of """ or ''' in the line.
            for m in self._triple_quote_re.finditer(line):
                q = m.group(1)
                if in_triple is None:
                    in_triple = q
                    mask[-1] = True  # current line is inside triple quotes
                elif in_triple == q:
                    in_triple = None
                # else: ignore mismatched triple quote while inside
        return mask

    def _strip(self, lines):
        # Remove at most one prompt occurrence from each line.
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        if not lines:
            return lines

        if self.doctest:
            triple_mask = self._triple_quote_mask(lines)

            # Detect doctest prompts only outside triple-quoted strings.
            has_doctest_outside = any(
                (not in_triple) and self._doctest_initial_re.match(l)
                for l, in_triple in zip(lines, triple_mask)
            )
            if not has_doctest_outside:
                # No real doctest prompt in the block: leave input untouched.
                return lines

            out_lines: List[str] = []
            stripped_mask: List[bool] = []  # which lines actually lost a prompt

            for l, in_triple in zip(lines, triple_mask):
                if in_triple:
                    # Never touch prompt look-alikes inside string literals.
                    out_lines.append(l)
                    stripped_mask.append(False)
                    continue

                if self._doctest_ps1_re.match(l):
                    new_l = self._doctest_ps1_re.sub('', l, count=1)
                elif self._doctest_ps2_re.match(l):
                    new_l = self._doctest_ps2_re.sub('', l, count=1)
                else:
                    new_l = l
                out_lines.append(new_l)
                stripped_mask.append(new_l != l)

            # Dedent only the non-triple-quoted segments where stripping occurred.
            dedented: List[str] = []
            i = 0
            while i < len(out_lines):
                # Find the end of the run of lines with the same mask value.
                j = i
                in_triple = triple_mask[i]
                while j < len(out_lines) and triple_mask[j] == in_triple:
                    j += 1

                segment = out_lines[i:j]
                seg_stripped = any(stripped_mask[i:j])

                if (not in_triple) and seg_stripped:
                    dedented.extend(dedent(''.join(segment)).splitlines(keepends=True))
                else:
                    dedented.extend(segment)

                i = j

            return dedented

        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines

173 

# Strips plain Python / doctest prompts (">>> ", "... "), preserving any that
# appear inside triple-quoted strings (doctest=True path in PromptStripper).
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)'),
    doctest=True,
)

# Strips IPython-style prompts ("In [1]: ", "   ...: "), including the
# optional vi editing-mode marker shown before the prompt.
ipython_prompt = PromptStripper(
    re.compile(
        r"""
        ^(  # Match from the beginning of a line, either:

        # 1. First-line prompt:
        ((\[nav\]|\[ins\])?\ )?  # Vi editing mode prompt, if it's there
        In\                      # The 'In' of the prompt, with a space
        \[\d+\]:                 # Command index, as displayed in the prompt
        \                        # With a mandatory trailing space

        |  # ... or ...

        # 2. The three dots of the multiline prompt
        \s*       # All leading whitespace characters
        \.{3,}:   # The three (or more) dots
        \ ?       # With an optional trailing space

        )
        """,
        re.VERBOSE,
    )
)

203 

204 

def cell_magic(lines):
    """Rewrite a cell starting with ``%%name`` into a ``run_cell_magic`` call.

    Cells that do not start with ``%%``, and cell-magic help queries
    (``%%name?``), are returned unchanged.
    """
    if not lines:
        return lines
    head = lines[0]
    if not head.startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', head):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = head[2:].rstrip().partition(' ')
    body = ''.join(lines[1:])
    call = 'get_ipython().run_cell_magic(%r, %r, %r)\n' % (magic_name, first_line, body)
    return [call]

215 

216 

217def _find_assign_op(token_line) -> Optional[int]: 

218 """Get the index of the first assignment in the line ('=' not inside brackets) 

219 

220 Note: We don't try to support multiple special assignment (a = b = %foo) 

221 """ 

222 paren_level = 0 

223 for i, ti in enumerate(token_line): 

224 s = ti.string 

225 if s == '=' and paren_level == 0: 

226 return i 

227 if s in {'(','[','{'}: 

228 paren_level += 1 

229 elif s in {')', ']', '}'}: 

230 if paren_level > 0: 

231 paren_level -= 1 

232 return None 

233 

def find_end_of_continued_line(lines, start_line: int):
    """Return the index of the last physical line of a backslash-continued
    statement beginning at *start_line*.

    Uses 0-indexed line numbers.
    """
    last = start_line
    n = len(lines)
    while lines[last].endswith('\\\n'):
        last += 1
        if last >= n:
            break
    return last

245 

def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Assemble a single logical line from backslash-continued pieces.

    Continued lines are lines ending in ``\``, plus the line following the
    last ``\`` in the block.  *start* is a 0-indexed ``(lineno, colno)`` pair
    pointing at the first piece; *end_line* is the 0-indexed last physical
    line.  The trailing backslash is dropped from every piece but the last,
    the trailing newline is dropped from the last, and the pieces are joined
    with single spaces.

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    first_row, first_col = start
    pieces = [lines[first_row][first_col:]]
    pieces.extend(lines[first_row + 1:end_line + 1])

    assembled = []
    for piece in pieces[:-1]:
        assembled.append(piece.rstrip()[:-1])  # Strip backslash+newline
    assembled.append(pieces[-1].rstrip())      # Strip newline from last line
    return ' '.join(assembled)

272 

class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings or
    comments, which is hard to avoid reliably with regexes.  Instead, the code
    is tokenised as Python and the special syntax is recognised in the tokens.

    IPython's special syntax is not valid Python, so tokenising may go wrong
    after the special syntax starts.  These classes therefore find and
    transform *one* instance of special syntax at a time; tokens are
    regenerated after each transformation to find the next piece.

    Subclasses implement one class method (``find``) and one regular method
    (``transform``).

    The priority attribute selects which transformation applies when several
    transformers match at the same place — lower numbers win.  This lets
    "%magic?" become a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is a 1-indexed (line, col) pair as produced by tokenize.
        self.start_line = start[0] - 1  # Shift from 1-index to 0-index
        self.start_col = start[1]

    def sortby(self):
        # Sort key: earliest position first, then by priority.
        return self.start_line, self.start_col, self.priority

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience,
        so it is easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        This should return an instance of its class, pointing to the start
        position it has found, or None if it found no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines,
        returns a similar list of transformed lines.
        """
        raise NotImplementedError

323 

class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell."""
        for line in tokens_by_line:
            eq_ix = _find_assign_op(line)
            if eq_ix is None:
                continue
            if len(line) < eq_ix + 2:
                continue
            if line[eq_ix + 1].string == '%' and line[eq_ix + 2].type == tokenize.NAME:
                return cls(line[eq_ix + 1].start)

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod."""
        row, col = self.start_line, self.start_col
        lhs = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), last_row)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        # Replace the whole continued statement with the single rewritten line.
        return lines[:row] + [new_line] + lines[last_row + 1:]

354 

355 

class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""

    @classmethod
    def find_pre_312(cls, tokens_by_line):
        # Before Python 3.12, '!' tokenizes as an ERRORTOKEN.
        for line in tokens_by_line:
            eq_ix = _find_assign_op(line)
            if eq_ix is None:
                continue
            if line[eq_ix].line.strip().startswith('='):
                continue
            if len(line) < eq_ix + 2:
                continue
            if line[eq_ix + 1].type != tokenize.ERRORTOKEN:
                continue
            # Scan forward through error tokens looking for the '!'.
            ix = eq_ix + 1
            while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
                if line[ix].string == '!':
                    return cls(line[ix].start)
                elif not line[ix].string.isspace():
                    break
                ix += 1

    @classmethod
    def find_post_312(cls, tokens_by_line):
        # From Python 3.12, '!' tokenizes as an OP token.
        for line in tokens_by_line:
            eq_ix = _find_assign_op(line)
            if eq_ix is None:
                continue
            if line[eq_ix].line.strip().startswith("="):
                continue
            if len(line) < eq_ix + 2:
                continue
            nxt = line[eq_ix + 1]
            if nxt.type == tokenize.OP and nxt.string == "!":
                return cls(nxt.start)

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell."""
        finder = cls.find_pre_312 if sys.version_info < (3, 12) else cls.find_post_312
        return finder(tokens_by_line)

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod."""
        row, col = self.start_line, self.start_col

        lhs = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), last_row)
        assert rhs.startswith('!'), rhs

        call = "get_ipython().getoutput({!r})".format(rhs[1:])
        new_line = lhs + call + '\n'
        # Replace the whole continued statement with the single rewritten line.
        return lines[:row] + [new_line] + lines[last_row + 1:]

412 

# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL = '!'   # Send line to underlying system shell
ESC_SH_CAP = '!!' # Send line to system shell and capture output
ESC_HELP = '?'    # Find information about object
ESC_HELP2 = '??'  # Find extra-detailed information about object
ESC_MAGIC = '%'   # Call magic function
ESC_MAGIC2 = '%%' # Call cell-magic function
ESC_QUOTE = ','   # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'  # Quote all args as a single string, call
ESC_PAREN = '/'   # Call first argument with rest of line as arguments

# One-character escapes recognised at the start of a line, and the
# two-character escapes that take precedence over them.
ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately

431 

def _make_help_call(target, esc):
    """Prepares a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)"""
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'
    arg = " ".join([method, target])
    # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
    t_magic_name, _, t_magic_arg_s = arg.partition(' ')
    # Allow "%magic?" style queries: drop any leading % from the name.
    t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
    return "get_ipython().run_line_magic(%r, %r)" % (t_magic_name, t_magic_arg_s)

443 

444 

def _tr_help(content):
    """Translate lines escaped with: ?

    A naked help line should fire the intro help screen (shell.show_usage())
    """
    if content:
        return _make_help_call(content, '?')
    return 'get_ipython().show_usage()'

454 

def _tr_help2(content):
    """Translate lines escaped with: ??

    A naked help line should fire the intro help screen (shell.show_usage())
    """
    if content:
        return _make_help_call(content, '??')
    return 'get_ipython().show_usage()'

464 

465def _tr_magic(content): 

466 "Translate lines escaped with a percent sign: %" 

467 name, _, args = content.partition(' ') 

468 return 'get_ipython().run_line_magic(%r, %r)' % (name, args) 

469 

470def _tr_quote(content): 

471 "Translate lines escaped with a comma: ," 

472 name, _, args = content.partition(' ') 

473 return '%s("%s")' % (name, '", "'.join(args.split()) ) 

474 

475def _tr_quote2(content): 

476 "Translate lines escaped with a semicolon: ;" 

477 name, _, args = content.partition(' ') 

478 return '%s("%s")' % (name, args) 

479 

480def _tr_paren(content): 

481 "Translate lines escaped with a slash: /" 

482 name, _, args = content.partition(" ") 

483 if name == "": 

484 raise SyntaxError(f'"{ESC_SHELL}" must be followed by a callable name') 

485 

486 return '%s(%s)' % (name, ", ".join(args.split())) 

487 

# Dispatch table: maps each escape sequence to the function that turns the
# escaped line's content into plain Python source.
tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP : _tr_help,
       ESC_HELP2 : _tr_help2,
       ESC_MAGIC : _tr_magic,
       ESC_QUOTE : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN : _tr_paren }

496 

class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell."""
        for line in tokens_by_line:
            if not line:
                continue
            # Skip over indentation bookkeeping tokens.
            ix = 0
            n = len(line)
            while ix < n and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if ix >= n:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod."""
        row, col = self.start_line, self.start_col

        indent = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        logical = assemble_continued_line(lines, (row, col), last_row)

        # Two-character escapes (!!, ??) take precedence over single ones.
        if len(logical) > 1 and logical[:2] in ESCAPE_DOUBLES:
            escape, content = logical[:2], logical[2:]
        else:
            escape, content = logical[:1], logical[1:]

        call = tr[escape](content) if escape in tr else ''

        new_line = indent + call + '\n'
        return lines[:row] + [new_line] + lines[last_row + 1:]

539 

540 

# Matches "name?" / "name??" at the end of a line (optionally prefixed with
# one or two %), including dotted attribute access and literal-integer
# indexing like name[0] or name[-1].
_help_end_re = re.compile(
    r"""(%{0,2}
    (?!\d)[\w*]+                      # Variable name
    (\.(?!\d)[\w*]+|\[-?[0-9]+\])*    # .etc.etc or [0], we only support literal integers.
    )
    (\?\??)$                          # ? or ??
    """,
    re.VERBOSE,
)

550 

551 

class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        """*start* points at the target name, *q_locn* at the ``?``/``??``."""
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.
        """

        piece = "".join(lines[self.start_line : self.q_line + 1])
        indent, content = piece[: self.start_col], piece[self.start_col :]
        lines_before = lines[: self.start_line]
        lines_after = lines[self.q_line + 1 :]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        # (An unreachable "assert m is not None" used to follow the raise
        # above; it has been removed as dead code.)
        target = m.group(1)
        esc = m.group(3)

        call = _make_help_call(target, esc)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after

597 

def make_tokens_by_line(lines: List[str]):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are grouped as one
    line. All lines except the last lines should keep their line ending ('\\n',
    '\\r\\n') for this to properly work. Use `.splitlines(keeplineending=True)`
    for example when passing block of text to this function.

    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.

    # reexported from token on 3.7+
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL  # type: ignore

    if len(lines) > 1 and not lines[0].endswith(("\n", "\r", "\r\n", "\x0b", "\x0c")):
        warnings.warn(
            "`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified",
            stacklevel=2,
        )

    grouped: List[List[Any]] = [[]]
    depth = 0
    try:
        for tok in tokenutil.generate_tokens_catch_errors(
            iter(lines).__next__, extra_errors_to_catch=["expected EOF"]
        ):
            grouped[-1].append(tok)
            if tok.type == NEWLINE or (tok.type == NL and depth <= 0):
                # End of a logical line: start collecting the next one.
                grouped.append([])
            elif tok.string in {'(', '[', '{'}:
                depth += 1
            elif tok.string in {')', ']', '}'} and depth > 0:
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    if not grouped[-1]:
        grouped.pop()

    return grouped

644 

645 

def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
    """Check if the depth of brackets in the list of tokens drops below 0"""
    depth = 0
    for token in tokens:
        text = token.string
        if text in {"(", "[", "{"}:
            depth += 1
        elif text in {")", "]", "}"}:
            depth -= 1
            if depth < 0:
                return True
    return False

657 

# Arbitrary limit to prevent getting stuck in infinite loops
# (each token transform pass should make progress; if it keeps changing the
# input this many times, something is wrong and we abort).
TRANSFORM_LOOP_LIMIT = 500

660 

class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        # Transforms run first, on raw pasted text (prompt/indent stripping).
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Whole-cell, line-based transforms (cell magics).
        self.line_transforms = [
            cell_magic,
        ]
        # Token-based transforms, applied one match at a time.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        # Earliest match wins; priority breaks ties at the same location.
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This transformer failed; fall through to the next candidate.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        # Apply token transforms repeatedly until a fixed point is reached,
        # bounded by TRANSFORM_LOOP_LIMIT to avoid infinite loops.
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        # Remember if the lines ends in a new line.
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                # First non-whitespace character from the end is not a newline.
                break
            else:
                continue

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        # An explicit backslash continuation on the last non-blank line means
        # more input is expected.
        for line in reversed(lines):
            if not line.strip():
                continue
            elif line.strip("\n").endswith("\\"):
                return "incomplete", find_last_indent(lines)
            else:
                break

        try:
            for transform in self.cleanup_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        # Bail if we got one line and there are more closing parentheses than
        # the opening ones
        if (
            len(lines) == 1
            and tokens_by_line
            and has_sunken_brackets(tokens_by_line[0])
        ):
            return "invalid", None

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if (
            tokens_by_line[-1][-1].type != tokenize.ENDMARKER
            and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN
        ):
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}  # type: ignore

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        # Drop trailing NEWLINE/COMMENT/ENDMARKER tokens from the last line.
        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if not tokens_by_line[-1]:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            indent = tokens_by_line[-1][ix].start[1]
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # If there's a blank line at the end, assume we're ready to execute
        if not lines[-1].strip():
            return 'complete', None

        return 'complete', None

877 

def find_last_indent(lines):
    """Return the indentation width of the last line (tab counted as 4)."""
    match = _indent_re.match(lines[-1])
    if not match:
        return 0
    expanded = match.group(0).replace('\t', ' ' * 4)
    return len(expanded)

883 

884 

class MaybeAsyncCompile(Compile):
    """``codeop.Compile`` variant that ORs *extra_flags* (e.g. the flag
    allowing top-level ``await``) into the compiler flags."""

    def __init__(self, extra_flags=0):
        super().__init__()
        self.flags = self.flags | extra_flags

889 

890 

class MaybeAsyncCommandCompiler(CommandCompiler):
    """``codeop.CommandCompiler`` whose underlying compiler carries extra
    compile flags (e.g. to permit top-level ``await``)."""

    def __init__(self, extra_flags=0):
        # Replace the stock compiler with a flag-aware one.
        self.compiler = MaybeAsyncCompile(extra_flags=extra_flags)

895 

# Allow 'await' at the top level when compiling cells.
_extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT

# Module-level compiler used by TransformerManager.check_complete().
compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)