1"""Input transformer machinery to support IPython special syntax.
2
3This includes the machinery to recognise and transform ``%magic`` commands,
4``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5
Added: IPython 7.0. Replaces ``inputsplitter`` and ``inputtransformer``, which
were deprecated in 7.0 and removed in 9.0.
8"""
9
10# Copyright (c) IPython Development Team.
11# Distributed under the terms of the Modified BSD License.
12
13import ast
14from codeop import CommandCompiler, Compile
15import re
16import sys
17import tokenize
18from typing import List, Tuple, Optional, Any
19import warnings
20from textwrap import dedent
21
22from IPython.utils import tokenutil
23
24_indent_re = re.compile(r'^[ \t]+')
25
26def leading_empty_lines(lines):
27 """Remove leading empty lines
28
29 If the leading lines are empty or contain only whitespace, they will be
30 removed.
31 """
32 if not lines:
33 return lines
34 for i, line in enumerate(lines):
35 if line and not line.isspace():
36 return lines[i:]
37 return lines
38
39def leading_indent(lines):
40 """Remove leading indentation.
41
42 Removes the minimum common leading indentation from all lines.
43 """
44 if not lines:
45 return lines
46 return dedent("".join(lines)).splitlines(keepends=True)
47
48class PromptStripper:
49 """Remove matching input prompts from a block of input.
50
51 Parameters
52 ----------
53 prompt_re : regular expression
54 A regular expression matching any input prompt (including continuation,
55 e.g. ``...``)
56 initial_re : regular expression, optional
57 A regular expression matching only the initial prompt, but not continuation.
58 If no initial expression is given, prompt_re will be used everywhere.
59 Used mainly for plain Python prompts (``>>>``), where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so it shouldn't be stripped.
61
62 Notes
63 -----
64
65 If initial_re and prompt_re differ,
66 only initial_re will be tested against the first line.
67 If any prompt is found on the first two lines,
68 prompts will be stripped from the rest of the block.
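
    Examples
    --------
    A minimal sketch of the intended behaviour: with ``ipython_prompt``
    (defined later in this module), the pasted block::

        In [1]: a = 1
           ...: b = 2

    is stripped to::

        a = 1
        b = 2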
69 """
70 def __init__(self, prompt_re, initial_re=None, *, doctest=False):
71 self.prompt_re = prompt_re
72 self.initial_re = initial_re or prompt_re
73 self.doctest = doctest
74 if doctest:
75 # Doctest/xdoctest prompts may be indented (e.g. " >>>").
76 # We only treat "..." as a continuation prompt when the same pasted
77 # block contains at least one ">>>" line, to avoid ambiguity with the
78 # Python Ellipsis literal.
79 self._doctest_initial_re = re.compile(r'^\s*>>>')
80 self._doctest_ps1_re = re.compile(r'^\s*>>>\s?')
81 self._doctest_ps2_re = re.compile(r'^\s*\.\.\.\s?')
82
83 # Very small state machine to detect triple-quoted strings in the
84 # *same* input block (e.g. user typed """ then pasted doctest).
85 # We preserve literal >>> / ... inside triple-quoted strings.
86 self._triple_quote_re = re.compile(r"(?<!\\)(\"\"\"|''')")
87
88
89 def _triple_quote_mask(self, lines: List[str]) -> List[bool]:
90 """
91 Return a boolean mask: True if the corresponding line is considered
92 inside a triple-quoted string literal.
93
94 This is intentionally heuristic (fast + good enough for paste handling).
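
        For instance (a hedged sketch), for the lines of::

            a = 1
            s = '''
            pasted text that looks like a prompt
            '''
            b = 2

        the expected mask is ``[False, True, True, True, False]``, so prompt
        stripping leaves the string literal untouched.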
95 """
96 mask: List[bool] = []
        in_triple: Optional[str] = None  # either ''' or """
98 for line in lines:
99 mask.append(in_triple is not None)
100 # Toggle state for each occurrence of """ or ''' in the line.
101 for m in self._triple_quote_re.finditer(line):
102 q = m.group(1)
103 if in_triple is None:
104 in_triple = q
105 mask[-1] = True # current line is inside triple quotes
106 elif in_triple == q:
107 in_triple = None
108 # else: ignore mismatched triple quote while inside
109 return mask
110
111
112 def _strip(self, lines):
113 return [self.prompt_re.sub('', l, count=1) for l in lines]
114
115 def __call__(self, lines):
116 if not lines:
117 return lines
118
119 if self.doctest:
120 triple_mask = self._triple_quote_mask(lines)
121
122 # Detect doctest prompts only outside triple-quoted strings.
123 has_doctest_outside = any(
124 (not in_triple) and self._doctest_initial_re.match(l)
125 for l, in_triple in zip(lines, triple_mask)
126 )
127 if not has_doctest_outside:
128 return lines
129
130 out_lines: List[str] = []
131 stripped_mask: List[bool] = []
132
133 for l, in_triple in zip(lines, triple_mask):
134 if in_triple:
135 out_lines.append(l)
136 stripped_mask.append(False)
137 continue
138
139 if self._doctest_ps1_re.match(l):
140 new_l = self._doctest_ps1_re.sub('', l, count=1)
141 elif self._doctest_ps2_re.match(l):
142 new_l = self._doctest_ps2_re.sub('', l, count=1)
143 else:
144 new_l = l
145 out_lines.append(new_l)
146 stripped_mask.append(new_l != l)
147
148 # Dedent only the non-triple-quoted segments where stripping occurred.
149 dedented: List[str] = []
150 i = 0
151 while i < len(out_lines):
152 j = i
153 in_triple = triple_mask[i]
154 while j < len(out_lines) and triple_mask[j] == in_triple:
155 j += 1
156
157 segment = out_lines[i:j]
158 seg_stripped = any(stripped_mask[i:j])
159
160 if (not in_triple) and seg_stripped:
161 dedented.extend(dedent(''.join(segment)).splitlines(keepends=True))
162 else:
163 dedented.extend(segment)
164
165 i = j
166
167 return dedented
168
169 if self.initial_re.match(lines[0]) or \
170 (len(lines) > 1 and self.prompt_re.match(lines[1])):
171 return self._strip(lines)
172 return lines
173
174classic_prompt = PromptStripper(
175 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
176 initial_re=re.compile(r'^>>>( |$)'),
177 doctest=True,
178)
179
180ipython_prompt = PromptStripper(
181 re.compile(
182 r"""
183 ^( # Match from the beginning of a line, either:
184
185 # 1. First-line prompt:
186 ((\[nav\]|\[ins\])?\ )? # Vi editing mode prompt, if it's there
187 In\ # The 'In' of the prompt, with a space
188 \[\d+\]: # Command index, as displayed in the prompt
189 \ # With a mandatory trailing space
190
191 | # ... or ...
192
193 # 2. The three dots of the multiline prompt
194 \s* # All leading whitespace characters
195 \.{3,}: # The three (or more) dots
196 \ ? # With an optional trailing space
197
198 )
199 """,
200 re.VERBOSE,
201 )
202)
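# A hedged illustration of what ``ipython_prompt`` is expected to strip: the
# pasted block
#
#     In [2]: a = 1
#        ...: b = 2
#
# becomes
#
#     a = 1
#     b = 2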
203
204
205def cell_magic(lines):
206 if not lines or not lines[0].startswith('%%'):
207 return lines
208 if re.match(r'%%\w+\?', lines[0]):
209 # This case will be handled by help_end
210 return lines
211 magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
212 body = ''.join(lines[1:])
213 return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
214 % (magic_name, first_line, body)]
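# A hedged sketch of the rewrite performed by cell_magic:
#
#     ["%%timeit -n1\n", "pass\n"]
#     -> ["get_ipython().run_cell_magic('timeit', '-n1', 'pass\n')\n"]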
215
216
217def _find_assign_op(token_line) -> Optional[int]:
218 """Get the index of the first assignment in the line ('=' not inside brackets)
219
    Note: we don't try to support multiple special assignments (a = b = %foo)
221 """
222 paren_level = 0
223 for i, ti in enumerate(token_line):
224 s = ti.string
225 if s == '=' and paren_level == 0:
226 return i
227 if s in {'(','[','{'}:
228 paren_level += 1
229 elif s in {')', ']', '}'}:
230 if paren_level > 0:
231 paren_level -= 1
232 return None
233
234def find_end_of_continued_line(lines, start_line: int):
235 """Find the last line of a line explicitly extended using backslashes.
236
237 Uses 0-indexed line numbers.
238 """
239 end_line = start_line
240 while lines[end_line].endswith('\\\n'):
241 end_line += 1
242 if end_line >= len(lines):
243 break
244 return end_line
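# A hedged sketch: for lines ["x = 1 + \\\n", "    2\n", "y = 3\n"],
# find_end_of_continued_line(lines, 0) is expected to return 1, the index of
# the line that ends the backslash-continued statement.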
245
246def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
247 r"""Assemble a single line from multiple continued line pieces
248
249 Continued lines are lines ending in ``\``, and the line following the last
250 ``\`` in the block.
251
252 For example, this code continues over multiple lines::
253
254 if (assign_ix is not None) \
255 and (len(line) >= assign_ix + 2) \
256 and (line[assign_ix+1].string == '%') \
257 and (line[assign_ix+2].type == tokenize.NAME):
258
259 This statement contains four continued line pieces.
260 Assembling these pieces into a single line would give::
261
262 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
263
264 This uses 0-indexed line numbers. *start* is (lineno, colno).
265
266 Used to allow ``%magic`` and ``!system`` commands to be continued over
267 multiple lines.
268 """
269 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
270 return ' '.join([p.rstrip()[:-1] for p in parts[:-1]] # Strip backslash+newline
271 + [parts[-1].rstrip()]) # Strip newline from last line
272
273class TokenTransformBase:
274 """Base class for transformations which examine tokens.
275
276 Special syntax should not be transformed when it occurs inside strings or
277 comments. This is hard to reliably avoid with regexes. The solution is to
278 tokenise the code as Python, and recognise the special syntax in the tokens.
279
280 IPython's special syntax is not valid Python syntax, so tokenising may go
281 wrong after the special syntax starts. These classes therefore find and
282 transform *one* instance of special syntax at a time into regular Python
283 syntax. After each transformation, tokens are regenerated to find the next
284 piece of special syntax.
285
286 Subclasses need to implement one class method (find)
287 and one regular method (transform).
288
289 The priority attribute can select which transformation to apply if multiple
290 transformers match in the same place. Lower numbers have higher priority.
291 This allows "%magic?" to be turned into a help call rather than a magic call.
292 """
293 # Lower numbers -> higher priority (for matches in the same location)
294 priority = 10
295
296 def sortby(self):
297 return self.start_line, self.start_col, self.priority
298
299 def __init__(self, start):
300 self.start_line = start[0] - 1 # Shift from 1-index to 0-index
301 self.start_col = start[1]
302
303 @classmethod
304 def find(cls, tokens_by_line):
305 """Find one instance of special syntax in the provided tokens.
306
307 Tokens are grouped into logical lines for convenience,
308 so it is easy to e.g. look at the first token of each line.
309 *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.
310
311 This should return an instance of its class, pointing to the start
312 position it has found, or None if it found no match.
313 """
314 raise NotImplementedError
315
316 def transform(self, lines: List[str]):
317 """Transform one instance of special syntax found by ``find()``
318
319 Takes a list of strings representing physical lines,
320 returns a similar list of transformed lines.
321 """
322 raise NotImplementedError
323
324class MagicAssign(TokenTransformBase):
325 """Transformer for assignments from magics (a = %foo)"""
326 @classmethod
327 def find(cls, tokens_by_line):
328 """Find the first magic assignment (a = %foo) in the cell.
329 """
330 for line in tokens_by_line:
331 assign_ix = _find_assign_op(line)
332 if (assign_ix is not None) \
333 and (len(line) >= assign_ix + 2) \
334 and (line[assign_ix+1].string == '%') \
335 and (line[assign_ix+2].type == tokenize.NAME):
336 return cls(line[assign_ix+1].start)
337
338 def transform(self, lines: List[str]):
339 """Transform a magic assignment found by the ``find()`` classmethod.
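
        For example (a sketch of the expected rewrite), ``a = %foo bar``
        becomes::

            a = get_ipython().run_line_magic('foo', 'bar')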
340 """
341 start_line, start_col = self.start_line, self.start_col
342 lhs = lines[start_line][:start_col]
343 end_line = find_end_of_continued_line(lines, start_line)
344 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
345 assert rhs.startswith('%'), rhs
346 magic_name, _, args = rhs[1:].partition(' ')
347
348 lines_before = lines[:start_line]
349 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
350 new_line = lhs + call + '\n'
351 lines_after = lines[end_line+1:]
352
353 return lines_before + [new_line] + lines_after
354
355
356class SystemAssign(TokenTransformBase):
357 """Transformer for assignments from system commands (a = !foo)"""
358 @classmethod
359 def find_pre_312(cls, tokens_by_line):
360 for line in tokens_by_line:
361 assign_ix = _find_assign_op(line)
362 if (assign_ix is not None) \
363 and not line[assign_ix].line.strip().startswith('=') \
364 and (len(line) >= assign_ix + 2) \
365 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
366 ix = assign_ix + 1
367
368 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
369 if line[ix].string == '!':
370 return cls(line[ix].start)
371 elif not line[ix].string.isspace():
372 break
373 ix += 1
374
375 @classmethod
376 def find_post_312(cls, tokens_by_line):
377 for line in tokens_by_line:
378 assign_ix = _find_assign_op(line)
379 if (
380 (assign_ix is not None)
381 and not line[assign_ix].line.strip().startswith("=")
382 and (len(line) >= assign_ix + 2)
383 and (line[assign_ix + 1].type == tokenize.OP)
384 and (line[assign_ix + 1].string == "!")
385 ):
386 return cls(line[assign_ix + 1].start)
387
388 @classmethod
389 def find(cls, tokens_by_line):
390 """Find the first system assignment (a = !foo) in the cell."""
391 if sys.version_info < (3, 12):
392 return cls.find_pre_312(tokens_by_line)
393 return cls.find_post_312(tokens_by_line)
394
395 def transform(self, lines: List[str]):
396 """Transform a system assignment found by the ``find()`` classmethod.
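
        For example (a sketch of the expected rewrite), ``a = !ls`` becomes::

            a = get_ipython().getoutput('ls')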
397 """
398 start_line, start_col = self.start_line, self.start_col
399
400 lhs = lines[start_line][:start_col]
401 end_line = find_end_of_continued_line(lines, start_line)
402 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
403 assert rhs.startswith('!'), rhs
404 cmd = rhs[1:]
405
406 lines_before = lines[:start_line]
407 call = "get_ipython().getoutput({!r})".format(cmd)
408 new_line = lhs + call + '\n'
409 lines_after = lines[end_line + 1:]
410
411 return lines_before + [new_line] + lines_after
412
# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT simply be changed here: many regular
# expressions and other parts of the code hardcode their values, and for all
# intents and purposes they constitute the 'IPython syntax', so they should be
# considered fixed.
418
419ESC_SHELL = '!' # Send line to underlying system shell
420ESC_SH_CAP = '!!' # Send line to system shell and capture output
421ESC_HELP = '?' # Find information about object
422ESC_HELP2 = '??' # Find extra-detailed information about object
423ESC_MAGIC = '%' # Call magic function
424ESC_MAGIC2 = '%%' # Call cell-magic function
425ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
426ESC_QUOTE2 = ';' # Quote all args as a single string, call
427ESC_PAREN = '/' # Call first argument with rest of line as arguments
428
429ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
430ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
431
432def _make_help_call(target, esc):
433 """Prepares a pinfo(2)/psearch call from a target name and the escape
434 (i.e. ? or ??)"""
435 method = 'pinfo2' if esc == '??' \
436 else 'psearch' if '*' in target \
437 else 'pinfo'
438 arg = " ".join([method, target])
    # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
440 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
441 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
442 return "get_ipython().run_line_magic(%r, %r)" % (t_magic_name, t_magic_arg_s)
443
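# Hedged examples of the calls _make_help_call is expected to build:
#
#     _make_help_call("os.path", "?")    -> "get_ipython().run_line_magic('pinfo', 'os.path')"
#     _make_help_call("os.path", "??")   -> "get_ipython().run_line_magic('pinfo2', 'os.path')"
#     _make_help_call("np.*load*", "?")  -> "get_ipython().run_line_magic('psearch', 'np.*load*')"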
444
445def _tr_help(content):
446 """Translate lines escaped with: ?
447
448 A naked help line should fire the intro help screen (shell.show_usage())
449 """
450 if not content:
451 return 'get_ipython().show_usage()'
452
453 return _make_help_call(content, '?')
454
455def _tr_help2(content):
456 """Translate lines escaped with: ??
457
458 A naked help line should fire the intro help screen (shell.show_usage())
459 """
460 if not content:
461 return 'get_ipython().show_usage()'
462
463 return _make_help_call(content, '??')
464
465def _tr_magic(content):
466 "Translate lines escaped with a percent sign: %"
467 name, _, args = content.partition(' ')
468 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
469
470def _tr_quote(content):
471 "Translate lines escaped with a comma: ,"
472 name, _, args = content.partition(' ')
473 return '%s("%s")' % (name, '", "'.join(args.split()) )
474
475def _tr_quote2(content):
476 "Translate lines escaped with a semicolon: ;"
477 name, _, args = content.partition(' ')
478 return '%s("%s")' % (name, args)
479
480def _tr_paren(content):
481 "Translate lines escaped with a slash: /"
482 name, _, args = content.partition(" ")
483 if name == "":
        raise SyntaxError(f'"{ESC_PAREN}" must be followed by a callable name')
485
486 return '%s(%s)' % (name, ", ".join(args.split()))
487
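# Hedged sketches of the quote/paren escapes (``my_func`` is purely
# illustrative; EscapedCommand strips the escape character before these
# helpers receive the content):
#
#     ,my_func a b c   ->  my_func("a", "b", "c")
#     ;my_func a b c   ->  my_func("a b c")
#     /my_func a b c   ->  my_func(a, b, c)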
488tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
489 ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
490 ESC_HELP : _tr_help,
491 ESC_HELP2 : _tr_help2,
492 ESC_MAGIC : _tr_magic,
493 ESC_QUOTE : _tr_quote,
494 ESC_QUOTE2 : _tr_quote2,
495 ESC_PAREN : _tr_paren }
496
497class EscapedCommand(TokenTransformBase):
498 """Transformer for escaped commands like %foo, !foo, or /foo"""
499 @classmethod
500 def find(cls, tokens_by_line):
501 """Find the first escaped command (%foo, !foo, etc.) in the cell.
502 """
503 for line in tokens_by_line:
504 if not line:
505 continue
506 ix = 0
507 ll = len(line)
508 while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
509 ix += 1
510 if ix >= ll:
511 continue
512 if line[ix].string in ESCAPE_SINGLES:
513 return cls(line[ix].start)
514
515 def transform(self, lines):
516 """Transform an escaped line found by the ``find()`` classmethod.
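
        For example (a hedged sketch)::

            !ls -l   ->  get_ipython().system('ls -l')
            %time x  ->  get_ipython().run_line_magic('time', 'x')
            ??foo    ->  get_ipython().run_line_magic('pinfo2', 'foo')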
517 """
518 start_line, start_col = self.start_line, self.start_col
519
520 indent = lines[start_line][:start_col]
521 end_line = find_end_of_continued_line(lines, start_line)
522 line = assemble_continued_line(lines, (start_line, start_col), end_line)
523
524 if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
525 escape, content = line[:2], line[2:]
526 else:
527 escape, content = line[:1], line[1:]
528
529 if escape in tr:
530 call = tr[escape](content)
531 else:
532 call = ''
533
534 lines_before = lines[:start_line]
535 new_line = indent + call + '\n'
536 lines_after = lines[end_line + 1:]
537
538 return lines_before + [new_line] + lines_after
539
540
541_help_end_re = re.compile(
542 r"""(%{0,2}
543 (?!\d)[\w*]+ # Variable name
    (\.(?!\d)[\w*]+|\[-?[0-9]+\])*    # Attribute access (.attr) or indexing; only literal integers are supported
545 )
546 (\?\??)$ # ? or ??
547 """,
548 re.VERBOSE,
549)
550
551
552class HelpEnd(TokenTransformBase):
553 """Transformer for help syntax: obj? and obj??"""
554 # This needs to be higher priority (lower number) than EscapedCommand so
555 # that inspecting magics (%foo?) works.
556 priority = 5
557
558 def __init__(self, start, q_locn):
559 super().__init__(start)
560 self.q_line = q_locn[0] - 1 # Shift from 1-indexed to 0-indexed
561 self.q_col = q_locn[1]
562
563 @classmethod
564 def find(cls, tokens_by_line):
565 """Find the first help command (foo?) in the cell.
566 """
567 for line in tokens_by_line:
568 # Last token is NEWLINE; look at last but one
569 if len(line) > 2 and line[-2].string == '?':
570 # Find the first token that's not INDENT/DEDENT
571 ix = 0
572 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
573 ix += 1
574 return cls(line[ix].start, line[-2].start)
575
576 def transform(self, lines):
577 """Transform a help command found by the ``find()`` classmethod.
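
        For example (a hedged sketch), ``os.path?`` becomes::

            get_ipython().run_line_magic('pinfo', 'os.path')

        and ``%timeit??`` becomes::

            get_ipython().run_line_magic('pinfo2', '%timeit')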
578 """
579
580 piece = "".join(lines[self.start_line : self.q_line + 1])
581 indent, content = piece[: self.start_col], piece[self.start_col :]
582 lines_before = lines[: self.start_line]
583 lines_after = lines[self.q_line + 1 :]
584
585 m = _help_end_re.search(content)
586 if not m:
587 raise SyntaxError(content)
588 assert m is not None, content
589 target = m.group(1)
590 esc = m.group(3)
591
592
593 call = _make_help_call(target, esc)
594 new_line = indent + call + '\n'
595
596 return lines_before + [new_line] + lines_after
597
598def make_tokens_by_line(lines:List[str]):
599 """Tokenize a series of lines and group tokens by line.
600
    The tokens for a multiline Python string or expression are grouped as one
    line. All lines except the last should keep their line ending ('\\n',
    '\\r\\n') for this to work properly. Use ``.splitlines(keepends=True)``,
    for example, when passing a block of text to this function.
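
    For example (a sketch; exact token attributes depend on the Python
    version), the two physical lines of::

        a = (1 +
             2)

    are grouped into one logical line of tokens, with the final ENDMARKER
    token in a trailing group of its own.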
605
606 """
607 # NL tokens are used inside multiline expressions, but also after blank
608 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
609 # We want to group the former case together but split the latter, so we
610 # track parentheses level, similar to the internals of tokenize.
611
612 # reexported from token on 3.7+
613 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL # type: ignore
614 tokens_by_line: List[List[Any]] = [[]]
615 if len(lines) > 1 and not lines[0].endswith(("\n", "\r", "\r\n", "\x0b", "\x0c")):
616 warnings.warn(
617 "`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified",
618 stacklevel=2,
619 )
620 parenlev = 0
621 try:
622 for token in tokenutil.generate_tokens_catch_errors(
623 iter(lines).__next__, extra_errors_to_catch=["expected EOF"]
624 ):
625 tokens_by_line[-1].append(token)
626 if (token.type == NEWLINE) \
627 or ((token.type == NL) and (parenlev <= 0)):
628 tokens_by_line.append([])
629 elif token.string in {'(', '[', '{'}:
630 parenlev += 1
631 elif token.string in {')', ']', '}'}:
632 if parenlev > 0:
633 parenlev -= 1
634 except tokenize.TokenError:
635 # Input ended in a multiline string or expression. That's OK for us.
636 pass
637
638
639 if not tokens_by_line[-1]:
640 tokens_by_line.pop()
641
642
643 return tokens_by_line
644
645
646def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
647 """Check if the depth of brackets in the list of tokens drops below 0"""
648 parenlev = 0
649 for token in tokens:
650 if token.string in {"(", "[", "{"}:
651 parenlev += 1
652 elif token.string in {")", "]", "}"}:
653 parenlev -= 1
654 if parenlev < 0:
655 return True
656 return False
657
658# Arbitrary limit to prevent getting stuck in infinite loops
659TRANSFORM_LOOP_LIMIT = 500
660
661class TransformerManager:
662 """Applies various transformations to a cell or code block.
663
664 The key methods for external use are ``transform_cell()``
665 and ``check_complete()``.
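
    For example (a hedged sketch of the expected behaviour)::

        mgr = TransformerManager()
        mgr.transform_cell("%time x = 1")
        # expected: "get_ipython().run_line_magic('time', 'x = 1')" plus a newline
        mgr.check_complete("if x:")
        # expected: ('incomplete', 4)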
666 """
667 def __init__(self):
668 self.cleanup_transforms = [
669 leading_empty_lines,
670 leading_indent,
671 classic_prompt,
672 ipython_prompt,
673 ]
674 self.line_transforms = [
675 cell_magic,
676 ]
677 self.token_transformers = [
678 MagicAssign,
679 SystemAssign,
680 EscapedCommand,
681 HelpEnd,
682 ]
683
684 def do_one_token_transform(self, lines):
685 """Find and run the transform earliest in the code.
686
687 Returns (changed, lines).
688
689 This method is called repeatedly until changed is False, indicating
690 that all available transformations are complete.
691
692 The tokens following IPython special syntax might not be valid, so
693 the transformed code is retokenised every time to identify the next
694 piece of special syntax. Hopefully long code cells are mostly valid
695 Python, not using lots of IPython special syntax, so this shouldn't be
696 a performance issue.
697 """
698 tokens_by_line = make_tokens_by_line(lines)
699 candidates = []
700 for transformer_cls in self.token_transformers:
701 transformer = transformer_cls.find(tokens_by_line)
702 if transformer:
703 candidates.append(transformer)
704
705 if not candidates:
706 # Nothing to transform
707 return False, lines
708 ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
709 for transformer in ordered_transformers:
710 try:
711 return True, transformer.transform(lines)
712 except SyntaxError:
713 pass
714 return False, lines
715
716 def do_token_transforms(self, lines):
717 for _ in range(TRANSFORM_LOOP_LIMIT):
718 changed, lines = self.do_one_token_transform(lines)
719 if not changed:
720 return lines
721
722 raise RuntimeError("Input transformation still changing after "
723 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
724
725 def transform_cell(self, cell: str) -> str:
726 """Transforms a cell of input code"""
727 if not cell.endswith('\n'):
728 cell += '\n' # Ensure the cell has a trailing newline
729 lines = cell.splitlines(keepends=True)
730 for transform in self.cleanup_transforms + self.line_transforms:
731 lines = transform(lines)
732
733 lines = self.do_token_transforms(lines)
734 return ''.join(lines)
735
736 def check_complete(self, cell: str):
737 """Return whether a block of code is ready to execute, or should be continued
738
739 Parameters
740 ----------
741 cell : string
742 Python input code, which can be multiline.
743
744 Returns
745 -------
746 status : str
            One of 'complete', 'incomplete', or 'invalid'. 'invalid' means the
            source is not a prefix of valid code.
749 indent_spaces : int or None
750 The number of spaces by which to indent the next line of code. If
751 status is not 'incomplete', this is None.
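
        Examples
        --------
        A hedged sketch::

            check_complete("a = 1")      # expected: ('complete', None)
            check_complete("def f(x):")  # expected: ('incomplete', 4)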
752 """
        # Remember whether the cell ends in a newline.
754 ends_with_newline = False
755 for character in reversed(cell):
756 if character == '\n':
757 ends_with_newline = True
758 break
759 elif character.strip():
760 break
761 else:
762 continue
763
764 if not ends_with_newline:
            # Append a newline for consistent tokenization
766 # See https://bugs.python.org/issue33899
767 cell += '\n'
768
769 lines = cell.splitlines(keepends=True)
770
771 if not lines:
772 return 'complete', None
773
774 for line in reversed(lines):
775 if not line.strip():
776 continue
777 elif line.strip("\n").endswith("\\"):
778 return "incomplete", find_last_indent(lines)
779 else:
780 break
781
782 try:
783 for transform in self.cleanup_transforms:
784 if not getattr(transform, 'has_side_effects', False):
785 lines = transform(lines)
786 except SyntaxError:
787 return 'invalid', None
788
789 if lines[0].startswith('%%'):
790 # Special case for cell magics - completion marked by blank line
791 if lines[-1].strip():
792 return 'incomplete', find_last_indent(lines)
793 else:
794 return 'complete', None
795
796 try:
797 for transform in self.line_transforms:
798 if not getattr(transform, 'has_side_effects', False):
799 lines = transform(lines)
800 lines = self.do_token_transforms(lines)
801 except SyntaxError:
802 return 'invalid', None
803
804 tokens_by_line = make_tokens_by_line(lines)
805
        # Bail if we got a single line and there are more closing brackets than
        # opening ones
808 if (
809 len(lines) == 1
810 and tokens_by_line
811 and has_sunken_brackets(tokens_by_line[0])
812 ):
813 return "invalid", None
814
815 if not tokens_by_line:
816 return 'incomplete', find_last_indent(lines)
817
818 if (
819 tokens_by_line[-1][-1].type != tokenize.ENDMARKER
820 and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN
821 ):
822 # We're in a multiline string or expression
823 return 'incomplete', find_last_indent(lines)
824
825 newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER} # type: ignore
826
827 # Pop the last line which only contains DEDENTs and ENDMARKER
828 last_token_line = None
829 if {t.type for t in tokens_by_line[-1]} in [
830 {tokenize.DEDENT, tokenize.ENDMARKER},
831 {tokenize.ENDMARKER}
832 ] and len(tokens_by_line) > 1:
833 last_token_line = tokens_by_line.pop()
834
835 while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
836 tokens_by_line[-1].pop()
837
838 if not tokens_by_line[-1]:
839 return 'incomplete', find_last_indent(lines)
840
841 if tokens_by_line[-1][-1].string == ':':
842 # The last line starts a block (e.g. 'if foo:')
843 ix = 0
844 while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
845 ix += 1
846
847 indent = tokens_by_line[-1][ix].start[1]
848 return 'incomplete', indent + 4
849
850 if tokens_by_line[-1][0].line.endswith('\\'):
851 return 'incomplete', None
852
853 # At this point, our checks think the code is complete (or invalid).
854 # We'll use codeop.compile_command to check this with the real parser
855 try:
856 with warnings.catch_warnings():
857 warnings.simplefilter('error', SyntaxWarning)
858 res = compile_command(''.join(lines), symbol='exec')
859 except (SyntaxError, OverflowError, ValueError, TypeError,
860 MemoryError, SyntaxWarning):
861 return 'invalid', None
862 else:
863 if res is None:
864 return 'incomplete', find_last_indent(lines)
865
866 if last_token_line and last_token_line[0].type == tokenize.DEDENT:
867 if ends_with_newline:
868 return 'complete', None
869 return 'incomplete', find_last_indent(lines)
870
871 # If there's a blank line at the end, assume we're ready to execute
872 if not lines[-1].strip():
873 return 'complete', None
874
875 return 'complete', None
876
877
878def find_last_indent(lines):
879 m = _indent_re.match(lines[-1])
880 if not m:
881 return 0
882 return len(m.group(0).replace('\t', ' '*4))
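# Hedged example: find_last_indent(["if x:", "    pass"]) is expected to
# return 4; tabs in the final line count as four spaces each.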
883
884
885class MaybeAsyncCompile(Compile):
886 def __init__(self, extra_flags=0):
887 super().__init__()
888 self.flags |= extra_flags
889
890
891class MaybeAsyncCommandCompiler(CommandCompiler):
892 def __init__(self, extra_flags=0):
893 self.compiler = MaybeAsyncCompile(extra_flags=extra_flags)
894
895
896_extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT
897
898compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)