1"""Functions to process IPython magics with."""
2
3import ast
4import collections
5import dataclasses
6import re
7import secrets
8import string
9from collections.abc import Collection
10from functools import lru_cache
11from importlib.util import find_spec
12from typing import TypeGuard
13
14from black.mode import Mode
15from black.output import out
16from black.report import NothingChanged
17
# Call prefixes that IPython's TransformerManager emits when it rewrites magics.
# A cell that already contains one of these cannot be round-tripped safely.
TRANSFORMED_MAGICS = frozenset({
    "get_ipython().run_cell_magic",
    "get_ipython().system",
    "get_ipython().getoutput",
    "get_ipython().run_line_magic",
})
# Token names that are skipped when looking for the last meaningful token
# of a cell (used by the trailing-semicolon helpers).
TOKENS_TO_IGNORE = frozenset({
    "ENDMARKER",
    "NL",
    "NEWLINE",
    "COMMENT",
    "DEDENT",
    "UNIMPORTANT_WS",
    "ESCAPED_NL",
})
# Cell magics whose cells are still processed; cells starting with any other
# cell magic are left untouched by ``validate_cell``.
PYTHON_CELL_MAGICS = frozenset({
    "capture",
    "prun",
    "pypy",
    "python",
    "python3",
    "time",
    "timeit",
})
42
43
@dataclasses.dataclass(frozen=True)
class Replacement:
    """Pairing of a placeholder token with the magic text it stands in for."""

    # Placeholder inserted into the cell in place of the magic.
    mask: str
    # Original magic source that ``mask`` is swapped back to when unmasking.
    src: str
48
49
@lru_cache
def jupyter_dependencies_are_installed(*, warn: bool) -> bool:
    """Report whether both ``tokenize_rt`` and ``IPython`` can be imported.

    The result is cached per value of ``warn``. When ``warn`` is true and at
    least one of the packages is missing, a hint on how to install them is
    printed via ``out``.
    """
    installed = all(
        find_spec(package) is not None for package in ("tokenize_rt", "IPython")
    )
    if warn and not installed:
        out(
            "Skipping .ipynb files as Jupyter dependencies are not installed.\n"
            'You can fix this by running ``pip install "black[jupyter]"``'
        )
    return installed
62
63
def validate_cell(src: str, mode: Mode) -> None:
    r"""Reject cells that cannot be safely round-tripped.

    Two kinds of cells raise ``NothingChanged``:

    * Cells that already contain the output of a TransformerManager
      transformation. ``!ls`` is transformed to ``get_ipython().system('ls')``,
      but a cell that literally contains ``get_ipython().system('ls')`` comes
      out the same way, so the original form cannot be recovered:

      >>> TransformerManager().transform_cell("get_ipython().system('ls')")
      "get_ipython().system('ls')\n"
      >>> TransformerManager().transform_cell("!ls")
      "get_ipython().system('ls')\n"

    * Cells whose first line of code is a cell magic that is neither in
      ``PYTHON_CELL_MAGICS`` nor in ``mode.python_cell_magics``; such content
      might cause tokenize_rt to break because of indentations.
    """
    for transformed_magic in TRANSFORMED_MAGICS:
        if transformed_magic in src:
            raise NothingChanged

    first_code_line = _get_code_start(src)
    if not first_code_line.startswith("%%"):
        return

    # "%%name args..." -> "name"
    magic_name = first_code_line.split(maxsplit=1)[0][2:]
    if magic_name not in PYTHON_CELL_MAGICS | mode.python_cell_magics:
        raise NothingChanged
90
91
def remove_trailing_semicolon(src: str) -> tuple[str, bool]:
    """Strip the trailing semicolon, if any, from a Jupyter notebook cell.

    For example,

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    would become

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    Returns the (possibly modified) source together with a flag telling
    whether a semicolon was removed.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.
    """
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    # Index of the last token that is not whitespace/comment/newline noise.
    last_significant = next(
        (
            idx
            for idx, token in reversed_enumerate(tokens)
            if token.name not in TOKENS_TO_IGNORE
        ),
        None,
    )
    if last_significant is None:
        return src, False

    candidate = tokens[last_significant]
    if not (candidate.name == "OP" and candidate.src == ";"):
        return src, False

    del tokens[last_significant]
    return tokens_to_src(tokens), True
122
123
def put_trailing_semicolon_back(src: str, has_trailing_semicolon: bool) -> str:
    """Re-append the trailing semicolon if the cell originally had one.

    The semicolon is attached to the last token that is not listed in
    ``TOKENS_TO_IGNORE``. When ``has_trailing_semicolon`` is false the source
    is returned unchanged.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.
    """
    if not has_trailing_semicolon:
        return src
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    target = next(
        (
            idx
            for idx, token in reversed_enumerate(tokens)
            if token.name not in TOKENS_TO_IGNORE
        ),
        None,
    )
    if target is None:  # pragma: nocover
        raise AssertionError(
            "INTERNAL ERROR: Was not able to reinstate trailing semicolon. "
            "Please report a bug on https://github.com/psf/black/issues. "
        ) from None
    last_token = tokens[target]
    tokens[target] = last_token._replace(src=last_token.src + ";")
    return str(tokens_to_src(tokens))
146
147
def mask_cell(src: str) -> tuple[str, list[Replacement]]:
    """Swap IPython magics for placeholder tokens so the cell parses as Python.

    For example,

        %matplotlib inline
        'foo'

    becomes

        b"25716f358c32750"
        'foo'

    Returns the transformed code and the list of replacements that were made.
    Raises ``NothingChanged`` when masking changes the number of lines, which
    indicates a multi-line magic (not supported).
    """
    try:
        ast.parse(src)
    except SyntaxError:
        # Might have IPython magics, handled below.
        already_valid_python = False
    else:
        already_valid_python = True
    if already_valid_python:
        # Nothing to mask.
        return src, []

    from IPython.core.inputtransformer2 import TransformerManager

    manager = TransformerManager()
    # A side effect of transform_cell is that it also removes any empty lines
    # at the beginning of the cell.
    without_cell_magics, cell_magic_replacements = replace_cell_magics(
        manager.transform_cell(src)
    )
    masked, magic_replacements = replace_magics(
        manager.transform_cell(without_cell_magics)
    )
    n_lines_after = len(masked.strip().splitlines())
    n_lines_before = len(src.strip().splitlines())
    if n_lines_after != n_lines_before:
        # Multi-line magic, not supported.
        raise NothingChanged
    return masked, [*cell_magic_replacements, *magic_replacements]
188
189
def create_token(n_chars: int) -> str:
    """Build a random placeholder that is exactly ``n_chars`` characters long.

    * one character: a single ASCII letter;
    * two or three characters: an underscore followed by random letters,
      digits or underscores;
    * four or more: a bytes literal of hex digits, e.g. ``b"1f3a"``.
    """
    assert n_chars > 0
    if n_chars == 1:
        return secrets.choice(string.ascii_letters)
    if n_chars < 4:
        alphabet = string.ascii_letters + string.digits + "_"
        tail = [secrets.choice(alphabet) for _ in range(n_chars - 1)]
        return "_" + "".join(tail)
    # The b"..." wrapper uses up three characters; keep just enough hex digits
    # to fill the remainder.
    n_bytes = max(n_chars // 2 - 1, 1)
    hex_digits = secrets.token_hex(n_bytes)[: n_chars - 3]
    # We use a bytestring so that the string does not get interpreted
    # as a docstring.
    return f'b"{hex_digits}"'
207
208
def get_token(src: str, magic: str, existing_tokens: Collection[str] = ()) -> str:
    """Pick a random token to mask an IPython magic with.

    For example, if 'magic' was `%matplotlib inline`, then a possible
    token to mask it with would be `b"43fdd17f7e5ddc"`. The token has the
    same length as the magic, does not occur anywhere in 'src', and is not
    one of 'existing_tokens'.

    Raises AssertionError if no unused token is found after repeated attempts.
    """
    assert magic
    length = len(magic)

    def is_taken(candidate: str) -> bool:
        return candidate in src or candidate in existing_tokens

    candidate = create_token(length)
    retries = 0
    while is_taken(candidate):
        candidate = create_token(length)
        retries += 1
        if retries > 100:
            raise AssertionError(
                "INTERNAL ERROR: Black was not able to replace IPython magic. "
                "Please report a bug on https://github.com/psf/black/issues. "
                f"The magic might be helpful: {magic}"
            ) from None
    return candidate
231
232
def replace_cell_magics(src: str) -> tuple[str, list[Replacement]]:
    r"""Swap a cell magic header for a placeholder token.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example,

        get_ipython().run_cell_magic('t', '-n1', 'ls =!ls\n')

    becomes

        b"a794"
        ls =!ls

    Returns the transformed code along with the replacement that was made.
    If no cell magic is found, 'src' is returned unchanged with an empty list.
    """
    finder = CellMagicFinder()
    finder.visit(ast.parse(src))
    found = finder.cell_magic
    if found is None:
        return src, []

    header = found.header
    mask = get_token(src, header)
    # The mask takes the place of the "%%magic ..." line; the body follows it.
    return f"{mask}\n{found.body}", [Replacement(mask=mask, src=header)]
262
263
def replace_magics(src: str) -> tuple[str, list[Replacement]]:
    """Replace magics within body of cell.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example, this

        get_ipython().run_line_magic('matplotlib', 'inline')
        'foo'

    becomes

        b"5e67db56d490fd39"
        'foo'

    Everything on a line from the start of the magic call onwards is replaced
    by the mask; text before it (e.g. an assignment target) is kept.

    The replacement, along with the transformed code, are returned.

    Raises AssertionError if more than one magic is found on a single line.
    """
    replacements: list[Replacement] = []
    existing_tokens: set[str] = set()
    magic_finder = MagicFinder()
    magic_finder.visit(ast.parse(src))
    new_srcs = []
    for i, line in enumerate(src.split("\n"), start=1):
        if i in magic_finder.magics:
            offsets_and_magics = magic_finder.magics[i]
            if len(offsets_and_magics) != 1:  # pragma: nocover
                raise AssertionError(
                    f"Expecting one magic per line, got: {offsets_and_magics}\n"
                    "Please report a bug on https://github.com/psf/black/issues."
                )
            col_offset, magic = (
                offsets_and_magics[0].col_offset,
                offsets_and_magics[0].magic,
            )
            mask = get_token(src, magic, existing_tokens)
            replacements.append(Replacement(mask=mask, src=magic))
            existing_tokens.add(mask)
            # ast reports col_offset as a UTF-8 *byte* offset. It only equals
            # the character index when everything before it is ASCII, so
            # convert it before slicing (matters for e.g. ``é = !ls``).
            if line.isascii():
                prefix = line[:col_offset]
            else:
                prefix = line.encode("utf-8")[:col_offset].decode("utf-8")
            line = prefix + mask
        new_srcs.append(line)
    return "\n".join(new_srcs), replacements
305
306
def unmask_cell(src: str, replacements: list[Replacement]) -> str:
    """Put the original magics back in place of their masks.

    For example

        "9b20"
        foo = bar

    becomes

        %%time
        foo = bar

    Raises ``NothingChanged`` if any mask does not occur exactly once in the
    cell at the time it is about to be substituted.
    """
    restored = src
    for item in replacements:
        mask = item.mask
        if restored.count(mask) != 1:
            raise NothingChanged
        restored = restored.replace(mask, item.src, 1)
    return restored
325
326
327def _get_code_start(src: str) -> str:
328 """Provides the first line where the code starts.
329
330 Iterates over lines of code until it finds the first line that doesn't
331 contain only empty spaces and comments. It removes any empty spaces at the
332 start of the line and returns it. If such line doesn't exist, it returns an
333 empty string.
334 """
335 for match in re.finditer(".+", src):
336 line = match.group(0).lstrip()
337 if line and not line.startswith("#"):
338 return line
339 return ""
340
341
342def _is_ipython_magic(node: ast.expr) -> TypeGuard[ast.Attribute]:
343 """Check if attribute is IPython magic.
344
345 Note that the source of the abstract syntax tree
346 will already have been processed by IPython's
347 TransformerManager().transform_cell.
348 """
349 return (
350 isinstance(node, ast.Attribute)
351 and isinstance(node.value, ast.Call)
352 and isinstance(node.value.func, ast.Name)
353 and node.value.func.id == "get_ipython"
354 )
355
356
357def _get_str_args(args: list[ast.expr]) -> list[str]:
358 str_args = []
359 for arg in args:
360 assert isinstance(arg, ast.Constant) and isinstance(arg.value, str)
361 str_args.append(arg.value)
362 return str_args
363
364
365@dataclasses.dataclass(frozen=True)
366class CellMagic:
367 name: str
368 params: str | None
369 body: str
370
371 @property
372 def header(self) -> str:
373 if self.params:
374 return f"%%{self.name} {self.params}"
375 return f"%%{self.name}"
376
377
378# ast.NodeVisitor + dataclass = breakage under mypyc.
class CellMagicFinder(ast.NodeVisitor):
    r"""Locate a cell magic in an already-transformed cell.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %%time\n
        foo()

    would have been transformed to

        get_ipython().run_cell_magic('time', '', 'foo()\n')

    and we look for instances of the latter. The result is stored in the
    ``cell_magic`` attribute, which stays ``None`` when nothing is found.
    """

    def __init__(self, cell_magic: CellMagic | None = None) -> None:
        self.cell_magic = cell_magic

    def visit_Expr(self, node: ast.Expr) -> None:
        """Record the cell magic's name, params and body if ``node`` is one."""
        call = node.value
        is_cell_magic = (
            isinstance(call, ast.Call)
            and _is_ipython_magic(call.func)
            and call.func.attr == "run_cell_magic"
        )
        if is_cell_magic:
            # run_cell_magic(name, params, body)
            args = _get_str_args(call.args)
            self.cell_magic = CellMagic(args[0], args[1], args[2])
        self.generic_visit(node)
411
412
@dataclasses.dataclass(frozen=True)
class OffsetAndMagic:
    """A magic's reconstructed source and where its call starts on the line."""

    # ``col_offset`` of the ``get_ipython()...`` call node, as reported by ast.
    col_offset: int
    # Magic as it would be written in the notebook, e.g. ``%matplotlib inline``.
    magic: str
417
418
419# Unsurprisingly, subclassing ast.NodeVisitor means we can't use dataclasses here
420# as mypyc will generate broken code.
class MagicFinder(ast.NodeVisitor):
    """Collect the ``get_ipython()`` calls found in a cell.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %matplotlib inline

    would have been transformed to

        get_ipython().run_line_magic('matplotlib', 'inline')

    and we look for instances of the latter (and likewise for other
    types of magics). Findings are stored in ``magics``, keyed by the line
    number of the call.
    """

    def __init__(self) -> None:
        # Line number of the call -> magics found on that line.
        self.magics: dict[int, list[OffsetAndMagic]] = collections.defaultdict(list)

    def visit_Assign(self, node: ast.Assign) -> None:
        """Record magics that appear on the right-hand side of an assignment.

        For example,

            black_version = !black --version
            env = %env var

        would have been (respectively) transformed to

            black_version = get_ipython().getoutput('black --version')
            env = get_ipython().run_line_magic('env', 'var')

        and we look for instances of any of the latter. Any other
        ``get_ipython()`` method raises AssertionError.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            args = _get_str_args(call.args)
            kind = call.func.attr
            if kind == "getoutput":
                src = f"!{args[0]}"
            elif kind == "run_line_magic":
                name, rest = args[0], args[1]
                src = f"%{name} {rest}" if rest else f"%{name}"
            else:
                raise AssertionError(
                    f"Unexpected IPython magic {call.func.attr!r} found. "
                    "Please report a bug on https://github.com/psf/black/issues."
                ) from None
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, src))
        self.generic_visit(node)

    def visit_Expr(self, node: ast.Expr) -> None:
        """Record magics that stand alone as expression statements.

        For examples,

            !ls
            !!ls
            ?ls
            ??ls

        would (respectively) get transformed to

            get_ipython().system('ls')
            get_ipython().getoutput('ls')
            get_ipython().run_line_magic('pinfo', 'ls')
            get_ipython().run_line_magic('pinfo2', 'ls')

        and we look for instances of any of the latter. Any other
        ``get_ipython()`` method raises ``NothingChanged``.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            args = _get_str_args(call.args)
            kind = call.func.attr
            if kind == "run_line_magic":
                name, rest = args[0], args[1]
                # pinfo/pinfo2 are what "?"/"??" get transformed into.
                help_prefix = {"pinfo": "?", "pinfo2": "??"}.get(name)
                if help_prefix is not None:
                    src = f"{help_prefix}{rest}"
                elif rest:
                    src = f"%{name} {rest}"
                else:
                    src = f"%{name}"
            elif kind == "system":
                src = f"!{args[0]}"
            elif kind == "getoutput":
                src = f"!!{args[0]}"
            else:
                raise NothingChanged  # unsupported magic.
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, src))
        self.generic_visit(node)