1"""Functions to process IPython magics with."""
2
3import ast
4import collections
5import dataclasses
6import re
7import secrets
8from functools import lru_cache
9from importlib.util import find_spec
10from typing import TypeGuard
11
12from black.mode import Mode
13from black.output import out
14from black.report import NothingChanged
15
# Call prefixes that IPython's TransformerManager emits when it rewrites magics.
# Cells that already contain one of these cannot be round-tripped safely.
TRANSFORMED_MAGICS = frozenset({
    "get_ipython().run_cell_magic",
    "get_ipython().system",
    "get_ipython().getoutput",
    "get_ipython().run_line_magic",
})
# tokenize_rt token names that are skipped when looking for the last
# significant token of a cell (used by the trailing-semicolon helpers).
TOKENS_TO_IGNORE = frozenset({
    "ENDMARKER",
    "NL",
    "NEWLINE",
    "COMMENT",
    "DEDENT",
    "UNIMPORTANT_WS",
    "ESCAPED_NL",
})
# Cell magics whose body is Python code and can therefore be formatted.
PYTHON_CELL_MAGICS = frozenset({
    "capture",
    "prun",
    "pypy",
    "python",
    "python3",
    "time",
    "timeit",
})
40
41
@dataclasses.dataclass(frozen=True)
class Replacement:
    """Pairing of a placeholder token with the magic text it stands in for."""

    # Placeholder written into the cell in place of the magic.
    mask: str
    # Original magic text that is restored when the cell is unmasked.
    src: str
46
47
@lru_cache
def jupyter_dependencies_are_installed(*, warn: bool) -> bool:
    """Return whether both ``tokenize_rt`` and ``IPython`` can be found.

    When they cannot and ``warn`` is true, a hint on how to install them is
    printed via ``out``. Results are cached per value of ``warn``, so the hint
    is emitted at most once.
    """
    required_modules = ("tokenize_rt", "IPython")
    installed = all(find_spec(name) is not None for name in required_modules)
    if warn and not installed:
        out(
            "Skipping .ipynb files as Jupyter dependencies are not installed.\n"
            'You can fix this by running ``pip install "black[jupyter]"``'
        )
    return installed
60
61
def validate_cell(src: str, mode: Mode) -> None:
    r"""Raise NothingChanged for cells that cannot be processed safely.

    Two kinds of cells are rejected:

    * cells that already contain the output of a TransformerManager
      transformation, and
    * cells starting with a cell magic that is not known to hold Python code,
      which might cause tokenize_rt to break because of indentations.

    If a cell contains ``!ls``, then it'll be transformed to
    ``get_ipython().system('ls')``. However, if the cell originally contained
    ``get_ipython().system('ls')``, then it would get transformed in the same way:

        >>> TransformerManager().transform_cell("get_ipython().system('ls')")
        "get_ipython().system('ls')\n"
        >>> TransformerManager().transform_cell("!ls")
        "get_ipython().system('ls')\n"

    Due to the impossibility of safely roundtripping in such situations, cells
    containing transformed magics will be ignored.
    """
    for transformed_magic in TRANSFORMED_MAGICS:
        if transformed_magic in src:
            raise NothingChanged

    first_code_line = _get_code_start(src)
    if not first_code_line.startswith("%%"):
        return

    # Strip the leading "%%" from the first whitespace-delimited word.
    magic_name = first_code_line.split(maxsplit=1)[0][2:]
    if magic_name not in PYTHON_CELL_MAGICS | mode.python_cell_magics:
        raise NothingChanged
88
89
def remove_trailing_semicolon(src: str) -> tuple[str, bool]:
    """Strip the trailing semicolon of a Jupyter notebook cell, if any.

    For example,

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    would become

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.

    Returns the (possibly modified) source and a flag telling whether a
    semicolon was removed.
    """
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    # Only the last token that is not whitespace/comment/etc. matters.
    last_significant = next(
        (
            (idx, token)
            for idx, token in reversed_enumerate(tokens)
            if token.name not in TOKENS_TO_IGNORE
        ),
        None,
    )
    if last_significant is None:
        return src, False

    idx, token = last_significant
    if token.name != "OP" or token.src != ";":
        return src, False

    del tokens[idx]
    return tokens_to_src(tokens), True
120
121
def put_trailing_semicolon_back(src: str, has_trailing_semicolon: bool) -> str:
    """Re-append the trailing semicolon if the cell originally had one.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.

    The semicolon is attached to the last token that is not listed in
    TOKENS_TO_IGNORE. ``src`` is returned untouched when
    ``has_trailing_semicolon`` is false.
    """
    if not has_trailing_semicolon:
        return src
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    target_idx = next(
        (
            idx
            for idx, token in reversed_enumerate(tokens)
            if token.name not in TOKENS_TO_IGNORE
        ),
        None,
    )
    if target_idx is None:  # pragma: nocover
        raise AssertionError(
            "INTERNAL ERROR: Was not able to reinstate trailing semicolon. "
            "Please report a bug on https://github.com/psf/black/issues. "
        ) from None

    target = tokens[target_idx]
    tokens[target_idx] = target._replace(src=target.src + ";")
    return str(tokens_to_src(tokens))
144
145
def mask_cell(src: str) -> tuple[str, list[Replacement]]:
    """Mask IPython magics so content becomes parseable Python code.

    For example,

        %matplotlib inline
        'foo'

    becomes

        b"25716f358c32750"
        'foo'

    Returns the transformed code together with the replacements that were
    made. Source that already parses as Python is returned unchanged with an
    empty list. NothingChanged is raised when masking changes the number of
    lines, which indicates an unsupported multi-line magic.
    """
    try:
        ast.parse(src)
    except SyntaxError:
        # Might have IPython magics, will process below.
        pass
    else:
        # Syntax is fine, nothing to mask, early return.
        return src, []

    from IPython.core.inputtransformer2 import TransformerManager

    manager = TransformerManager()
    # A side effect of the following transformation is that it also removes any
    # empty lines at the beginning of the cell.
    after_first_pass = manager.transform_cell(src)
    without_cell_magic, cell_magic_replacements = replace_cell_magics(
        after_first_pass
    )
    # The body extracted from a cell magic is transformed again before the
    # remaining magics are masked.
    after_second_pass = manager.transform_cell(without_cell_magic)
    masked, magic_replacements = replace_magics(after_second_pass)

    original_line_count = len(src.strip().splitlines())
    if len(masked.strip().splitlines()) != original_line_count:
        # Multi-line magic, not supported.
        raise NothingChanged
    return masked, [*cell_magic_replacements, *magic_replacements]
186
187
def create_token(n_chars: int) -> str:
    """Create a random bytes-literal token that is ``n_chars`` characters long.

    The token has the form ``b"<hex digits>"``. Since the ``b""`` wrapper takes
    three characters and at least one hex digit is always kept, the result is
    four characters long whenever ``n_chars`` is smaller than four.
    """
    assert n_chars > 0
    wrapper_len = 3  # the leading b" and the closing "
    hex_digits = secrets.token_hex(max(n_chars // 2 - 1, 1))
    # token_hex always yields an even number of digits; drop one when the
    # wrapped token would otherwise be too long.
    if len(hex_digits) + wrapper_len > n_chars:
        hex_digits = hex_digits[:-1]
    # We use a bytestring so that the string does not get interpreted
    # as a docstring.
    return f'b"{hex_digits}"'
198
199
def get_token(src: str, magic: str) -> str:
    """Return a randomly generated token to mask an IPython magic with.

    For example, if 'magic' was `%matplotlib inline`, then a possible
    token to mask it with would be `b"43fdd17f7e5ddc8"`. The token is
    created with the length of the magic as its target length, and we make
    sure that it is not already present anywhere else in the cell.

    Raises AssertionError if no unused token could be found.
    """
    assert magic
    n_chars = len(magic)
    # One initial attempt plus up to 100 retries.
    for _ in range(101):
        candidate = create_token(n_chars)
        if candidate not in src:
            return candidate
    raise AssertionError(
        "INTERNAL ERROR: Black was not able to replace IPython magic. "
        "Please report a bug on https://github.com/psf/black/issues. "
        f"The magic might be helpful: {magic}"
    ) from None
222
223
def replace_cell_magics(src: str) -> tuple[str, list[Replacement]]:
    r"""Replace a cell magic with a token.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example,

        get_ipython().run_cell_magic('t', '-n1', 'ls =!ls\n')

    becomes

        b"a794"
        ls =!ls

    i.e. the magic's header is swapped for a token and the magic's body
    follows on the next line. The transformed code is returned along with the
    replacement; if no cell magic is found, 'src' comes back unchanged with an
    empty list.
    """
    finder = CellMagicFinder()
    finder.visit(ast.parse(src))

    found = finder.cell_magic
    if found is None:
        return src, []

    header = found.header
    mask = get_token(src, header)
    return f"{mask}\n{found.body}", [Replacement(mask=mask, src=header)]
253
254
def replace_magics(src: str) -> tuple[str, list[Replacement]]:
    """Replace magics within body of cell.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example, this

        get_ipython().run_line_magic('matplotlib', 'inline')
        'foo'

    becomes

        b"5e67db56d490fd39"
        'foo'

    Everything on a line from the start of the magic call onwards is replaced
    by the token; text before it (indentation, assignment target) is kept.

    The replacements, along with the transformed code, are returned.

    Raises AssertionError if more than one magic is found on a single line.
    """
    replacements: list[Replacement] = []
    magic_finder = MagicFinder()
    magic_finder.visit(ast.parse(src))
    new_srcs = []
    for i, line in enumerate(src.split("\n"), start=1):
        if i in magic_finder.magics:
            offsets_and_magics = magic_finder.magics[i]
            if len(offsets_and_magics) != 1:  # pragma: nocover
                raise AssertionError(
                    f"Expecting one magic per line, got: {offsets_and_magics}\n"
                    "Please report a bug on https://github.com/psf/black/issues."
                )
            col_offset, magic = (
                offsets_and_magics[0].col_offset,
                offsets_and_magics[0].magic,
            )
            mask = get_token(src, magic)
            replacements.append(Replacement(mask=mask, src=magic))
            # ast reports col_offset as a UTF-8 *byte* offset, so the prefix has
            # to be cut from the encoded line. Slicing the str directly would
            # keep too much whenever non-ASCII characters precede the magic
            # (e.g. ``é = !ls``).
            prefix = line.encode("utf-8")[:col_offset].decode("utf-8")
            line = prefix + mask
        new_srcs.append(line)
    return "\n".join(new_srcs), replacements
294
295
def unmask_cell(src: str, replacements: list[Replacement]) -> str:
    """Put the original magics back in place of their tokens.

    For example

        b"9b20"
        foo = bar

    becomes

        %%time
        foo = bar

    Replacements are applied one after another, in the order given.
    """
    restored = src
    for item in replacements:
        restored = restored.replace(item.mask, item.src)
    return restored
312
313
def _get_code_start(src: str) -> str:
    """Return the first line of actual code, stripped of leading whitespace.

    Lines (as delimited by ``\\n``) that are blank or hold nothing but a
    comment are skipped. An empty string is returned when no other line
    exists.
    """
    for raw_line in src.split("\n"):
        candidate = raw_line.lstrip()
        if not candidate or candidate.startswith("#"):
            continue
        return candidate
    return ""
327
328
329def _is_ipython_magic(node: ast.expr) -> TypeGuard[ast.Attribute]:
330 """Check if attribute is IPython magic.
331
332 Note that the source of the abstract syntax tree
333 will already have been processed by IPython's
334 TransformerManager().transform_cell.
335 """
336 return (
337 isinstance(node, ast.Attribute)
338 and isinstance(node.value, ast.Call)
339 and isinstance(node.value.func, ast.Name)
340 and node.value.func.id == "get_ipython"
341 )
342
343
def _get_str_args(args: list[ast.expr]) -> list[str]:
    """Return the values of ``args``, all of which must be string constants.

    An AssertionError is raised on the first argument that is not an
    ``ast.Constant`` holding a ``str``.
    """
    values: list[str] = []
    for node in args:
        # The assert also narrows the type of ``node`` for the type checker.
        assert isinstance(node, ast.Constant) and isinstance(node.value, str)
        values.append(node.value)
    return values
350
351
352@dataclasses.dataclass(frozen=True)
353class CellMagic:
354 name: str
355 params: str | None
356 body: str
357
358 @property
359 def header(self) -> str:
360 if self.params:
361 return f"%%{self.name} {self.params}"
362 return f"%%{self.name}"
363
364
# Deliberately not a dataclass: combining ast.NodeVisitor with dataclasses
# breaks under mypyc.
class CellMagicFinder(ast.NodeVisitor):
    r"""Locate a cell magic in already-transformed source.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %%time\n
        foo()

    would have been transformed to

        get_ipython().run_cell_magic('time', '', 'foo()\n')

    and we look for instances of the latter. The result is stored in the
    ``cell_magic`` attribute, which stays at its initial value when nothing is
    found.
    """

    def __init__(self, cell_magic: CellMagic | None = None) -> None:
        self.cell_magic = cell_magic

    def visit_Expr(self, node: ast.Expr) -> None:
        """Find cell magic, extract header and body."""
        call = node.value
        is_cell_magic = (
            isinstance(call, ast.Call)
            and _is_ipython_magic(call.func)
            and call.func.attr == "run_cell_magic"
        )
        if is_cell_magic:
            # run_cell_magic(name, params, body)
            str_args = _get_str_args(node.value.args)
            self.cell_magic = CellMagic(
                name=str_args[0], params=str_args[1], body=str_args[2]
            )
        self.generic_visit(node)
398
399
@dataclasses.dataclass(frozen=True)
class OffsetAndMagic:
    """A magic's original text together with where its call starts on the line."""

    # ``col_offset`` of the ``get_ipython()...`` call node, as reported by ast.
    col_offset: int
    # Magic as it would have been written in the notebook, e.g. ``!ls``.
    magic: str
404
405
# Deliberately not a dataclass: subclassing ast.NodeVisitor means mypyc would
# generate broken code for one.
class MagicFinder(ast.NodeVisitor):
    """Visit cell to look for get_ipython calls.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %matplotlib inline

    would have been transformed to

        get_ipython().run_line_magic('matplotlib', 'inline')

    and we look for instances of the latter (and likewise for other
    types of magics).

    Findings are collected in ``magics``, which maps a line number to the
    magics found on that line.
    """

    def __init__(self) -> None:
        self.magics: dict[int, list[OffsetAndMagic]] = collections.defaultdict(list)

    def visit_Assign(self, node: ast.Assign) -> None:
        """Look for system assign magics.

        For example,

            black_version = !black --version
            env = %env var

        would have been (respectively) transformed to

            black_version = get_ipython().getoutput('black --version')
            env = get_ipython().run_line_magic('env', 'var')

        and we look for instances of any of the latter.

        Raises AssertionError for any other get_ipython() method.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            args = _get_str_args(call.args)
            kind = call.func.attr
            if kind == "getoutput":
                src = f"!{args[0]}"
            elif kind == "run_line_magic":
                src = f"%{args[0]} {args[1]}" if args[1] else f"%{args[0]}"
            else:
                raise AssertionError(
                    f"Unexpected IPython magic {kind!r} found. "
                    "Please report a bug on https://github.com/psf/black/issues."
                ) from None
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, src))
        self.generic_visit(node)

    def visit_Expr(self, node: ast.Expr) -> None:
        """Look for magics in body of cell.

        For examples,

            !ls
            !!ls
            ?ls
            ??ls

        would (respectively) get transformed to

            get_ipython().system('ls')
            get_ipython().getoutput('ls')
            get_ipython().run_line_magic('pinfo', 'ls')
            get_ipython().run_line_magic('pinfo2', 'ls')

        and we look for instances of any of the latter.

        Raises NothingChanged for any other get_ipython() method.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            args = _get_str_args(call.args)
            kind = call.func.attr
            if kind == "run_line_magic":
                # ``?x`` and ``??x`` are expressed as pinfo/pinfo2 line magics.
                help_prefixes = {"pinfo": "?", "pinfo2": "??"}
                if args[0] in help_prefixes:
                    src = f"{help_prefixes[args[0]]}{args[1]}"
                else:
                    src = f"%{args[0]} {args[1]}" if args[1] else f"%{args[0]}"
            elif kind == "system":
                src = f"!{args[0]}"
            elif kind == "getoutput":
                src = f"!!{args[0]}"
            else:
                raise NothingChanged  # unsupported magic.
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, src))
        self.generic_visit(node)