1"""Functions to process IPython magics with."""
2
3import ast
4import collections
5import dataclasses
6import re
7import secrets
8import sys
9from functools import lru_cache
10from importlib.util import find_spec
11from typing import Optional
12
13if sys.version_info >= (3, 10):
14 from typing import TypeGuard
15else:
16 from typing_extensions import TypeGuard
17
18from black.mode import Mode
19from black.output import out
20from black.report import NothingChanged
21
# Call prefixes that show up in a cell once IPython has rewritten its magics.
# A cell that already contains one of these verbatim cannot be round-tripped.
TRANSFORMED_MAGICS = frozenset({
    "get_ipython().run_cell_magic",
    "get_ipython().system",
    "get_ipython().getoutput",
    "get_ipython().run_line_magic",
})
# Token names that are skipped when searching for the last significant token
# of a cell (see the trailing-semicolon helpers).
TOKENS_TO_IGNORE = frozenset({
    "ENDMARKER",
    "NL",
    "NEWLINE",
    "COMMENT",
    "DEDENT",
    "UNIMPORTANT_WS",
    "ESCAPED_NL",
})
# Cell magics whose body is treated as Python code and may be formatted.
PYTHON_CELL_MAGICS = frozenset({
    "capture",
    "prun",
    "pypy",
    "python",
    "python3",
    "time",
    "timeit",
})
46
47
@dataclasses.dataclass(frozen=True)
class Replacement:
    """Pairing of a placeholder with the magic text it stands in for."""

    # Placeholder text inserted into the cell in place of the magic.
    mask: str
    # Original magic text that ``mask`` is swapped back to when unmasking.
    src: str
52
53
@lru_cache
def jupyter_dependencies_are_installed(*, warn: bool) -> bool:
    """Report whether the optional Jupyter dependencies can be located.

    Both ``tokenize_rt`` and ``IPython`` must be discoverable through
    ``find_spec``. If either is missing and ``warn`` is true, a hint on how
    to install them is emitted through ``out``.

    The function is wrapped in ``lru_cache``, so a repeated call with the
    same ``warn`` value reuses the cached answer without emitting the hint
    again.
    """
    required_modules = ("tokenize_rt", "IPython")
    # ``all`` short-circuits, so IPython is only looked up when tokenize_rt
    # was found.
    if all(find_spec(module) is not None for module in required_modules):
        return True
    if warn:
        out(
            "Skipping .ipynb files as Jupyter dependencies are not installed.\n"
            'You can fix this by running ``pip install "black[jupyter]"``'
        )
    return False
66
67
def validate_cell(src: str, mode: Mode) -> None:
    """Reject cells that cannot be safely masked and formatted.

    Two kinds of cells are refused by raising ``NothingChanged``:

    * Cells that already contain the output of IPython's magic
      transformation (any entry of ``TRANSFORMED_MAGICS``). ``!ls`` is turned
      into ``get_ipython().system('ls')``, so a cell that literally contained
      ``get_ipython().system('ls')`` would be indistinguishable from one that
      contained ``!ls`` and could not be round-tripped.
    * Cells whose first line of code is a cell magic (``%%name``) that is
      neither in ``PYTHON_CELL_MAGICS`` nor in ``mode.python_cell_magics``;
      their body is not necessarily Python and might break ``tokenize_rt``
      because of indentation.
    """
    for transformed_magic in TRANSFORMED_MAGICS:
        if transformed_magic in src:
            raise NothingChanged

    first_code_line = _get_code_start(src)
    if not first_code_line.startswith("%%"):
        return

    # "%%name args..." -> "name"
    magic_name = first_code_line.split(maxsplit=1)[0][2:]
    allowed_magics = PYTHON_CELL_MAGICS | mode.python_cell_magics
    if magic_name not in allowed_magics:
        raise NothingChanged
94
95
def remove_trailing_semicolon(src: str) -> tuple[str, bool]:
    """Strip the trailing semicolon of a Jupyter notebook cell, if present.

    For example,

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    would become

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    Returns the (possibly modified) source together with a flag telling
    whether a semicolon was removed. Only the last token whose name is not in
    ``TOKENS_TO_IGNORE`` is inspected.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but works
    on ``tokenize_rt`` tokens so that the source round-trips.
    """
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    for position, tok in reversed_enumerate(tokens):
        if tok.name in TOKENS_TO_IGNORE:
            continue
        # First significant token from the end: it decides the outcome.
        if tok.name == "OP" and tok.src == ";":
            del tokens[position]
            return tokens_to_src(tokens), True
        break
    return src, False
126
127
def put_trailing_semicolon_back(src: str, has_trailing_semicolon: bool) -> str:
    """Re-append the trailing semicolon if the cell originally had one.

    When ``has_trailing_semicolon`` is false, ``src`` is returned untouched.
    Otherwise a ``;`` is appended to the last token whose name is not in
    ``TOKENS_TO_IGNORE``. An ``AssertionError`` is raised if no such token
    exists.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but works
    on ``tokenize_rt`` tokens so that the source round-trips.
    """
    if not has_trailing_semicolon:
        return src
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    last_significant = next(
        (
            position
            for position, tok in reversed_enumerate(tokens)
            if tok.name not in TOKENS_TO_IGNORE
        ),
        None,
    )
    if last_significant is None:  # pragma: nocover
        raise AssertionError(
            "INTERNAL ERROR: Was not able to reinstate trailing semicolon. "
            "Please report a bug on https://github.com/psf/black/issues. "
        ) from None
    target = tokens[last_significant]
    tokens[last_significant] = target._replace(src=target.src + ";")
    return str(tokens_to_src(tokens))
150
151
def mask_cell(src: str) -> tuple[str, list[Replacement]]:
    """Mask IPython magics so the cell becomes parseable Python code.

    For example,

        %matplotlib inline
        'foo'

    becomes

        b"25716f358c32750"
        'foo'

    Returns the transformed code together with the replacements needed to
    undo the masking. A cell that already parses as Python is returned as is
    with no replacements. ``NothingChanged`` is raised when the number of
    lines changes during masking (multi-line magics are not supported).
    """
    try:
        ast.parse(src)
    except SyntaxError:
        # Not plain Python: it may contain IPython magics, handled below.
        pass
    else:
        # Already valid syntax, so there is nothing to mask.
        return src, []

    from IPython.core.inputtransformer2 import TransformerManager

    manager = TransformerManager()
    # A side effect of transform_cell is that it also removes any empty lines
    # at the beginning of the cell.
    after_cell_magic, cell_magic_replacements = replace_cell_magics(
        manager.transform_cell(src)
    )
    # Transform again so that magics inside a cell magic's body are exposed.
    masked, magic_replacements = replace_magics(
        manager.transform_cell(after_cell_magic)
    )

    lines_after = len(masked.strip().splitlines())
    lines_before = len(src.strip().splitlines())
    if lines_after != lines_before:
        # Multi-line magic, not supported.
        raise NothingChanged
    return masked, [*cell_magic_replacements, *magic_replacements]
192
193
def create_token(n_chars: int) -> str:
    """Create a random bytes-literal token of roughly ``n_chars`` characters.

    The token has the form ``b"<hex digits>"``. For ``n_chars >= 4`` it is
    exactly ``n_chars`` characters long; for smaller values it is 4
    characters long, because the ``b""`` wrapper plus one hex digit is the
    shortest token that can be produced. ``n_chars`` must be positive.
    """
    assert n_chars > 0
    wrapper_length = 3  # the characters b, " and " around the digits
    digits = secrets.token_hex(max(n_chars // 2 - 1, 1))
    # token_hex always yields an even number of digits; drop one when the
    # wrapped token would otherwise exceed the requested length.
    if len(digits) + wrapper_length > n_chars:
        digits = digits[:-1]
    # A bytestring is used so that the token does not get interpreted as a
    # docstring.
    return 'b"' + digits + '"'
204
205
def get_token(src: str, magic: str) -> str:
    """Return a random token, absent from ``src``, to mask ``magic`` with.

    For example, if ``magic`` was ``%matplotlib inline``, a possible token
    would be ``b"43fdd17f7e5ddc8"``. The token is produced by
    ``create_token`` from the length of ``magic``, and new tokens are drawn
    until one is found that does not occur anywhere in ``src``. After too
    many unsuccessful draws an ``AssertionError`` is raised. ``magic`` must
    not be empty.
    """
    assert magic
    length = len(magic)
    retries = 0
    candidate = create_token(length)
    while candidate in src:
        if retries == 100:
            raise AssertionError(
                "INTERNAL ERROR: Black was not able to replace IPython magic. "
                "Please report a bug on https://github.com/psf/black/issues. "
                f"The magic might be helpful: {magic}"
            ) from None
        candidate = create_token(length)
        retries += 1
    return candidate
228
229
def replace_cell_magics(src: str) -> tuple[str, list[Replacement]]:
    """Replace a cell magic with a token.

    Note that ``src`` will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example,

        get_ipython().run_cell_magic('t', '-n1', 'ls =!ls\\n')

    becomes

        b"a794"
        ls =!ls

    i.e. the cell magic header is swapped for a token and the magic's body
    follows on the next line. The transformed code is returned along with the
    replacement; when no cell magic is found, ``src`` is returned unchanged
    with an empty list.
    """
    finder = CellMagicFinder()
    finder.visit(ast.parse(src))
    found = finder.cell_magic
    if found is None:
        return src, []

    header = found.header
    mask = get_token(src, header)
    return f"{mask}\n{found.body}", [Replacement(mask=mask, src=header)]
259
260
def replace_magics(src: str) -> tuple[str, list[Replacement]]:
    """Replace magics within the body of a cell with tokens.

    Note that ``src`` will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example, this

        get_ipython().run_line_magic('matplotlib', 'inline')
        'foo'

    becomes

        b"5e67db56d490fd3"
        'foo'

    Every line on which ``MagicFinder`` located a magic is cut at the start
    of the ``get_ipython()`` call and the token is appended in its place.

    Returns the transformed code along with the replacements, in the order
    the magics appear. Raises ``AssertionError`` if more than one magic is
    reported for a single line, or (through ``get_token``) if no unused token
    can be found.
    """
    replacements: list[Replacement] = []
    magic_finder = MagicFinder()
    magic_finder.visit(ast.parse(src))
    # Text that a new mask must not occur in: the cell itself plus every mask
    # handed out so far. Checking against the cell alone would allow two
    # magics (especially short ones, whose tokens carry a single hex digit)
    # to receive the same mask, and unmasking would then restore both of them
    # as the first magic.
    reserved = src
    new_srcs = []
    for i, line in enumerate(src.split("\n"), start=1):
        if i in magic_finder.magics:
            offsets_and_magics = magic_finder.magics[i]
            if len(offsets_and_magics) != 1:  # pragma: nocover
                raise AssertionError(
                    f"Expecting one magic per line, got: {offsets_and_magics}\n"
                    "Please report a bug on https://github.com/psf/black/issues."
                )
            found = offsets_and_magics[0]
            mask = get_token(reserved, found.magic)
            # Masks never contain a newline, so joining with one cannot
            # create a match that spans two reserved entries.
            reserved = f"{reserved}\n{mask}"
            replacements.append(Replacement(mask=mask, src=found.magic))
            # ast reports col_offset as a UTF-8 *byte* offset; convert it to
            # a character index so that non-ASCII text before the magic
            # (e.g. ``café = !ls``) does not shift the cut position.
            prefix_bytes = line.encode("utf-8")[: found.col_offset]
            char_offset = len(prefix_bytes.decode("utf-8"))
            line = line[:char_offset] + mask
        new_srcs.append(line)
    return "\n".join(new_srcs), replacements
300
301
def unmask_cell(src: str, replacements: list[Replacement]) -> str:
    """Put the original magics back in place of their masks.

    For example

        b"9b20"
        foo = bar

    becomes

        %%time
        foo = bar

    The replacements are applied one after another, in list order, and each
    one substitutes every occurrence of its mask.
    """
    restored = src
    for entry in replacements:
        restored = restored.replace(entry.mask, entry.src)
    return restored
318
319
def _get_code_start(src: str) -> str:
    """Return the first line of ``src`` that holds actual code.

    Lines are taken in order; a line is skipped when, after removing its
    leading whitespace, it is empty or starts with ``#``. The first remaining
    line is returned with its leading whitespace removed (trailing whitespace
    is kept). If there is no such line, an empty string is returned.
    """
    # Only "\n" separates lines here, matching what "." excludes in a regex.
    for raw_line in src.split("\n"):
        candidate = raw_line.lstrip()
        if not candidate or candidate.startswith("#"):
            continue
        return candidate
    return ""
333
334
335def _is_ipython_magic(node: ast.expr) -> TypeGuard[ast.Attribute]:
336 """Check if attribute is IPython magic.
337
338 Note that the source of the abstract syntax tree
339 will already have been processed by IPython's
340 TransformerManager().transform_cell.
341 """
342 return (
343 isinstance(node, ast.Attribute)
344 and isinstance(node.value, ast.Call)
345 and isinstance(node.value.func, ast.Name)
346 and node.value.func.id == "get_ipython"
347 )
348
349
def _get_str_args(args: list[ast.expr]) -> list[str]:
    """Return the values of ``args``, all of which must be string constants.

    An ``AssertionError`` is raised at the first argument that is not an
    ``ast.Constant`` holding a ``str``.
    """
    values: list[str] = []
    for node in args:
        assert isinstance(node, ast.Constant) and isinstance(node.value, str)
        values.append(node.value)
    return values
356
357
@dataclasses.dataclass(frozen=True)
class CellMagic:
    """A cell magic split into its name, its parameters and its body."""

    name: str
    params: Optional[str]
    body: str

    @property
    def header(self) -> str:
        """The ``%%name params`` line; just ``%%name`` if params is empty."""
        bare_header = f"%%{self.name}"
        return f"{bare_header} {self.params}" if self.params else bare_header
369
370
371# ast.NodeVisitor + dataclass = breakage under mypyc.
class CellMagicFinder(ast.NodeVisitor):
    """Find cell magics.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example, a cell consisting of the line ``%%time`` followed by the
    line ``foo()`` would have been transformed to

        get_ipython().run_cell_magic('time', '', 'foo()\\n')

    and it is calls of this form that are looked for. The most recently
    visited match is stored in ``cell_magic``.
    """

    def __init__(self, cell_magic: Optional[CellMagic] = None) -> None:
        self.cell_magic = cell_magic

    def visit_Expr(self, node: ast.Expr) -> None:
        """Record the cell magic (name, params, body) if ``node`` is one."""
        call = node.value
        if (
            isinstance(call, ast.Call)
            and _is_ipython_magic(call.func)
            and call.func.attr == "run_cell_magic"
        ):
            str_args = _get_str_args(call.args)
            # run_cell_magic(name, params, body)
            self.cell_magic = CellMagic(
                name=str_args[0], params=str_args[1], body=str_args[2]
            )
        self.generic_visit(node)
404
405
@dataclasses.dataclass(frozen=True)
class OffsetAndMagic:
    """A magic found in a cell, together with where it starts on its line."""

    # ``col_offset`` of the ``get_ipython()`` call node, as reported by ast.
    col_offset: int
    # The magic as reconstructed in IPython syntax, e.g. ``!ls``.
    magic: str
410
411
412# Unsurprisingly, subclassing ast.NodeVisitor means we can't use dataclasses here
413# as mypyc will generate broken code.
class MagicFinder(ast.NodeVisitor):
    """Visit cell to look for get_ipython calls.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %matplotlib inline

    would have been transformed to

        get_ipython().run_line_magic('matplotlib', 'inline')

    and it is calls of this form that are looked for (and likewise for the
    other types of magics). Every match is stored in ``magics``, keyed by
    the line number of the call, as an ``OffsetAndMagic`` holding the column
    offset of the call and the magic rebuilt in IPython syntax.
    """

    def __init__(self) -> None:
        self.magics: dict[int, list[OffsetAndMagic]] = collections.defaultdict(list)

    def visit_Assign(self, node: ast.Assign) -> None:
        """Look for magics on the right-hand side of an assignment.

        For example,

            black_version = !black --version
            env = %env var

        would have been (respectively) transformed to

            black_version = get_ipython().getoutput('black --version')
            env = get_ipython().run_line_magic('env', 'var')

        and it is calls of this form that are looked for. Any other
        ``get_ipython()`` method on the right-hand side raises
        ``AssertionError``.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            str_args = _get_str_args(call.args)
            method = call.func.attr
            if method == "getoutput":
                magic = "!" + str_args[0]
            elif method == "run_line_magic":
                magic = "%" + str_args[0]
                if str_args[1]:
                    magic = f"{magic} {str_args[1]}"
            else:
                raise AssertionError(
                    f"Unexpected IPython magic {method!r} found. "
                    "Please report a bug on https://github.com/psf/black/issues."
                ) from None
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, magic))
        self.generic_visit(node)

    def visit_Expr(self, node: ast.Expr) -> None:
        """Look for magics that form a statement of their own.

        For example,

            !ls
            !!ls
            ?ls
            ??ls

        would (respectively) get transformed to

            get_ipython().system('ls')
            get_ipython().getoutput('ls')
            get_ipython().run_line_magic('pinfo', 'ls')
            get_ipython().run_line_magic('pinfo2', 'ls')

        and it is calls of this form that are looked for. Any other
        ``get_ipython()`` method raises ``NothingChanged``.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            str_args = _get_str_args(call.args)
            method = call.func.attr
            if method == "run_line_magic":
                magic_name, magic_args = str_args[0], str_args[1]
                if magic_name == "pinfo":
                    magic = "?" + magic_args
                elif magic_name == "pinfo2":
                    magic = "??" + magic_args
                elif magic_args:
                    magic = f"%{magic_name} {magic_args}"
                else:
                    magic = f"%{magic_name}"
            elif method == "system":
                magic = "!" + str_args[0]
            elif method == "getoutput":
                magic = "!!" + str_args[0]
            else:
                raise NothingChanged  # unsupported magic.
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, magic))
        self.generic_visit(node)