1"""This module defines TemplateExporter, a highly configurable converter
2that uses Jinja2 to export notebook files into different formats.
3"""
4
5# Copyright (c) IPython Development Team.
6# Distributed under the terms of the Modified BSD License.
7from __future__ import annotations
8
9import html
10import json
11import os
12import typing as t
13import uuid
14import warnings
15from pathlib import Path
16
17from jinja2 import (
18 BaseLoader,
19 ChoiceLoader,
20 DictLoader,
21 Environment,
22 FileSystemLoader,
23 TemplateNotFound,
24)
25from jupyter_core.paths import jupyter_path
26from nbformat import NotebookNode
27from traitlets import Bool, Dict, HasTraits, List, Unicode, default, observe, validate
28from traitlets.config import Config
29from traitlets.utils.importstring import import_item
30
31from nbconvert import filters
32
33from .exporter import Exporter
34
# Jinja2 extensions enabled on every environment built by this module.
JINJA_EXTENSIONS = ["jinja2.ext.loopcontrols"]

# Directory that contains this module.
ROOT = os.path.split(__file__)[0]
# True when a ``.git`` entry exists two levels above ROOT (a source checkout).
DEV_MODE = os.path.exists(os.path.join(ROOT, "../../.git"))
40
41
def _escape_html(s):
    """Return ``str(s)`` with HTML special characters (quotes included) escaped."""
    return html.escape(str(s))


def _escape_html_keep_quotes(s):
    """Return ``str(s)`` with HTML special characters escaped, leaving quotes as-is."""
    return html.escape(str(s), quote=False)


def _escape_html_script(s):
    """Return ``s`` with every ``/`` replaced by ``\\/``."""
    return s.replace("/", "\\/")


# Name -> filter mapping handed to Jinja by ``TemplateExporter.default_filters``.
# Values are either plain callables or filter classes.
default_filters = {
    "indent": filters.indent,
    "markdown2html": filters.markdown2html,
    "markdown2asciidoc": filters.markdown2asciidoc,
    "ansi2html": filters.ansi2html,
    "filter_data_type": filters.DataTypeFilter,
    "get_lines": filters.get_lines,
    "highlight2html": filters.Highlight2HTML,
    "highlight2latex": filters.Highlight2Latex,
    "ipython2python": filters.ipython2python,
    "posix_path": filters.posix_path,
    "markdown2latex": filters.markdown2latex,
    "markdown2rst": filters.markdown2rst,
    "comment_lines": filters.comment_lines,
    "strip_ansi": filters.strip_ansi,
    "strip_dollars": filters.strip_dollars,
    "strip_files_prefix": filters.strip_files_prefix,
    "html2text": filters.html2text,
    "add_anchor": filters.add_anchor,
    "ansi2latex": filters.ansi2latex,
    "wrap_text": filters.wrap_text,
    "escape_latex": filters.escape_latex,
    "citation2latex": filters.citation2latex,
    "path2url": filters.path2url,
    "add_prompts": filters.add_prompts,
    "ascii_only": filters.ascii_only,
    "prevent_list_blocks": filters.prevent_list_blocks,
    "get_metadata": filters.get_metadata,
    "convert_pandoc": filters.convert_pandoc,
    "json_dumps": json.dumps,
    # For removing any HTML
    "escape_html": _escape_html,
    "escape_html_keep_quotes": _escape_html_keep_quotes,
    "escape_html_script": _escape_html_script,
    # For sanitizing HTML for any XSS
    "clean_html": filters.clean_html,
    "strip_trailing_newline": filters.strip_trailing_newline,
    "text_base64": filters.text_base64,
}
81
82
83# copy of https://github.com/jupyter/jupyter_server/blob/b62458a7f5ad6b5246d2f142258dedaa409de5d9/jupyter_server/config_manager.py#L19
def recursive_update(target, new):
    """Merge ``new`` into ``target`` in place, descending into nested dicts.

    A ``None`` value in ``new`` removes the matching key from ``target``.
    Sub-dictionaries that end up empty after merging are removed as well.

    Parameters
    ----------
    target : dict
        Dictionary that is modified in place.
    new : dict
        Dictionary whose entries are applied on top of ``target``.

    Returns
    -------
    dict
        ``target`` itself, returned for convenience.
    """
    for key, incoming in new.items():
        if incoming is None:
            # Explicit deletion request; a missing key is not an error.
            target.pop(key, None)
            continue

        if not isinstance(incoming, dict):
            # Plain value: overwrite whatever was there.
            target[key] = incoming
            continue

        if key not in target:
            target[key] = {}
        merged = recursive_update(target[key], incoming)
        if not merged:
            # Prune empty subdicts
            del target[key]

    return target
103
104
105# define function at the top level to avoid pickle errors
def deprecated(msg):
    """Issue ``msg`` as a ``DeprecationWarning`` attributed to the caller."""
    # stacklevel=2 makes the warning point at whoever called ``deprecated``.
    warnings.warn(message=msg, category=DeprecationWarning, stacklevel=2)
109
110
class ExtensionTolerantLoader(BaseLoader):
    """Loader wrapper that retries a failed lookup with an extension appended.

    Parameters
    ----------
    loader
        The Jinja loader instance that performs the real lookups.
    extension : str
        Suffix appended to the template name when the plain name is not
        found. It should include the dot, e.g. '.tpl'.
    """

    def __init__(self, loader, extension):
        """Store the wrapped loader and the fallback extension."""
        self.extension = extension
        self.loader = loader

    def get_source(self, environment, template):
        """Return the source for ``template``, retrying with the extension.

        The wrapped loader is asked for ``template`` first. If that fails and
        the name does not already end with the extension, it is asked again
        for ``template + extension``.

        Raises
        ------
        TemplateNotFound
            If the name already carries the extension and was not found, or
            if the retried lookup fails in the wrapped loader.
        """
        try:
            return self.loader.get_source(environment, template)
        except TemplateNotFound:
            if not template.endswith(self.extension):
                return self.loader.get_source(environment, template + self.extension)
            # Nothing left to try; hide the inner lookup failure from the traceback.
            raise TemplateNotFound(template) from None

    def list_templates(self):
        """Return the template names reported by the wrapped loader."""
        return self.loader.list_templates()
137
138
class TemplateExporter(Exporter):
    """
    Exports notebooks into other file formats. Uses Jinja 2 templating engine
    to output new formats. Inherit from this class if you are creating a new
    template type along with new filters/preprocessors. If the filters/
    preprocessors provided by default suffice, there is no need to inherit from
    this class. Instead, override the template_file and file_extension
    traits via a config file.

    Filters available by default for templates:

    {filters}
    """

    # Finish the docstring: substitute the placeholder above with the sorted
    # names of the module-level default filters. ``__doc__`` is falsy when
    # docstrings have been stripped, in which case nothing is formatted.
    __doc__ = (
        __doc__.format(filters="- " + "\n - ".join(sorted(default_filters.keys())))
        if __doc__
        else None
    )

    # Lazily loaded Jinja template; reset whenever a template-affecting trait changes.
    _template_cached = None

    def _invalidate_template_cache(self, change=None):
        """Forget the cached template so the next access reloads it."""
        self._template_cached = None

    @property
    def template(self):
        """The Jinja template for the current settings, loaded on first access."""
        if self._template_cached is None:
            self._template_cached = self._load_template()
        return self._template_cached

    # Lazily created Jinja environment; reset whenever an environment-affecting
    # trait changes.
    _environment_cached = None

    def _invalidate_environment_cache(self, change=None):
        """Forget the cached environment and the template loaded from it."""
        self._environment_cached = None
        self._invalidate_template_cache()

    @property
    def environment(self):
        """The Jinja environment for the current settings, created on first access."""
        if self._environment_cached is None:
            self._environment_cached = self._create_environment()
        return self._environment_cached

    @property
    def default_config(self):
        """Default config: the parent's defaults with both removal preprocessors enabled."""
        c = Config(
            {
                "RegexRemovePreprocessor": {"enabled": True},
                "TagRemovePreprocessor": {"enabled": True},
            }
        )
        if super().default_config:
            # Merge onto a copy so the parent's config object is left untouched.
            c2 = super().default_config.copy()
            c2.merge(c)
            c = c2
        return c

    template_name = Unicode(help="Name of the template to use").tag(
        config=True, affects_template=True
    )

    template_file = Unicode(None, allow_none=True, help="Name of the template file to use").tag(
        config=True, affects_template=True
    )

    raw_template = Unicode("", help="raw template string").tag(affects_environment=True)

    enable_async = Bool(False, help="Enable Jinja async template execution").tag(
        affects_environment=True
    )

    # Last real template file seen, restored when ``raw_template`` is cleared.
    _last_template_file = ""
    # Name under which ``raw_template`` is exposed through the DictLoader.
    _raw_template_key = "<memory>"

    @validate("template_name")
    def _template_name_validate(self, change):
        """Validate ``template_name``, translating 5.x style ``*.tpl`` values.

        A value ending in ``.tpl`` emits a DeprecationWarning and is split:
        the last path component becomes ``template_file``, the component
        before it (if any) becomes the returned template name, and an absolute
        remaining directory replaces ``extra_template_basedirs``.

        Returns
        -------
        str
            The (possibly rewritten) template name to store on the trait.
        """
        template_name = change["value"]
        if template_name and template_name.endswith(".tpl"):
            # Work from the proposed value only: while validation runs the
            # trait itself may still hold the previous value.
            proposed = template_name
            warnings.warn(
                f"5.x style template name passed '{proposed}'. Use --template-name for the template directory with a index.<ext>.j2 file and/or --template-file to denote a different template.",
                DeprecationWarning,
                stacklevel=2,
            )
            directory, self.template_file = os.path.split(proposed)
            if directory:
                directory, template_name = os.path.split(directory)
            if directory and os.path.isabs(directory):
                self.extra_template_basedirs = [directory]
        return template_name

    @observe("template_file")
    def _template_file_changed(self, change):
        """React to a new ``template_file`` value.

        ``"default"`` is replaced by ``self.default_template``. A value that
        points at an existing file is reduced to its base name and its
        directory is prepended to ``extra_template_paths``; a ``.tpl`` file
        additionally emits a DeprecationWarning.
        """
        new = change["new"]
        if new == "default":
            self.template_file = self.default_template  # type:ignore[attr-defined]
            return
        if not new:
            # The trait allows None; there is no path to inspect in that case.
            return
        # check if template_file is a file path
        # rather than a name already on template_path
        full_path = os.path.abspath(new)
        if os.path.isfile(full_path):
            directory, self.template_file = os.path.split(full_path)
            self.extra_template_paths = [directory, *self.extra_template_paths]
            # While not strictly an invalid template file name, the extension hints that there isn't a template directory involved
            if self.template_file and self.template_file.endswith(".tpl"):
                warnings.warn(
                    f"5.x style template file passed '{new}'. Use --template-name for the template directory with a index.<ext>.j2 file and/or --template-file to denote a different template.",
                    DeprecationWarning,
                    stacklevel=2,
                )

    @default("template_file")
    def _template_file_default(self):
        """Default to ``index<template_extension>``, or None without an extension."""
        if self.template_extension:
            return "index" + self.template_extension
        return None

    @observe("raw_template")
    def _raw_template_changed(self, change):
        """Restore the last template file when the raw template is cleared."""
        if not change["new"]:
            self.template_file = self._last_template_file
        self._invalidate_template_cache()

    template_paths = List(["."]).tag(config=True, affects_environment=True)
    extra_template_basedirs = List(Unicode()).tag(config=True, affects_environment=True)
    extra_template_paths = List(Unicode()).tag(config=True, affects_environment=True)

    @default("extra_template_basedirs")
    def _default_extra_template_basedirs(self):
        """Default to the current working directory."""
        return [os.getcwd()]

    # Extension that the template files use.
    template_extension = Unicode().tag(config=True, affects_environment=True)

    template_data_paths = List(
        jupyter_path("nbconvert", "templates"), help="Path where templates can be installed too."
    ).tag(affects_environment=True)

    @default("template_extension")
    def _template_extension_default(self):
        """Default to ``file_extension + ".j2"``, or the empty ``file_extension`` itself."""
        if self.file_extension:
            return self.file_extension + ".j2"
        return self.file_extension

    exclude_input = Bool(
        False, help="This allows you to exclude code cell inputs from all templates if set to True."
    ).tag(config=True)

    exclude_input_prompt = Bool(
        False, help="This allows you to exclude input prompts from all templates if set to True."
    ).tag(config=True)

    exclude_output = Bool(
        False,
        help="This allows you to exclude code cell outputs from all templates if set to True.",
    ).tag(config=True)

    exclude_output_prompt = Bool(
        False, help="This allows you to exclude output prompts from all templates if set to True."
    ).tag(config=True)

    exclude_output_stdin = Bool(
        True,
        help="This allows you to exclude output of stdin stream from lab template if set to True.",
    ).tag(config=True)

    exclude_code_cell = Bool(
        False, help="This allows you to exclude code cells from all templates if set to True."
    ).tag(config=True)

    exclude_markdown = Bool(
        False, help="This allows you to exclude markdown cells from all templates if set to True."
    ).tag(config=True)

    exclude_raw = Bool(
        False, help="This allows you to exclude raw cells from all templates if set to True."
    ).tag(config=True)

    exclude_unknown = Bool(
        False, help="This allows you to exclude unknown cells from all templates if set to True."
    ).tag(config=True)

    extra_loaders: List[t.Any] = List(
        help="Jinja loaders to find templates. Will be tried in order "
        "before the default FileSystem ones.",
    ).tag(affects_environment=True)

    filters = Dict(
        help="""Dictionary of filters, by name and namespace, to add to the Jinja
 environment."""
    ).tag(config=True, affects_environment=True)

    raw_mimetypes = List(
        Unicode(), help="""formats of raw cells to be included in this Exporter's output."""
    ).tag(config=True)

    @default("raw_mimetypes")
    def _raw_mimetypes_default(self):
        """Default to this exporter's output mimetype plus the empty mimetype."""
        return [self.output_mimetype, ""]

    # TODO: passing config is wrong, but changing this revealed more complicated issues
    def __init__(self, config=None, **kw):
        """
        Public constructor

        Parameters
        ----------
        config : config
            User configuration instance.
        extra_loaders : list[of Jinja Loaders]
            ordered list of Jinja loader to find templates. Will be tried in order
            before the default FileSystem ones.
        template_file : str (optional, kw arg)
            Template to use when exporting.
        """
        super().__init__(config=config, **kw)

        # Any change to a tagged trait drops the matching cache.
        self.observe(
            self._invalidate_environment_cache, list(self.traits(affects_environment=True))
        )
        self.observe(self._invalidate_template_cache, list(self.traits(affects_template=True)))

    def _load_template(self):
        """Load the Jinja template object from the template file

        This is triggered by various trait changes that would change the template.

        Raises
        ------
        ValueError
            If no template file is set once ``raw_template`` has been considered.
        """

        # this gives precedence to a raw_template if present
        with self.hold_trait_notifications():
            if self.template_file and (self.template_file != self._raw_template_key):
                # Remember the real file so it can be restored later.
                self._last_template_file = self.template_file
            if self.raw_template:
                self.template_file = self._raw_template_key

        if not self.template_file:
            msg = "No template_file specified!"
            raise ValueError(msg)

        # First try to load the
        # template by name with extension added, then try loading the template
        # as if the name is explicitly specified.
        template_file = self.template_file
        self.log.debug("Attempting to load template %s", template_file)
        self.log.debug(" template_paths: %s", os.pathsep.join(self.template_paths))
        return self.environment.get_template(template_file)

    def from_filename(  # type:ignore[override]
        self, filename: str, resources: dict[str, t.Any] | None = None, **kw: t.Any
    ) -> tuple[str, dict[str, t.Any]]:
        """Convert a notebook from a filename."""
        return super().from_filename(filename, resources, **kw)  # type:ignore[return-value]

    def from_file(  # type:ignore[override]
        self, file_stream: t.Any, resources: dict[str, t.Any] | None = None, **kw: t.Any
    ) -> tuple[str, dict[str, t.Any]]:
        """Convert a notebook from a file."""
        return super().from_file(file_stream, resources, **kw)  # type:ignore[return-value]

    def from_notebook_node(  # type:ignore[explicit-override, override]
        self, nb: NotebookNode, resources: dict[str, t.Any] | None = None, **kw: t.Any
    ) -> tuple[str, dict[str, t.Any]]:
        """
        Convert a notebook from a notebook node instance.

        Parameters
        ----------
        nb : :class:`~nbformat.NotebookNode`
            Notebook node
        resources : dict
            Additional resources that can be accessed read/write by
            preprocessors and filters.

        Returns
        -------
        tuple
            The rendered output (leading newlines removed) and the resources dict.
        """
        nb_copy, resources = super().from_notebook_node(nb, resources, **kw)
        # Caller-provided values win; only missing keys are filled in.
        resources.setdefault("raw_mimetypes", self.raw_mimetypes)
        resources.setdefault("output_mimetype", self.output_mimetype)
        resources["global_content_filter"] = {
            "include_code": not self.exclude_code_cell,
            "include_markdown": not self.exclude_markdown,
            "include_raw": not self.exclude_raw,
            "include_unknown": not self.exclude_unknown,
            "include_input": not self.exclude_input,
            "include_output": not self.exclude_output,
            "include_output_stdin": not self.exclude_output_stdin,
            "include_input_prompt": not self.exclude_input_prompt,
            "include_output_prompt": not self.exclude_output_prompt,
            "no_prompt": self.exclude_input_prompt and self.exclude_output_prompt,
        }

        # Top level variables are passed to the template_exporter here.
        output = self.template.render(nb=nb_copy, resources=resources)
        output = output.lstrip("\r\n")
        return output, resources

    def _register_filter(self, environ, name, jinja_filter):
        """
        Register a filter.
        A filter is a function that accepts and acts on one string.
        The filters are accessible within the Jinja templating engine.

        Parameters
        ----------
        environ : jinja2.Environment
            environment whose ``filters`` mapping receives the filter
        name : str
            name to give the filter in the Jinja engine
        jinja_filter : str, type or callable
            an import string, a class to instantiate, or a ready callable

        Returns
        -------
        callable
            The callable that was stored in ``environ.filters``.

        Raises
        ------
        TypeError
            If ``jinja_filter`` is None or resolves to a non-callable object.
        """
        if jinja_filter is None:
            msg = f"Cannot register filter {name!r}: no filter was given"
            raise TypeError(msg)

        if isinstance(jinja_filter, str):
            # filter is a string, import the namespace and recursively call
            # this register_filter method
            return self._register_filter(environ, name, import_item(jinja_filter))

        if isinstance(jinja_filter, type):
            if issubclass(jinja_filter, HasTraits):
                # filter is configurable; parent it to this exporter.
                filter_instance = jinja_filter(parent=self)
            else:
                # filter is not configurable, construct it
                filter_instance = jinja_filter()
            return self._register_filter(environ, name, filter_instance)

        if callable(jinja_filter):
            # filter is a function (or callable instance), no need to construct it.
            environ.filters[name] = jinja_filter
            return jinja_filter

        # filter is an instance of something without a __call__
        # attribute.
        msg = f"Cannot register filter {name!r}: {jinja_filter!r} is not callable"
        raise TypeError(msg)

    def register_filter(self, name, jinja_filter):
        """
        Register a filter.
        A filter is a function that accepts and acts on one string.
        The filters are accessible within the Jinja templating engine.

        Parameters
        ----------
        name : str
            name to give the filter in the Jinja engine
        jinja_filter : str, type or callable
            an import string, a class to instantiate, or a ready callable

        Returns
        -------
        callable
            The callable that was registered on ``self.environment``.
        """
        return self._register_filter(self.environment, name, jinja_filter)

    def default_filters(self):
        """Override in subclasses to provide extra filters.

        This should return an iterable of 2-tuples: (name, class-or-function).
        You should call the method on the parent class and include the filters
        it provides.

        If a name is repeated, the last filter provided wins. Filters from
        user-supplied config win over filters provided by classes.
        """
        # Inside the method body this name resolves to the module-level dict.
        return default_filters.items()

    def _create_environment(self):
        """
        Create the Jinja templating environment.

        Loaders are tried in order: ``extra_loaders``, the file system loader
        over ``template_paths`` (tolerant of a missing template extension),
        then the in-memory loader holding ``raw_template``.
        """
        paths = self.template_paths
        self.log.debug("Template paths:\n\t%s", "\n\t".join(paths))

        loaders = [
            *self.extra_loaders,
            ExtensionTolerantLoader(FileSystemLoader(paths), self.template_extension),
            DictLoader({self._raw_template_key: self.raw_template}),
        ]
        environment = Environment(  # noqa: S701
            loader=ChoiceLoader(loaders),
            extensions=JINJA_EXTENSIONS,
            enable_async=self.enable_async,
        )

        environment.globals["uuid4"] = uuid.uuid4

        # Add default filters to the Jinja2 environment
        for key, value in self.default_filters():
            self._register_filter(environment, key, value)

        # Load user filters. Overwrite existing filters if need be.
        if self.filters:
            for key, user_filter in self.filters.items():
                self._register_filter(environment, key, user_filter)

        return environment

    def _init_preprocessors(self):
        """Register the parent's preprocessors, then those declared in conf.json.

        Entries under the merged ``preprocessors`` key are processed in sorted
        key order. Each non-None entry is a dict whose ``type`` is an import
        string; the remaining items, updated from ``self.config`` for that
        class name, become constructor keyword arguments.
        """
        super()._init_preprocessors()
        conf = self._get_conf()
        preprocessors = conf.get("preprocessors", {})
        # preprocessors is a dict for three reasons
        # * We rely on recursive_update, which can only merge dicts, lists will be overwritten
        # * We can use the key with numerical prefixing to guarantee ordering (/etc/*.d/XY-file style)
        # * We can disable preprocessors by overwriting the value with None
        for _, preprocessor in sorted(preprocessors.items(), key=lambda x: x[0]):
            if preprocessor is not None:
                kwargs = preprocessor.copy()
                preprocessor_cls = kwargs.pop("type")
                preprocessor_cls = import_item(preprocessor_cls)
                if preprocessor_cls.__name__ in self.config:
                    kwargs.update(self.config[preprocessor_cls.__name__])
                preprocessor = preprocessor_cls(**kwargs)  # noqa: PLW2901
                self.register_preprocessor(preprocessor)

    def _get_conf(self):
        """Merge every ``conf.json`` found in ``template_paths``, in path order.

        Later paths are applied on top of earlier ones via ``recursive_update``.
        """
        conf: dict[str, t.Any] = {}  # the configuration once all conf files are merged
        for path in map(Path, self.template_paths):
            conf_path = path / "conf.json"
            if conf_path.exists():
                # JSON is read as UTF-8 rather than the locale's encoding.
                with conf_path.open(encoding="utf-8") as f:
                    conf = recursive_update(conf, json.load(f))
        return conf

    @default("template_paths")
    def _template_paths(self, prune=True, root_dirs=None):
        """Compute the default template search path.

        Parameters
        ----------
        prune : bool
            When true, template and data directories that do not exist are
            left out.
        root_dirs : list of str, optional
            Prefix directories to search. Defaults to
            ``self.get_prefix_root_dirs()`` when not given.

        Returns
        -------
        list of str
            Per-template directories, then the generic root/base/compatibility
            directories, then ``extra_template_paths`` and the data paths.
        """
        paths = []
        if root_dirs is None:
            root_dirs = self.get_prefix_root_dirs()
        template_names = self.get_template_names()
        for template_name in template_names:
            for base_dir in self.extra_template_basedirs:
                path = os.path.join(base_dir, template_name)
                try:
                    if not prune or os.path.exists(path):
                        paths.append(path)
                except PermissionError:
                    pass
            for root_dir in root_dirs:
                base_dir = os.path.join(root_dir, "nbconvert", "templates")
                path = os.path.join(base_dir, template_name)
                try:
                    if not prune or os.path.exists(path):
                        paths.append(path)
                except PermissionError:
                    pass

        for root_dir in root_dirs:
            # we include root_dir for when we want to be very explicit, e.g.
            # {% extends 'nbconvert/templates/classic/base.html' %}
            paths.append(root_dir)
            # we include base_dir for when we want to be explicit, but less than root_dir, e.g.
            # {% extends 'classic/base.html' %}
            base_dir = os.path.join(root_dir, "nbconvert", "templates")
            paths.append(base_dir)

            compatibility_dir = os.path.join(root_dir, "nbconvert", "templates", "compatibility")
            paths.append(compatibility_dir)

        additional_paths = [
            path for path in self.template_data_paths if not prune or os.path.exists(path)
        ]

        return paths + self.extra_template_paths + additional_paths

    @classmethod
    def get_compatibility_base_template_conf(cls, name):
        """Get the base template config.

        Returns a hard-coded conf dict for the 5.x names ``display_priority``
        and ``full``, and None for any other name.
        """
        # Hard-coded base template confs to use for backwards compatibility for 5.x-only templates
        if name == "display_priority":
            return {"base_template": "base"}
        if name == "full":
            return {"base_template": "classic", "mimetypes": {"text/html": True}}
        return None

    def get_template_names(self):
        """Finds a list of template names where each successive template name is the base template

        Starting from ``template_name``, each template's ``conf.json`` files are
        merged and their ``base_template`` entry names the next template to
        visit. Names without a template directory fall back to a 5.x
        ``<name>.tpl`` file in a ``compatibility`` directory.

        Raises
        ------
        ValueError
            If a template name is found nowhere, or if the merged conf lists
            enabled mimetypes that do not include ``output_mimetype``.
        """
        template_names = []
        root_dirs = self.get_prefix_root_dirs()
        base_template: str | None = self.template_name
        merged_conf: dict[str, t.Any] = {}  # the configuration once all conf files are merged
        while base_template is not None:
            template_names.append(base_template)
            conf: dict[str, t.Any] = {}
            found_at_least_one = False
            for base_dir in self.extra_template_basedirs:
                template_dir = os.path.join(base_dir, base_template)
                if os.path.exists(template_dir):
                    found_at_least_one = True
                conf_file = os.path.join(template_dir, "conf.json")
                if os.path.exists(conf_file):
                    with open(conf_file, encoding="utf-8") as f:
                        # Values gathered so far are applied on top of the file just read.
                        conf = recursive_update(json.load(f), conf)
            for root_dir in root_dirs:
                template_dir = os.path.join(root_dir, "nbconvert", "templates", base_template)
                if os.path.exists(template_dir):
                    found_at_least_one = True
                conf_file = os.path.join(template_dir, "conf.json")
                if os.path.exists(conf_file):
                    with open(conf_file, encoding="utf-8") as f:
                        conf = recursive_update(json.load(f), conf)
            if not found_at_least_one:
                # Check for backwards compatibility template names
                for root_dir in root_dirs:
                    compatibility_file = base_template + ".tpl"
                    compatibility_path = os.path.join(
                        root_dir, "nbconvert", "templates", "compatibility", compatibility_file
                    )
                    if os.path.exists(compatibility_path):
                        found_at_least_one = True
                        warnings.warn(
                            f"5.x template name passed '{self.template_name}'. Use 'lab' or 'classic' for new template usage.",
                            DeprecationWarning,
                            stacklevel=2,
                        )
                        self.template_file = compatibility_file
                        # NOTE(review): this returns None for names other than
                        # the two hard-coded ones, which would fail on ``.get``.
                        conf = self.get_compatibility_base_template_conf(base_template)
                        self.template_name = t.cast(str, conf.get("base_template"))
                        break
                if not found_at_least_one:
                    paths = "\n\t".join(root_dirs)
                    msg = f"No template sub-directory with name {base_template!r} found in the following paths:\n\t{paths}"
                    raise ValueError(msg)
            # Values merged from earlier templates in the chain are applied on top.
            merged_conf = recursive_update(dict(conf), merged_conf)
            base_template = t.cast(t.Any, conf.get("base_template"))
        conf = merged_conf
        mimetypes = [mimetype for mimetype, enabled in conf.get("mimetypes", {}).items() if enabled]
        if self.output_mimetype and self.output_mimetype not in mimetypes and mimetypes:
            supported_mimetypes = "\n\t".join(mimetypes)
            msg = f"Unsupported mimetype {self.output_mimetype!r} for template {self.template_name!r}, mimetypes supported are: \n\t{supported_mimetypes}"
            raise ValueError(msg)
        return template_names

    def get_prefix_root_dirs(self):
        """Get the prefix root dirs."""
        # We look at the usual jupyter locations, and for development purposes also
        # relative to the package directory (first entry, meaning with highest precedence)
        root_dirs = []
        if DEV_MODE:
            root_dirs.append(os.path.abspath(os.path.join(ROOT, "..", "..", "share", "jupyter")))
        root_dirs.extend(jupyter_path())
        return root_dirs

    def _init_resources(self, resources):
        """Extend the parent's resources with the ``deprecated`` warning helper."""
        resources = super()._init_resources(resources)
        resources["deprecated"] = deprecated
        return resources