1"""HTML Exporter class"""
2
3# Copyright (c) Jupyter Development Team.
4# Distributed under the terms of the Modified BSD License.
5
6import base64
7import json
8import mimetypes
9import os
10from pathlib import Path
11from typing import Any, Optional
12
13import jinja2
14import markupsafe
15from bs4 import BeautifulSoup
16from jupyter_core.paths import jupyter_path
17from traitlets import Bool, Dict, Unicode, default, validate
18from traitlets.config import Config
19
# Pick the context-passing filter decorator by feature detection rather than
# by parsing ``jinja2.__version__``: ``int()`` on a version component raises
# ValueError for pre-release/dev versions (e.g. "3.0.0rc1"), and the check
# needlessly depends on the ``__version__`` attribute being present.
try:
    # jinja2 >= 3.0 exposes ``pass_context`` as the replacement decorator.
    contextfilter = jinja2.pass_context
except AttributeError:
    # Older jinja2 only provides the original ``contextfilter`` decorator.
    contextfilter = jinja2.contextfilter  # type:ignore[attr-defined]
24
25from jinja2.loaders import split_template_path
26from nbformat import NotebookNode
27
28from nbconvert.filters.highlight import Highlight2HTML
29from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath
30from nbconvert.filters.widgetsdatatypefilter import WidgetsDataTypeFilter
31from nbconvert.utils.iso639_1 import iso639_1
32
33from .templateexporter import TemplateExporter
34
35
def find_lab_theme(theme_name):
    """
    Find a JupyterLab theme location by name.

    Every directory returned by ``jupyter_path("labextensions")`` is walked
    recursively. A directory is treated as a theme extension when it contains
    both a ``package.json`` file and a ``themes`` sub-directory; the ``name``
    field of that ``package.json`` is compared against ``theme_name``.

    Parameters
    ----------
    theme_name : str
        The name of the labextension theme you want to find. Either the full
        package name or any one of its ``/``-separated parts matches.

    Raises
    ------
    ValueError
        If the theme was not found, or if it was not specific enough.

    Returns
    -------
    theme_name: str
        Full theme name (with scope, if any)
    theme_path : Path
        The ``themes/<full theme name>`` directory inside the labextension
        that was found on the system.
    """
    paths = jupyter_path("labextensions")

    matching_themes = []
    full_theme_name = None
    theme_path = None
    for path in paths:
        for dirpath, dirnames, filenames in os.walk(path):
            # Only federated labextensions that contain themes are of interest
            if "package.json" not in filenames or "themes" not in dirnames:
                continue

            # TODO Find the theme name in the JS code instead?
            # TODO Find if it's a light or dark theme?
            with open(Path(dirpath) / "package.json", encoding="utf-8") as fobj:
                manifest = json.load(fobj)

            labext_name = manifest.get("name") if isinstance(manifest, dict) else None
            # A manifest without a string name can never be the requested
            # theme, so skip it instead of aborting the whole lookup.
            if not isinstance(labext_name, str):
                continue

            if labext_name == theme_name or theme_name in labext_name.split("/"):
                matching_themes.append(labext_name)

                full_theme_name = labext_name
                theme_path = Path(dirpath) / "themes" / labext_name

    if not matching_themes:
        msg = f'Could not find lab theme "{theme_name}"'
        raise ValueError(msg)

    if len(matching_themes) > 1:
        msg = (
            f'Found multiple themes matching "{theme_name}": {matching_themes}. '
            "Please be more specific about which theme you want to use."
        )
        raise ValueError(msg)

    return full_theme_name, theme_path
88
89
class HTMLExporter(TemplateExporter):
    """
    Exports a basic HTML document. This exporter assists with the export of
    HTML. Inherit from it if you are writing your own HTML template and need
    custom preprocessors/filters. If you don't need custom preprocessors/
    filters, just change the 'template_file' config option.
    """

    export_from_notebook = "HTML"

    anchor_link_text = Unicode("¶", help="The text used as the text for anchor links.").tag(
        config=True
    )

    exclude_anchor_links = Bool(False, help="If anchor links should be included or not.").tag(
        config=True
    )

    require_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js",
        help="""
        URL to load require.js from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    mathjax_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS_CHTML-full,Safe",
        help="""
        URL to load Mathjax from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    # Tagged configurable like every other asset URL of this exporter, so the
    # CDN location can be overridden from config files / the command line.
    mermaid_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mermaid/11.10.0/mermaid.esm.min.mjs",
        help="""
        URL to load MermaidJS from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    mermaid_layout_elk_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mermaid-layout-elk/0.1.9/mermaid-layout-elk.esm.min.mjs",
        help="""
        URL to load MermaidJS ELK layout from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    jquery_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js",
        help="""
        URL to load jQuery from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    jupyter_widgets_base_url = Unicode(
        "https://unpkg.com/", help="URL base for Jupyter widgets"
    ).tag(config=True)

    widget_renderer_url = Unicode("", help="Full URL for Jupyter widgets").tag(config=True)

    html_manager_semver_range = Unicode(
        "*", help="Semver range for Jupyter widgets HTML manager"
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        """Default value of the ``file_extension`` trait."""
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        """Default value of the ``template_name`` trait."""
        return "lab"

    theme = Unicode(
        "light",
        help="Template specific theme(e.g. the name of a JupyterLab CSS theme distributed as prebuilt extension for the lab template)",
    ).tag(config=True)

    sanitize_html = Bool(
        False,
        help=(
            # Trailing space keeps the two sentences apart once concatenated.
            "Whether the HTML in Markdown cells and cell outputs should be sanitized. "
            "This should be set to True by nbviewer or similar tools."
        ),
    ).tag(config=True)

    skip_svg_encoding = Bool(
        False,
        help=("Whether the svg to image data attribute encoding should occur"),
    ).tag(config=True)

    embed_images = Bool(
        False, help="Whether or not to embed images as base64 in markdown cells."
    ).tag(config=True)

    output_mimetype = "text/html"

    lexer_options = Dict(
        {},
        help=(
            "Options to be passed to the pygments lexer for highlighting markdown code blocks. "
            "See https://pygments.org/docs/lexers/#available-lexers for available options."
        ),
    ).tag(config=True)

    @property
    def default_config(self):
        """Return this exporter's default config merged over the parent's.

        The HTML-specific settings (display data priority and enabling the
        ``HighlightMagicsPreprocessor``) are merged into a copy of the parent
        class' default config when the parent provides a non-empty one.
        """
        c = Config(
            {
                "NbConvertBase": {
                    "display_data_priority": [
                        "application/vnd.jupyter.widget-view+json",
                        "application/javascript",
                        "text/html",
                        "text/markdown",
                        "image/svg+xml",
                        "text/vnd.mermaid",
                        "text/latex",
                        "image/png",
                        "image/jpeg",
                        "text/plain",
                    ]
                },
                "HighlightMagicsPreprocessor": {"enabled": True},
            }
        )
        if super().default_config:
            c2 = super().default_config.copy()
            c2.merge(c)
            c = c2
        return c

    language_code = Unicode(
        "en", help="Language code of the content, should be one of the ISO639-1"
    ).tag(config=True)

    @validate("language_code")
    def _valid_language_code(self, proposal):
        """Validate a proposed ``language_code``.

        Returns the proposed value when it is found in ``iso639_1``; otherwise
        logs a warning and returns the trait's default value.
        """
        # Check the *proposed* value: during a plain assignment the attribute
        # on ``self`` still holds the previous value, so reading it here would
        # let an invalid code through and log the wrong value.
        value = proposal["value"]
        if value not in iso639_1:
            self.log.warning(
                '"%s" is not an ISO 639-1 language code. '
                'It has been replaced by the default value "en".',
                value,
            )
            return proposal["trait"].default_value
        return value

    @contextfilter
    def markdown2html(self, context, source):
        """Markdown to HTML filter respecting the anchor_link_text setting

        The current cell's attachments and the ``path`` entry of the
        resources metadata are read from the template ``context`` and handed
        to the renderer together with the exporter's image-embedding, anchor
        link and lexer options.
        """
        cell = context.get("cell", {})
        attachments = cell.get("attachments", {})
        path = context.get("resources", {}).get("metadata", {}).get("path", "")

        renderer = IPythonRenderer(
            escape=False,
            attachments=attachments,
            embed_images=self.embed_images,
            path=path,
            anchor_link_text=self.anchor_link_text,
            exclude_anchor_links=self.exclude_anchor_links,
            **self.lexer_options,
        )
        return MarkdownWithMath(renderer=renderer).render(source)

    def default_filters(self):
        """Get the default filters."""
        yield from super().default_filters()
        yield ("markdown2html", self.markdown2html)

    def from_notebook_node(  # type:ignore[explicit-override, override]
        self, nb: NotebookNode, resources: Optional[dict[str, Any]] = None, **kw: Any
    ) -> tuple[str, dict[str, Any]]:
        """Convert from notebook node.

        Registers the ``highlight_code`` and ``filter_data_type`` filters,
        delegates the conversion to the parent class, then post-processes the
        resulting HTML: images without an ``alt`` attribute get a placeholder
        text (and a warning is logged), and cell input wrappers / output areas
        inside ``.jp-Notebook`` are made focusable via ``tabindex="0"``.

        Returns the post-processed HTML string and the resources dict.
        """
        langinfo = nb.metadata.get("language_info", {})
        # Prefer the explicit pygments lexer, fall back to the language name
        lexer = langinfo.get("pygments_lexer", langinfo.get("name", None))
        highlight_code = self.filters.get(
            "highlight_code", Highlight2HTML(pygments_lexer=lexer, parent=self)
        )

        resources = self._init_resources(resources)

        filter_data_type = WidgetsDataTypeFilter(
            notebook_metadata=self._nb_metadata, parent=self, resources=resources
        )

        self.register_filter("highlight_code", highlight_code)
        self.register_filter("filter_data_type", filter_data_type)
        html, resources = super().from_notebook_node(nb, resources, **kw)
        soup = BeautifulSoup(html, features="html.parser")
        # Add image's alternative text
        missing_alt = 0
        for elem in soup.select("img:not([alt])"):
            elem.attrs["alt"] = "No description has been provided for this image"
            missing_alt += 1
        if missing_alt:
            self.log.warning("Alternative text is missing on %s image(s).", missing_alt)
        # Set input and output focusable
        for elem in soup.select(".jp-Notebook div.jp-Cell-inputWrapper"):
            elem.attrs["tabindex"] = "0"
        for elem in soup.select(".jp-Notebook div.jp-OutputArea-output"):
            elem.attrs["tabindex"] = "0"

        return str(soup), resources

    def _init_resources(self, resources):
        """Extend the parent's resources with the HTML template helpers.

        Adds the ``include_*`` helper callables (which inline CSS, JS, lab
        themes and arbitrary files as markup / data URLs) and copies the
        exporter's URL and option traits into the resources dict.
        """

        def resources_include_css(name):
            """Return the named template file wrapped in a ``<style>`` tag."""
            env = self.environment
            code = """<style type="text/css">\n%s</style>""" % (env.loader.get_source(env, name)[0])
            return markupsafe.Markup(code)

        def resources_include_lab_theme(name):
            """Return the named lab theme's CSS, assets inlined, in a ``<style>`` tag."""
            # Try to find the theme with the given name, looking through the labextensions
            _, theme_path = find_lab_theme(name)

            # Read with an explicit encoding so the result does not depend on
            # the platform's default locale encoding.
            with open(theme_path / "index.css", encoding="utf-8") as file:
                data = file.read()

            # Embed assets (fonts, images...)
            for asset in os.listdir(theme_path):
                local_url = f"url({Path(asset).as_posix()})"

                if local_url in data:
                    mime_type = mimetypes.guess_type(asset)[0]

                    # Replace asset url by a base64 dataurl
                    with open(theme_path / asset, "rb") as assetfile:
                        base64_data = base64.b64encode(assetfile.read())
                        base64_str = base64_data.replace(b"\n", b"").decode("ascii")

                        data = data.replace(local_url, f"url(data:{mime_type};base64,{base64_str})")

            code = """<style type="text/css">\n%s</style>""" % data
            return markupsafe.Markup(code)

        def resources_include_js(name, module=False):
            """Get the resources include JS for a name. If module=True, import as ES module"""
            env = self.environment
            code = f"""<script {'type="module"' if module else ""}>\n{env.loader.get_source(env, name)[0]}</script>"""
            return markupsafe.Markup(code)

        def resources_include_url(name):
            """Get the resources include url for a name."""
            env = self.environment
            mime_type, encoding = mimetypes.guess_type(name)
            try:
                # we try to load via the jinja loader, but that tries to load
                # as (encoded) text
                data = env.loader.get_source(env, name)[0].encode("utf8")
            except UnicodeDecodeError:
                # if that fails (for instance a binary file, png or ttf)
                # we mimic jinja2
                pieces = split_template_path(name)
                for searchpath in self.template_paths:
                    filename = os.path.join(searchpath, *pieces)
                    if os.path.exists(filename):
                        with open(filename, "rb") as f:
                            data = f.read()
                        break
                else:
                    # Report every directory that was searched; the loop
                    # variable is unbound when ``template_paths`` is empty and
                    # would otherwise only name the last directory tried.
                    msg = f"No file {name!r} found in {self.template_paths!r}"
                    raise ValueError(msg)
            data = base64.b64encode(data)
            data = data.replace(b"\n", b"").decode("ascii")
            src = f"data:{mime_type};base64,{data}"
            return markupsafe.Markup(src)

        resources = super()._init_resources(resources)
        resources["theme"] = self.theme
        resources["include_css"] = resources_include_css
        resources["include_lab_theme"] = resources_include_lab_theme
        resources["include_js"] = resources_include_js
        resources["include_url"] = resources_include_url
        resources["require_js_url"] = self.require_js_url
        resources["mathjax_url"] = self.mathjax_url
        resources["mermaid_js_url"] = self.mermaid_js_url
        resources["mermaid_layout_elk_js_url"] = self.mermaid_layout_elk_js_url
        resources["jquery_url"] = self.jquery_url
        resources["jupyter_widgets_base_url"] = self.jupyter_widgets_base_url
        resources["widget_renderer_url"] = self.widget_renderer_url
        resources["html_manager_semver_range"] = self.html_manager_semver_range
        resources["should_sanitize_html"] = self.sanitize_html
        resources["language_code"] = self.language_code
        resources["should_not_encode_svg"] = self.skip_svg_encoding
        return resources