1"""HTML Exporter class"""
2
3# Copyright (c) Jupyter Development Team.
4# Distributed under the terms of the Modified BSD License.
5
6import base64
7import json
8import mimetypes
9import os
10from pathlib import Path
11from typing import Any, Optional
12
13import jinja2
14import markupsafe
15from bs4 import BeautifulSoup
16from jupyter_core.paths import jupyter_path
17from traitlets import Bool, Dict, Unicode, default, validate
18from traitlets.config import Config
19
# Pick the context-passing decorator by feature detection rather than by
# parsing ``jinja2.__version__``: converting version components with ``int``
# raises ValueError on pre-release strings such as "3.0.0a1" or "3.0.0rc1".
try:
    # jinja2 >= 3.0 provides ``pass_context``
    from jinja2 import pass_context as contextfilter
except ImportError:
    # Older jinja2 only has the original ``contextfilter`` decorator
    from jinja2 import contextfilter  # type:ignore[attr-defined,no-redef]
24
25from jinja2.loaders import split_template_path
26from nbformat import NotebookNode
27
28from nbconvert.filters.highlight import Highlight2HTML
29from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath
30from nbconvert.filters.widgetsdatatypefilter import WidgetsDataTypeFilter
31from nbconvert.utils.iso639_1 import iso639_1
32
33from .templateexporter import TemplateExporter
34
35
def find_lab_theme(theme_name):
    """
    Locate an installed JupyterLab theme extension from its name.

    Every ``labextensions`` directory returned by ``jupyter_path`` is walked
    recursively. A directory is considered a theme extension when it holds
    both a ``package.json`` file and a ``themes`` sub-directory. The ``name``
    entry of that ``package.json`` is then compared with ``theme_name``,
    either as a whole or against each of its ``/``-separated parts.

    Parameters
    ----------
    theme_name : str
        The name of the labextension theme to look for.

    Raises
    ------
    ValueError
        If no theme matched, or if more than one theme matched.

    Returns
    -------
    theme_name : str
        Full theme name (with scope, if any)
    labextension_path : Path
        The ``themes/<full theme name>`` path inside the matching labextension.
    """
    # One (full name, theme directory) pair per matching extension
    candidates = []

    for root in jupyter_path("labextensions"):
        for dirpath, dirnames, filenames in os.walk(root):
            # Only federated labextensions that contain themes are relevant
            if "package.json" not in filenames or "themes" not in dirnames:
                continue

            # TODO Find the theme name in the JS code instead?
            # TODO Find if it's a light or dark theme?
            extension_dir = Path(dirpath)
            with open(extension_dir / "package.json", encoding="utf-8") as manifest:
                labext_name = json.load(manifest)["name"]

            if labext_name == theme_name or theme_name in labext_name.split("/"):
                candidates.append((labext_name, extension_dir / "themes" / labext_name))

    if not candidates:
        msg = f'Could not find lab theme "{theme_name}"'
        raise ValueError(msg)

    if len(candidates) > 1:
        matching_themes = [name for name, _ in candidates]
        msg = (
            f'Found multiple themes matching "{theme_name}": {matching_themes}. '
            "Please be more specific about which theme you want to use."
        )
        raise ValueError(msg)

    # Exactly one match remains at this point
    full_theme_name, theme_path = candidates[0]
    return full_theme_name, theme_path
88
89
class HTMLExporter(TemplateExporter):
    """
    Exports a basic HTML document. This exporter assists with the export of
    HTML. Inherit from it if you are writing your own HTML template and need
    custom preprocessors/filters. If you don't need custom preprocessors/
    filters, just change the 'template_file' config option.
    """

    export_from_notebook = "HTML"

    anchor_link_text = Unicode("¶", help="The text used as the text for anchor links.").tag(
        config=True
    )

    exclude_anchor_links = Bool(False, help="If anchor links should be included or not.").tag(
        config=True
    )

    require_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js",
        help="""
        URL to load require.js from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    mathjax_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS_CHTML-full,Safe",
        help="""
        URL to load Mathjax from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    # Tagged as configurable like the other URL traits so that it can be
    # overridden from a config file or the command line.
    mermaid_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mermaid/10.7.0/mermaid.esm.min.mjs",
        help="""
        URL to load MermaidJS from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    jquery_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js",
        help="""
        URL to load jQuery from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    jupyter_widgets_base_url = Unicode(
        "https://unpkg.com/", help="URL base for Jupyter widgets"
    ).tag(config=True)

    widget_renderer_url = Unicode("", help="Full URL for Jupyter widgets").tag(config=True)

    html_manager_semver_range = Unicode(
        "*", help="Semver range for Jupyter widgets HTML manager"
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        """Default extension of the exported file."""
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        """Default template used by this exporter."""
        return "lab"

    theme = Unicode(
        "light",
        help="Template specific theme(e.g. the name of a JupyterLab CSS theme distributed as prebuilt extension for the lab template)",
    ).tag(config=True)

    sanitize_html = Bool(
        False,
        help=(
            "Whether the HTML in Markdown cells and cell outputs should be sanitized. "
            "This should be set to True by nbviewer or similar tools."
        ),
    ).tag(config=True)

    skip_svg_encoding = Bool(
        False,
        help=("Whether the svg to image data attribute encoding should occur"),
    ).tag(config=True)

    embed_images = Bool(
        False, help="Whether or not to embed images as base64 in markdown cells."
    ).tag(config=True)

    output_mimetype = "text/html"

    lexer_options = Dict(
        {},
        help=(
            "Options to be passed to the pygments lexer for highlighting markdown code blocks. "
            "See https://pygments.org/docs/lexers/#available-lexers for available options."
        ),
    ).tag(config=True)

    @property
    def default_config(self):
        """Exporter defaults, merged on top of the parent class defaults when it has any."""
        c = Config(
            {
                "NbConvertBase": {
                    "display_data_priority": [
                        "application/vnd.jupyter.widget-view+json",
                        "application/javascript",
                        "text/html",
                        "text/markdown",
                        "image/svg+xml",
                        "text/vnd.mermaid",
                        "text/latex",
                        "image/png",
                        "image/jpeg",
                        "text/plain",
                    ]
                },
                "HighlightMagicsPreprocessor": {"enabled": True},
            }
        )
        # Evaluate the parent property once instead of once for the check and
        # once more for the copy.
        parent_config = super().default_config
        if parent_config:
            merged = parent_config.copy()
            merged.merge(c)
            c = merged
        return c

    language_code = Unicode(
        "en", help="Language code of the content, should be one of the ISO639-1"
    ).tag(config=True)

    @validate("language_code")
    def _valid_language_code(self, proposal):
        """Replace a proposed language code missing from ``iso639_1`` by the trait default.

        The check is made on ``proposal["value"]``: while a direct assignment is
        being validated, ``self.language_code`` still holds the previous value,
        so testing the attribute would let invalid codes through.
        """
        value = proposal["value"]
        if value not in iso639_1:
            self.log.warning(
                '"%s" is not an ISO 639-1 language code. '
                'It has been replaced by the default value "en".',
                value,
            )
            return proposal["trait"].default_value
        return value

    @contextfilter
    def markdown2html(self, context, source):
        """Markdown to HTML filter respecting the anchor_link_text setting"""
        cell = context.get("cell", {})
        attachments = cell.get("attachments", {})
        path = context.get("resources", {}).get("metadata", {}).get("path", "")

        renderer = IPythonRenderer(
            escape=False,
            attachments=attachments,
            embed_images=self.embed_images,
            path=path,
            anchor_link_text=self.anchor_link_text,
            exclude_anchor_links=self.exclude_anchor_links,
            **self.lexer_options,
        )
        return MarkdownWithMath(renderer=renderer).render(source)

    def default_filters(self):
        """Get the default filters."""
        yield from super().default_filters()
        yield ("markdown2html", self.markdown2html)

    def from_notebook_node(  # type:ignore[explicit-override, override]
        self, nb: NotebookNode, resources: Optional[dict[str, Any]] = None, **kw: Any
    ) -> tuple[str, dict[str, Any]]:
        """Convert from notebook node.

        Registers the ``highlight_code`` and ``filter_data_type`` filters,
        delegates the rendering to the parent class, then post-processes the
        resulting HTML: images without an ``alt`` attribute get a placeholder
        text (a warning is logged with their count) and cell inputs/outputs
        get ``tabindex="0"``.

        Returns the HTML as a string together with the resources dictionary.
        """
        langinfo = nb.metadata.get("language_info", {})
        lexer = langinfo.get("pygments_lexer", langinfo.get("name", None))
        # Keep an already registered ``highlight_code`` filter if there is one
        highlight_code = self.filters.get(
            "highlight_code", Highlight2HTML(pygments_lexer=lexer, parent=self)
        )

        resources = self._init_resources(resources)

        filter_data_type = WidgetsDataTypeFilter(
            notebook_metadata=self._nb_metadata, parent=self, resources=resources
        )

        self.register_filter("highlight_code", highlight_code)
        self.register_filter("filter_data_type", filter_data_type)
        html, resources = super().from_notebook_node(nb, resources, **kw)
        soup = BeautifulSoup(html, features="html.parser")
        # Add image's alternative text
        missing_alt = 0
        for elem in soup.select("img:not([alt])"):
            elem.attrs["alt"] = "No description has been provided for this image"
            missing_alt += 1
        if missing_alt:
            self.log.warning("Alternative text is missing on %s image(s).", missing_alt)
        # Set input and output focusable
        for elem in soup.select(".jp-Notebook div.jp-Cell-inputWrapper"):
            elem.attrs["tabindex"] = "0"
        for elem in soup.select(".jp-Notebook div.jp-OutputArea-output"):
            elem.attrs["tabindex"] = "0"

        return str(soup), resources

    def _init_resources(self, resources):
        """Extend the parent resources with the HTML-specific helpers and settings.

        Adds the ``include_*`` callables defined below, together with the
        values of the exporter's URL, theme, sanitizing, language and SVG
        encoding traits.
        """

        def resources_include_css(name):
            """Return the template source ``name`` wrapped in a ``<style>`` tag."""
            env = self.environment
            source = env.loader.get_source(env, name)[0]
            code = """<style type="text/css">\n%s</style>""" % source
            return markupsafe.Markup(code)

        def resources_include_lab_theme(name):
            """Return the CSS of lab theme ``name`` in a ``<style>`` tag, assets inlined."""
            # Try to find the theme with the given name, looking through the labextensions
            _, theme_path = find_lab_theme(name)

            # Read as UTF-8 explicitly rather than with the platform default
            # encoding, consistently with how package.json is read.
            with open(theme_path / "index.css", encoding="utf-8") as file:
                data = file.read()

            # Embed assets (fonts, images...)
            for asset in os.listdir(theme_path):
                local_url = f"url({Path(asset).as_posix()})"

                if local_url in data:
                    mime_type = mimetypes.guess_type(asset)[0]

                    # Replace asset url by a base64 dataurl
                    with open(theme_path / asset, "rb") as assetfile:
                        # b64encode output contains no newlines, nothing to strip
                        base64_str = base64.b64encode(assetfile.read()).decode("ascii")

                    data = data.replace(local_url, f"url(data:{mime_type};base64,{base64_str})")

            code = """<style type="text/css">\n%s</style>""" % data
            return markupsafe.Markup(code)

        def resources_include_js(name, module=False):
            """Get the resources include JS for a name. If module=True, import as ES module"""
            env = self.environment
            script_type = 'type="module"' if module else ""
            source = env.loader.get_source(env, name)[0]
            code = f"<script {script_type}>\n{source}</script>"
            return markupsafe.Markup(code)

        def resources_include_url(name):
            """Get the resources include url for a name."""
            env = self.environment
            mime_type, encoding = mimetypes.guess_type(name)
            try:
                # we try to load via the jinja loader, but that tries to load
                # as (encoded) text
                data = env.loader.get_source(env, name)[0].encode("utf8")
            except UnicodeDecodeError:
                # if that fails (for instance a binary file, png or ttf)
                # we mimic jinja2
                pieces = split_template_path(name)
                for searchpath in self.template_paths:
                    filename = os.path.join(searchpath, *pieces)
                    if os.path.exists(filename):
                        with open(filename, "rb") as f:
                            data = f.read()
                            break
                else:
                    # Report every searched path; the loop variable would only
                    # hold the last one and is unbound when there are none.
                    msg = f"No file {name!r} found in {self.template_paths!r}"
                    raise ValueError(msg)
            # b64encode output contains no newlines, nothing to strip
            data = base64.b64encode(data).decode("ascii")
            src = f"data:{mime_type};base64,{data}"
            return markupsafe.Markup(src)

        resources = super()._init_resources(resources)
        resources["theme"] = self.theme
        resources["include_css"] = resources_include_css
        resources["include_lab_theme"] = resources_include_lab_theme
        resources["include_js"] = resources_include_js
        resources["include_url"] = resources_include_url
        resources["require_js_url"] = self.require_js_url
        resources["mathjax_url"] = self.mathjax_url
        resources["mermaid_js_url"] = self.mermaid_js_url
        resources["jquery_url"] = self.jquery_url
        resources["jupyter_widgets_base_url"] = self.jupyter_widgets_base_url
        resources["widget_renderer_url"] = self.widget_renderer_url
        resources["html_manager_semver_range"] = self.html_manager_semver_range
        resources["should_sanitize_html"] = self.sanitize_html
        resources["language_code"] = self.language_code
        resources["should_not_encode_svg"] = self.skip_svg_encoding
        return resources