Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/exporters/html.py: 32%

1"""HTML Exporter class"""

3# Copyright (c) Jupyter Development Team.

4# Distributed under the terms of the Modified BSD License.

6import base64

7import json

8import mimetypes

9import os

10from pathlib import Path

11from typing import Any, Optional

13import jinja2

14import markupsafe

15from bs4 import BeautifulSoup

16from jupyter_core.paths import jupyter_path

17from traitlets import Bool, Dict, Unicode, default, validate

18from traitlets.config import Config

# Jinja2 3.0 introduced ``pass_context`` as the replacement for
# ``contextfilter`` (both exist in the 3.0.x series). Detect the feature by
# importing it rather than by parsing ``jinja2.__version__``: converting the
# version components with ``int`` fails on pre-release versions such as
# "3.0.0rc1".
try:
    from jinja2 import pass_context as contextfilter
except ImportError:
    # Older Jinja2 that only provides the legacy decorator.
    from jinja2 import contextfilter  # type:ignore[attr-defined]

25from jinja2.loaders import split_template_path

26from nbformat import NotebookNode

28from nbconvert.filters.highlight import Highlight2HTML

29from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath

30from nbconvert.filters.widgetsdatatypefilter import WidgetsDataTypeFilter

31from nbconvert.utils.iso639_1 import iso639_1

33from .templateexporter import TemplateExporter

def find_lab_theme(theme_name):
    """
    Locate an installed JupyterLab theme extension by name.

    Every directory returned by ``jupyter_path("labextensions")`` is walked
    recursively. A directory is treated as a theme extension when it contains
    a ``package.json`` file next to a ``themes`` sub-directory; the ``name``
    entry of that ``package.json`` is then compared with ``theme_name``.

    Parameters
    ----------
    theme_name : str
        The name of the labextension theme to find. Either the full package
        name or any single ``/``-separated component of it is accepted.

    Raises
    ------
    ValueError
        If no theme matches, or if more than one theme matches.

    Returns
    -------
    full_theme_name : str
        Package name as read from ``package.json`` (with scope, if any).
    theme_path : Path
        ``<extension directory>/themes/<full_theme_name>``.
    """
    # (package name, theme directory) for every extension that matches.
    found = []

    for root in jupyter_path("labextensions"):
        for dirpath, dirnames, filenames in os.walk(root):
            # Only federated labextensions that ship themes are of interest.
            if not ("package.json" in filenames and "themes" in dirnames):
                continue

            # TODO Find the theme name in the JS code instead?
            # TODO Find if it's a light or dark theme?
            extension_dir = Path(dirpath)
            with open(extension_dir / "package.json", encoding="utf-8") as manifest:
                labext_name = json.load(manifest)["name"]

            if theme_name != labext_name and theme_name not in labext_name.split("/"):
                continue

            found.append((labext_name, extension_dir / "themes" / labext_name))

    if not found:
        msg = f'Could not find lab theme "{theme_name}"'
        raise ValueError(msg)

    if len(found) > 1:
        matching_themes = [name for name, _ in found]
        msg = (
            f'Found multiple themes matching "{theme_name}": {matching_themes}. '
            "Please be more specific about which theme you want to use."
        )
        raise ValueError(msg)

    full_theme_name, theme_path = found[0]
    return full_theme_name, theme_path

class HTMLExporter(TemplateExporter):
    """
    Exports a basic HTML document. This exporter assists with the export of
    HTML. Inherit from it if you are writing your own HTML template and need
    custom preprocessors/filters. If you don't need custom preprocessors/
    filters, just change the 'template_file' config option.
    """

    export_from_notebook = "HTML"

    anchor_link_text = Unicode("¶", help="The text used as the text for anchor links.").tag(
        config=True
    )

    exclude_anchor_links = Bool(False, help="If anchor links should be included or not.").tag(
        config=True
    )

    require_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js",
        help="""
        URL to load require.js from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    mathjax_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS_CHTML-full,Safe",
        help="""
        URL to load Mathjax from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    # Tagged as configurable like every other URL trait of this exporter.
    mermaid_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mermaid/11.10.0/mermaid.esm.min.mjs",
        help="""
        URL to load MermaidJS from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    # Tagged as configurable like every other URL trait of this exporter.
    mermaid_layout_elk_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mermaid-layout-elk/0.1.9/mermaid-layout-elk.esm.min.mjs",
        help="""
        URL to load MermaidJS ELK layout from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    jquery_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js",
        help="""
        URL to load jQuery from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    jupyter_widgets_base_url = Unicode(
        "https://unpkg.com/", help="URL base for Jupyter widgets"
    ).tag(config=True)

    widget_renderer_url = Unicode("", help="Full URL for Jupyter widgets").tag(config=True)

    html_manager_semver_range = Unicode(
        "*", help="Semver range for Jupyter widgets HTML manager"
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        """Default value of the ``file_extension`` trait."""
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        """Default value of the ``template_name`` trait."""
        return "lab"

    theme = Unicode(
        "light",
        help="Template specific theme(e.g. the name of a JupyterLab CSS theme distributed as prebuilt extension for the lab template)",
    ).tag(config=True)

    sanitize_html = Bool(
        False,
        help=(
            "Whether the HTML in Markdown cells and cell outputs should be sanitized. "
            "This should be set to True by nbviewer or similar tools."
        ),
    ).tag(config=True)

    skip_svg_encoding = Bool(
        False,
        help=("Whether the svg to image data attribute encoding should occur"),
    ).tag(config=True)

    embed_images = Bool(
        False, help="Whether or not to embed images as base64 in markdown cells."
    ).tag(config=True)

    output_mimetype = "text/html"

    lexer_options = Dict(
        {},
        help=(
            "Options to be passed to the pygments lexer for highlighting markdown code blocks. "
            "See https://pygments.org/docs/lexers/#available-lexers for available options."
        ),
    ).tag(config=True)

    @property
    def default_config(self):
        """Return this exporter's defaults merged on top of the parent's.

        The HTML-specific settings are the display-data priority list and the
        enabled ``HighlightMagicsPreprocessor``. When the parent class provides
        a non-empty default configuration, a copy of it is taken and the
        HTML-specific settings are merged into that copy.
        """
        c = Config(
            {
                "NbConvertBase": {
                    "display_data_priority": [
                        "application/vnd.jupyter.widget-view+json",
                        "application/javascript",
                        "text/html",
                        "text/markdown",
                        "image/svg+xml",
                        "text/vnd.mermaid",
                        "text/latex",
                        "image/png",
                        "image/jpeg",
                        "text/plain",
                    ]
                },
                "HighlightMagicsPreprocessor": {"enabled": True},
            }
        )
        # Read the parent's property once instead of evaluating it twice.
        parent_config = super().default_config
        if parent_config:
            merged = parent_config.copy()
            merged.merge(c)
            c = merged
        return c

    language_code = Unicode(
        "en", help="Language code of the content, should be one of the ISO639-1"
    ).tag(config=True)

    @validate("language_code")
    def _valid_language_code(self, proposal):
        """Validate a proposed ``language_code`` against the ISO 639-1 table.

        Returns the proposed value when it is listed in ``iso639_1``;
        otherwise logs a warning and returns the trait's default value.
        """
        # The validator runs before the assignment takes effect, so the value
        # to check is the proposed one, not the current ``self.language_code``.
        value = proposal["value"]
        if value not in iso639_1:
            self.log.warning(
                '"%s" is not an ISO 639-1 language code. '
                'It has been replaced by the default value "en".',
                value,
            )
            return proposal["trait"].default_value
        return value

    @contextfilter
    def markdown2html(self, context, source):
        """Markdown to HTML filter respecting the anchor_link_text setting"""
        cell = context.get("cell", {})
        attachments = cell.get("attachments", {})
        path = context.get("resources", {}).get("metadata", {}).get("path", "")

        renderer = IPythonRenderer(
            escape=False,
            attachments=attachments,
            embed_images=self.embed_images,
            path=path,
            anchor_link_text=self.anchor_link_text,
            exclude_anchor_links=self.exclude_anchor_links,
            **self.lexer_options,
        )
        return MarkdownWithMath(renderer=renderer).render(source)

    def default_filters(self):
        """Get the default filters."""
        yield from super().default_filters()
        yield ("markdown2html", self.markdown2html)

    def from_notebook_node(  # type:ignore[explicit-override, override]
        self, nb: NotebookNode, resources: Optional[dict[str, Any]] = None, **kw: Any
    ) -> tuple[str, dict[str, Any]]:
        """Convert from notebook node.

        Registers the ``highlight_code`` and ``filter_data_type`` filters,
        delegates the conversion to the parent class, then post-processes the
        resulting HTML: images without an ``alt`` attribute receive a
        placeholder text, and cell input wrappers and output areas are given
        ``tabindex="0"``.

        Returns the post-processed HTML string together with the resources
        dictionary returned by the parent class.
        """
        langinfo = nb.metadata.get("language_info", {})
        lexer = langinfo.get("pygments_lexer", langinfo.get("name", None))
        # An already registered "highlight_code" filter takes precedence over
        # the default pygments based one.
        highlight_code = self.filters.get(
            "highlight_code", Highlight2HTML(pygments_lexer=lexer, parent=self)
        )

        resources = self._init_resources(resources)

        # NOTE(review): ``_nb_metadata`` is not defined in this class;
        # presumably it is provided by the parent exporter -- confirm.
        filter_data_type = WidgetsDataTypeFilter(
            notebook_metadata=self._nb_metadata, parent=self, resources=resources
        )

        self.register_filter("highlight_code", highlight_code)
        self.register_filter("filter_data_type", filter_data_type)
        html, resources = super().from_notebook_node(nb, resources, **kw)
        soup = BeautifulSoup(html, features="html.parser")
        # Add image's alternative text
        missing_alt = 0
        for elem in soup.select("img:not([alt])"):
            elem.attrs["alt"] = "No description has been provided for this image"
            missing_alt += 1
        if missing_alt:
            self.log.warning("Alternative text is missing on %s image(s).", missing_alt)
        # Set input and output focusable
        for elem in soup.select(".jp-Notebook div.jp-Cell-inputWrapper"):
            elem.attrs["tabindex"] = "0"
        for elem in soup.select(".jp-Notebook div.jp-OutputArea-output"):
            elem.attrs["tabindex"] = "0"

        return str(soup), resources

    def _init_resources(self, resources):
        """Extend the parent's resources with HTML-specific entries.

        Adds the ``include_css``, ``include_lab_theme``, ``include_js`` and
        ``include_url`` helper callables together with the URL, theme,
        sanitizing, language and SVG settings of this exporter, and returns
        the resulting resources dictionary.
        """
        # MIME type used in data URLs when the type cannot be guessed from the
        # file name; avoids emitting the literal text "None".
        fallback_mime_type = "application/octet-stream"

        def resources_include_css(name):
            """Return the template source ``name`` wrapped in a ``<style>`` tag."""
            env = self.environment
            css = env.loader.get_source(env, name)[0]
            code = f"""<style type="text/css">\n{css}</style>"""
            return markupsafe.Markup(code)

        def resources_include_lab_theme(name):
            """Return the CSS of lab theme ``name`` with its assets inlined."""
            # Try to find the theme with the given name, looking through the labextensions
            _, theme_path = find_lab_theme(name)

            # Read with an explicit encoding so the result does not depend on
            # the platform's default locale.
            with open(theme_path / "index.css", encoding="utf-8") as file:
                data = file.read()

            # Embed assets (fonts, images...)
            for asset in os.listdir(theme_path):
                local_url = f"url({Path(asset).as_posix()})"

                if local_url in data:
                    mime_type = mimetypes.guess_type(asset)[0] or fallback_mime_type

                    # Replace asset url by a base64 dataurl
                    with open(theme_path / asset, "rb") as assetfile:
                        # b64encode never inserts newlines, so the output can
                        # be decoded as is.
                        base64_str = base64.b64encode(assetfile.read()).decode("ascii")

                    data = data.replace(local_url, f"url(data:{mime_type};base64,{base64_str})")

            code = f"""<style type="text/css">\n{data}</style>"""
            return markupsafe.Markup(code)

        def resources_include_js(name, module=False):
            """Get the resources include JS for a name. If module=True, import as ES module"""
            env = self.environment
            code = f"""<script {'type="module"' if module else ""}>\n{env.loader.get_source(env, name)[0]}</script>"""
            return markupsafe.Markup(code)

        def resources_include_url(name):
            """Get the resources include url for a name.

            Returns the content of ``name`` as a base64 ``data:`` URL. Raises
            ``ValueError`` when a binary file cannot be found in any of the
            template search paths.
            """
            env = self.environment
            mime_type = mimetypes.guess_type(name)[0] or fallback_mime_type
            try:
                # we try to load via the jinja loader, but that tries to load
                # as (encoded) text
                data = env.loader.get_source(env, name)[0].encode("utf8")
            except UnicodeDecodeError:
                # if that fails (for instance a binary file, png or ttf)
                # we mimic jinja2
                pieces = split_template_path(name)
                for searchpath in self.template_paths:
                    filename = os.path.join(searchpath, *pieces)
                    if os.path.exists(filename):
                        with open(filename, "rb") as f:
                            data = f.read()
                        break
                else:
                    # Report every path that was searched; the loop variable
                    # is unbound when ``template_paths`` is empty.
                    msg = f"No file {name!r} found in {self.template_paths!r}"
                    raise ValueError(msg)
            encoded = base64.b64encode(data).decode("ascii")
            src = f"data:{mime_type};base64,{encoded}"
            return markupsafe.Markup(src)

        resources = super()._init_resources(resources)
        resources["theme"] = self.theme
        resources["include_css"] = resources_include_css
        resources["include_lab_theme"] = resources_include_lab_theme
        resources["include_js"] = resources_include_js
        resources["include_url"] = resources_include_url
        resources["require_js_url"] = self.require_js_url
        resources["mathjax_url"] = self.mathjax_url
        resources["mermaid_js_url"] = self.mermaid_js_url
        resources["mermaid_layout_elk_js_url"] = self.mermaid_layout_elk_js_url
        resources["jquery_url"] = self.jquery_url
        resources["jupyter_widgets_base_url"] = self.jupyter_widgets_base_url
        resources["widget_renderer_url"] = self.widget_renderer_url
        resources["html_manager_semver_range"] = self.html_manager_semver_range
        resources["should_sanitize_html"] = self.sanitize_html
        resources["language_code"] = self.language_code
        resources["should_not_encode_svg"] = self.skip_svg_encoding
        return resources