Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/exporters/html.py: 32%

1"""HTML Exporter class"""

3# Copyright (c) Jupyter Development Team.

4# Distributed under the terms of the Modified BSD License.

6import base64

7import json

8import mimetypes

9import os

10from pathlib import Path

11from typing import Any, Optional

13import jinja2

14import markupsafe

15from bs4 import BeautifulSoup

16from jupyter_core.paths import jupyter_path

17from traitlets import Bool, Dict, Unicode, default, validate

18from traitlets.config import Config

20if tuple(int(x) for x in jinja2.__version__.split(".")[:3]) < (3, 0, 0):

21 from jinja2 import contextfilter # type:ignore[attr-defined]

22else:

23 from jinja2 import pass_context as contextfilter

25from jinja2.loaders import split_template_path

26from nbformat import NotebookNode

28from nbconvert.filters.highlight import Highlight2HTML

29from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath

30from nbconvert.filters.widgetsdatatypefilter import WidgetsDataTypeFilter

31from nbconvert.utils.iso639_1 import iso639_1

33from .templateexporter import TemplateExporter

def find_lab_theme(theme_name):
    """
    Find a JupyterLab theme location by name.

    Every ``labextensions`` directory reported by ``jupyter_path`` is walked.
    A directory counts as a theme extension when it holds a ``package.json``
    file next to a ``themes`` sub-directory. The extension matches when the
    ``name`` field of its ``package.json`` equals ``theme_name`` or contains
    it as one of its ``/``-separated parts (so a scoped name can be matched
    by its unscoped part).

    Parameters
    ----------
    theme_name : str
        The name of the labextension theme you want to find.

    Raises
    ------
    ValueError
        If the theme was not found, or if it was not specific enough
        (several differently named extensions match).

    Returns
    -------
    theme_name: str
        Full theme name (with scope, if any)
    labextension_path : Path
        The path to the found labextension on the system.
    """
    # Full extension name -> theme directory. Keyed by name so that the same
    # extension installed under several search paths is recorded only once;
    # ``setdefault`` keeps the copy from the earliest search path.
    matches = {}

    for path in jupyter_path("labextensions"):
        for dirpath, dirnames, filenames in os.walk(path):
            # Only federated labextensions that contain themes are candidates
            if "package.json" not in filenames or "themes" not in dirnames:
                continue

            # TODO Find the theme name in the JS code instead?
            # TODO Find if it's a light or dark theme?
            with open(Path(dirpath) / "package.json", encoding="utf-8") as fobj:
                labext_name = json.load(fobj)["name"]

            if labext_name == theme_name or theme_name in labext_name.split("/"):
                matches.setdefault(labext_name, Path(dirpath) / "themes" / labext_name)

    if not matches:
        msg = f'Could not find lab theme "{theme_name}"'
        raise ValueError(msg)

    if len(matches) > 1:
        msg = (
            f'Found multiple themes matching "{theme_name}": {list(matches)}. '
            "Please be more specific about which theme you want to use."
        )
        raise ValueError(msg)

    # Exactly one entry is left at this point
    ((full_theme_name, theme_path),) = matches.items()
    return full_theme_name, theme_path

class HTMLExporter(TemplateExporter):
    """
    Exports a basic HTML document.  This exporter assists with the export of
    HTML.  Inherit from it if you are writing your own HTML template and need
    custom preprocessors/filters.  If you don't need custom preprocessors/
    filters, just change the 'template_file' config option.
    """

    export_from_notebook = "HTML"

    anchor_link_text = Unicode("¶", help="The text used as the text for anchor links.").tag(
        config=True
    )

    exclude_anchor_links = Bool(False, help="If anchor links should be included or not.").tag(
        config=True
    )

    require_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js",
        help="""
        URL to load require.js from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    mathjax_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS_CHTML-full,Safe",
        help="""
        URL to load Mathjax from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    # Tagged configurable like every other URL trait of this exporter.
    mermaid_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mermaid/10.7.0/mermaid.esm.min.mjs",
        help="""
        URL to load MermaidJS from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    jquery_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js",
        help="""
        URL to load jQuery from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    jupyter_widgets_base_url = Unicode(
        "https://unpkg.com/", help="URL base for Jupyter widgets"
    ).tag(config=True)

    widget_renderer_url = Unicode("", help="Full URL for Jupyter widgets").tag(config=True)

    html_manager_semver_range = Unicode(
        "*", help="Semver range for Jupyter widgets HTML manager"
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        """Default file extension of the exported document."""
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        """Default template name used by this exporter."""
        return "lab"

    theme = Unicode(
        "light",
        help="Template specific theme(e.g. the name of a JupyterLab CSS theme distributed as prebuilt extension for the lab template)",
    ).tag(config=True)

    sanitize_html = Bool(
        False,
        help=(
            "Whether the HTML in Markdown cells and cell outputs should be sanitized. "
            "This should be set to True by nbviewer or similar tools."
        ),
    ).tag(config=True)

    skip_svg_encoding = Bool(
        False,
        help=("Whether the svg to image data attribute encoding should occur"),
    ).tag(config=True)

    embed_images = Bool(
        False, help="Whether or not to embed images as base64 in markdown cells."
    ).tag(config=True)

    output_mimetype = "text/html"

    lexer_options = Dict(
        {},
        help=(
            "Options to be passed to the pygments lexer for highlighting markdown code blocks. "
            "See https://pygments.org/docs/lexers/#available-lexers for available options."
        ),
    ).tag(config=True)

    @property
    def default_config(self):
        """Exporter defaults layered over the parent's ``default_config``.

        Sets the display-data priority list used for HTML output and enables
        ``HighlightMagicsPreprocessor``. When the parent provides a
        non-empty default config, these settings are merged into a copy of
        it.
        """
        c = Config(
            {
                "NbConvertBase": {
                    "display_data_priority": [
                        "application/vnd.jupyter.widget-view+json",
                        "application/javascript",
                        "text/html",
                        "text/markdown",
                        "image/svg+xml",
                        "text/vnd.mermaid",
                        "text/latex",
                        "image/png",
                        "image/jpeg",
                        "text/plain",
                    ]
                },
                "HighlightMagicsPreprocessor": {"enabled": True},
            }
        )
        parent_config = super().default_config
        if parent_config:
            merged = parent_config.copy()
            merged.merge(c)
            c = merged
        return c

    language_code = Unicode(
        "en", help="Language code of the content, should be one of the ISO639-1"
    ).tag(config=True)

    @validate("language_code")
    def _valid_language_code(self, proposal):
        """Replace a proposed code that is not in ``iso639_1`` by the default.

        A warning naming the rejected value is logged when the fallback is
        used.
        """
        # Check the value being proposed rather than ``self.language_code``:
        # while a direct assignment is being validated the attribute still
        # holds the previous value, which would let invalid codes through.
        value = proposal["value"]
        if value not in iso639_1:
            self.log.warning(
                '"%s" is not an ISO 639-1 language code. '
                'It has been replaced by the default value "en".',
                value,
            )
            return proposal["trait"].default_value
        return value

    @contextfilter
    def markdown2html(self, context, source):
        """Markdown to HTML filter respecting the anchor_link_text setting"""
        cell = context.get("cell", {})
        attachments = cell.get("attachments", {})
        path = context.get("resources", {}).get("metadata", {}).get("path", "")

        renderer = IPythonRenderer(
            escape=False,
            attachments=attachments,
            embed_images=self.embed_images,
            path=path,
            anchor_link_text=self.anchor_link_text,
            exclude_anchor_links=self.exclude_anchor_links,
            **self.lexer_options,
        )
        return MarkdownWithMath(renderer=renderer).render(source)

    def default_filters(self):
        """Get the default filters."""
        yield from super().default_filters()
        yield ("markdown2html", self.markdown2html)

    def from_notebook_node(  # type:ignore[explicit-override, override]
        self, nb: NotebookNode, resources: Optional[dict[str, Any]] = None, **kw: Any
    ) -> tuple[str, dict[str, Any]]:
        """Convert from notebook node.

        Registers the ``highlight_code`` and ``filter_data_type`` filters,
        delegates rendering to the parent class, then post-processes the
        HTML: images without an ``alt`` attribute get a placeholder text
        (a warning reports how many) and cell inputs/outputs are made
        focusable via ``tabindex``.

        Returns the processed HTML string and the resources dictionary.
        """
        langinfo = nb.metadata.get("language_info", {})
        lexer = langinfo.get("pygments_lexer", langinfo.get("name", None))
        # An already registered ``highlight_code`` filter takes precedence
        highlight_code = self.filters.get(
            "highlight_code", Highlight2HTML(pygments_lexer=lexer, parent=self)
        )

        resources = self._init_resources(resources)

        filter_data_type = WidgetsDataTypeFilter(
            notebook_metadata=self._nb_metadata, parent=self, resources=resources
        )

        self.register_filter("highlight_code", highlight_code)
        self.register_filter("filter_data_type", filter_data_type)
        html, resources = super().from_notebook_node(nb, resources, **kw)
        soup = BeautifulSoup(html, features="html.parser")

        # Add image's alternative text
        missing_alt = 0
        for elem in soup.select("img:not([alt])"):
            elem.attrs["alt"] = "No description has been provided for this image"
            missing_alt += 1
        if missing_alt:
            self.log.warning("Alternative text is missing on %s image(s).", missing_alt)

        # Set input and output focusable
        for elem in soup.select(".jp-Notebook div.jp-Cell-inputWrapper"):
            elem.attrs["tabindex"] = "0"
        for elem in soup.select(".jp-Notebook div.jp-OutputArea-output"):
            elem.attrs["tabindex"] = "0"

        return str(soup), resources

    def _init_resources(self, resources):
        """Extend the parent's resources with HTML-specific helpers and settings.

        Adds the ``include_css``, ``include_lab_theme``, ``include_js`` and
        ``include_url`` callables plus the exporter's URL, theme,
        sanitization, language and SVG-encoding settings to the resources
        dictionary, and returns it.
        """

        def resources_include_css(name):
            """Return the template source ``name`` wrapped in a ``<style>`` tag."""
            env = self.environment
            css_source = env.loader.get_source(env, name)[0]
            code = f"""<style type="text/css">\n{css_source}</style>"""
            return markupsafe.Markup(code)

        def resources_include_lab_theme(name):
            """Return the CSS of lab theme ``name`` in a ``<style>`` tag, assets inlined."""
            # Try to find the theme with the given name, looking through the labextensions
            _, theme_path = find_lab_theme(name)

            # Read with an explicit encoding so the result does not depend
            # on the platform's default locale encoding.
            with open(theme_path / "index.css", encoding="utf-8") as file:
                data = file.read()

            # Embed assets (fonts, images...)
            for asset in os.listdir(theme_path):
                local_url = f"url({Path(asset).as_posix()})"

                if local_url in data:
                    mime_type = mimetypes.guess_type(asset)[0]

                    # Replace asset url by a base64 dataurl
                    with open(theme_path / asset, "rb") as assetfile:
                        base64_str = base64.b64encode(assetfile.read()).decode("ascii")

                    data = data.replace(local_url, f"url(data:{mime_type};base64,{base64_str})")

            code = f"""<style type="text/css">\n{data}</style>"""
            return markupsafe.Markup(code)

        def resources_include_js(name, module=False):
            """Get the resources include JS for a name. If module=True, import as ES module"""
            env = self.environment
            script_attrs = 'type="module"' if module else ""
            js_source = env.loader.get_source(env, name)[0]
            code = f"""<script {script_attrs}>\n{js_source}</script>"""
            return markupsafe.Markup(code)

        def resources_include_url(name):
            """Get the resources include url for a name.

            Returns a base64 ``data:`` URL for the template file ``name``.
            Raises ``ValueError`` when a file that cannot be loaded as text
            is not found in any of the template paths.
            """
            env = self.environment
            mime_type, encoding = mimetypes.guess_type(name)
            try:
                # we try to load via the jinja loader, but that tries to load
                # as (encoded) text
                data = env.loader.get_source(env, name)[0].encode("utf8")
            except UnicodeDecodeError:
                # if that fails (for instance a binary file, png or ttf)
                # we mimic jinja2
                pieces = split_template_path(name)
                for searchpath in self.template_paths:
                    filename = os.path.join(searchpath, *pieces)
                    if os.path.exists(filename):
                        with open(filename, "rb") as f:
                            data = f.read()
                        break
                else:
                    # Report every path that was searched; the loop variable
                    # is unbound when there are no template paths at all.
                    msg = f"No file {name!r} found in {self.template_paths!r}"
                    raise ValueError(msg)
            data = base64.b64encode(data).decode("ascii")
            src = f"data:{mime_type};base64,{data}"
            return markupsafe.Markup(src)

        resources = super()._init_resources(resources)
        resources["theme"] = self.theme
        resources["include_css"] = resources_include_css
        resources["include_lab_theme"] = resources_include_lab_theme
        resources["include_js"] = resources_include_js
        resources["include_url"] = resources_include_url
        resources["require_js_url"] = self.require_js_url
        resources["mathjax_url"] = self.mathjax_url
        resources["mermaid_js_url"] = self.mermaid_js_url
        resources["jquery_url"] = self.jquery_url
        resources["jupyter_widgets_base_url"] = self.jupyter_widgets_base_url
        resources["widget_renderer_url"] = self.widget_renderer_url
        resources["html_manager_semver_range"] = self.html_manager_semver_range
        resources["should_sanitize_html"] = self.sanitize_html
        resources["language_code"] = self.language_code
        resources["should_not_encode_svg"] = self.skip_svg_encoding
        return resources