Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/exporters/html.py: 32%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

173 statements  

1"""HTML Exporter class""" 

2 

3# Copyright (c) Jupyter Development Team. 

4# Distributed under the terms of the Modified BSD License. 

5 

6import base64 

7import json 

8import mimetypes 

9import os 

10from pathlib import Path 

11from typing import Any, Optional 

12 

13import jinja2 

14import markupsafe 

15from bs4 import BeautifulSoup 

16from jupyter_core.paths import jupyter_path 

17from traitlets import Bool, Dict, Unicode, default, validate 

18from traitlets.config import Config 

19 

20if tuple(int(x) for x in jinja2.__version__.split(".")[:3]) < (3, 0, 0): 

21 from jinja2 import contextfilter # type:ignore[attr-defined] 

22else: 

23 from jinja2 import pass_context as contextfilter 

24 

25from jinja2.loaders import split_template_path 

26from nbformat import NotebookNode 

27 

28from nbconvert.filters.highlight import Highlight2HTML 

29from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath 

30from nbconvert.filters.widgetsdatatypefilter import WidgetsDataTypeFilter 

31from nbconvert.utils.iso639_1 import iso639_1 

32 

33from .templateexporter import TemplateExporter 

34 

35 

def find_lab_theme(theme_name, search_paths=None):
    """
    Find a JupyterLab theme location by name.

    Parameters
    ----------
    theme_name : str
        The name of the labextension theme you want to find.
    search_paths : iterable of str or Path, optional
        Directories to walk while looking for labextensions. When omitted,
        ``jupyter_path("labextensions")`` is used.

    Raises
    ------
    ValueError
        If the theme was not found, or if it was not specific enough.

    Returns
    -------
    theme_name: str
        Full theme name (with scope, if any)
    labextension_path : Path
        The path to the found labextension on the system.
    """
    paths = jupyter_path("labextensions") if search_paths is None else search_paths

    matching_themes = []
    full_theme_name = None
    theme_path = None
    for path in paths:
        for dirpath, dirnames, filenames in os.walk(path):
            # Only a federated labextension that contains themes is a candidate.
            if "package.json" not in filenames or "themes" not in dirnames:
                continue

            # TODO Find the theme name in the JS code instead?
            # TODO Find if it's a light or dark theme?
            try:
                with open(Path(dirpath) / "package.json", encoding="utf-8") as fobj:
                    metadata = json.load(fobj)
            except ValueError:
                # Malformed JSON (or undecodable bytes) in one extension must
                # not abort the search for every other extension.
                continue

            labext_name = metadata.get("name") if isinstance(metadata, dict) else None
            if not isinstance(labext_name, str):
                # No usable package name to compare against; skip it.
                continue

            # Match either the full (possibly scoped) name or one of its
            # "/"-separated parts, e.g. "my-theme" matches "@scope/my-theme".
            if labext_name == theme_name or theme_name in labext_name.split("/"):
                matching_themes.append(labext_name)

                full_theme_name = labext_name
                theme_path = Path(dirpath) / "themes" / labext_name

    if not matching_themes:
        msg = f'Could not find lab theme "{theme_name}"'
        raise ValueError(msg)

    if len(matching_themes) > 1:
        msg = (
            f'Found multiple themes matching "{theme_name}": {matching_themes}. '
            "Please be more specific about which theme you want to use."
        )
        raise ValueError(msg)

    return full_theme_name, theme_path

88 

89 

class HTMLExporter(TemplateExporter):
    """
    Exports a basic HTML document. This exporter assists with the export of
    HTML. Inherit from it if you are writing your own HTML template and need
    custom preprocessors/filters. If you don't need custom preprocessors/
    filters, just change the 'template_file' config option.
    """

    export_from_notebook = "HTML"

    anchor_link_text = Unicode("¶", help="The text used as the text for anchor links.").tag(
        config=True
    )

    exclude_anchor_links = Bool(False, help="If anchor links should be included or not.").tag(
        config=True
    )

    require_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js",
        help="""
        URL to load require.js from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    mathjax_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS_CHTML-full,Safe",
        help="""
        URL to load Mathjax from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    # Tagged as configurable like every other URL trait on this class.
    mermaid_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mermaid/11.10.0/mermaid.esm.min.mjs",
        help="""
        URL to load MermaidJS from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    mermaid_layout_elk_js_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/mermaid-layout-elk/0.1.9/mermaid-layout-elk.esm.min.mjs",
        help="""
        URL to load MermaidJS ELK layout from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    jquery_url = Unicode(
        "https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js",
        help="""
        URL to load jQuery from.

        Defaults to loading from cdnjs.
        """,
    ).tag(config=True)

    jupyter_widgets_base_url = Unicode(
        "https://unpkg.com/", help="URL base for Jupyter widgets"
    ).tag(config=True)

    widget_renderer_url = Unicode("", help="Full URL for Jupyter widgets").tag(config=True)

    html_manager_semver_range = Unicode(
        "*", help="Semver range for Jupyter widgets HTML manager"
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        """Default file extension of the exported document."""
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        """Default template used by this exporter."""
        return "lab"

    theme = Unicode(
        "light",
        help="Template specific theme(e.g. the name of a JupyterLab CSS theme distributed as prebuilt extension for the lab template)",
    ).tag(config=True)

    sanitize_html = Bool(
        False,
        help=(
            "Whether the HTML in Markdown cells and cell outputs should be sanitized. "
            "This should be set to True by nbviewer or similar tools."
        ),
    ).tag(config=True)

    skip_svg_encoding = Bool(
        False,
        help=("Whether the svg to image data attribute encoding should occur"),
    ).tag(config=True)

    embed_images = Bool(
        False, help="Whether or not to embed images as base64 in markdown cells."
    ).tag(config=True)

    output_mimetype = "text/html"

    lexer_options = Dict(
        {},
        help=(
            "Options to be passed to the pygments lexer for highlighting markdown code blocks. "
            "See https://pygments.org/docs/lexers/#available-lexers for available options."
        ),
    ).tag(config=True)

    @property
    def default_config(self):
        """Exporter defaults, merged on top of the parent exporter's defaults."""
        c = Config(
            {
                "NbConvertBase": {
                    "display_data_priority": [
                        "application/vnd.jupyter.widget-view+json",
                        "application/javascript",
                        "text/html",
                        "text/markdown",
                        "image/svg+xml",
                        "text/vnd.mermaid",
                        "text/latex",
                        "image/png",
                        "image/jpeg",
                        "text/plain",
                    ]
                },
                "HighlightMagicsPreprocessor": {"enabled": True},
            }
        )
        parent_config = super().default_config
        if parent_config:
            # Values defined here take precedence over the parent's.
            merged = parent_config.copy()
            merged.merge(c)
            c = merged
        return c

    language_code = Unicode(
        "en", help="Language code of the content, should be one of the ISO639-1"
    ).tag(config=True)

    @validate("language_code")
    def _valid_language_code(self, proposal):
        """Return the proposed language code, or the trait default if it is not ISO 639-1."""
        # Validate the *proposed* value: while a validator runs,
        # ``self.language_code`` still holds the previous value.
        value = proposal["value"]
        if value not in iso639_1:
            self.log.warning(
                '"%s" is not an ISO 639-1 language code. '
                'It has been replaced by the default value "en".',
                value,
            )
            return proposal["trait"].default_value
        return value

    @contextfilter
    def markdown2html(self, context, source):
        """Markdown to HTML filter respecting the anchor_link_text setting"""
        cell = context.get("cell", {})
        attachments = cell.get("attachments", {})
        path = context.get("resources", {}).get("metadata", {}).get("path", "")

        renderer = IPythonRenderer(
            escape=False,
            attachments=attachments,
            embed_images=self.embed_images,
            path=path,
            anchor_link_text=self.anchor_link_text,
            exclude_anchor_links=self.exclude_anchor_links,
            **self.lexer_options,
        )
        return MarkdownWithMath(renderer=renderer).render(source)

    def default_filters(self):
        """Get the default filters."""
        yield from super().default_filters()
        yield ("markdown2html", self.markdown2html)

    def from_notebook_node(  # type:ignore[explicit-override, override]
        self, nb: NotebookNode, resources: Optional[dict[str, Any]] = None, **kw: Any
    ) -> tuple[str, dict[str, Any]]:
        """Convert from notebook node."""
        langinfo = nb.metadata.get("language_info", {})
        lexer = langinfo.get("pygments_lexer", langinfo.get("name", None))
        # Keep a highlighter that was already registered; otherwise build one
        # for the notebook's language.
        highlight_code = self.filters.get(
            "highlight_code", Highlight2HTML(pygments_lexer=lexer, parent=self)
        )

        resources = self._init_resources(resources)

        filter_data_type = WidgetsDataTypeFilter(
            notebook_metadata=self._nb_metadata, parent=self, resources=resources
        )

        self.register_filter("highlight_code", highlight_code)
        self.register_filter("filter_data_type", filter_data_type)
        html, resources = super().from_notebook_node(nb, resources, **kw)
        soup = BeautifulSoup(html, features="html.parser")
        # Add image's alternative text
        missing_alt = 0
        for elem in soup.select("img:not([alt])"):
            elem.attrs["alt"] = "No description has been provided for this image"
            missing_alt += 1
        if missing_alt:
            self.log.warning("Alternative text is missing on %s image(s).", missing_alt)
        # Set input and output focusable
        for elem in soup.select(".jp-Notebook div.jp-Cell-inputWrapper"):
            elem.attrs["tabindex"] = "0"
        for elem in soup.select(".jp-Notebook div.jp-OutputArea-output"):
            elem.attrs["tabindex"] = "0"

        return str(soup), resources

    def _init_resources(self, resources):
        """Add the include helpers and exporter settings to ``resources``."""

        def resources_include_css(name):
            """Return the named template file wrapped in a ``<style>`` element."""
            env = self.environment
            source = env.loader.get_source(env, name)[0]
            code = f'<style type="text/css">\n{source}</style>'
            return markupsafe.Markup(code)

        def resources_include_lab_theme(name):
            """Return a lab theme's ``index.css`` with its local assets inlined."""
            # Try to find the theme with the given name, looking through the labextensions
            _, theme_path = find_lab_theme(name)

            # Read explicitly as UTF-8 rather than the platform's locale encoding.
            with open(theme_path / "index.css", encoding="utf-8") as file:
                data = file.read()

            # Embed assets (fonts, images...)
            for asset in os.listdir(theme_path):
                local_url = f"url({Path(asset).as_posix()})"
                if local_url not in data:
                    continue

                # guess_type returns None for unknown extensions; use a generic
                # type instead of emitting "data:None".
                mime_type = mimetypes.guess_type(asset)[0] or "application/octet-stream"

                # Replace asset url by a base64 dataurl
                with open(theme_path / asset, "rb") as assetfile:
                    base64_str = base64.b64encode(assetfile.read()).decode("ascii")

                data = data.replace(local_url, f"url(data:{mime_type};base64,{base64_str})")

            code = f'<style type="text/css">\n{data}</style>'
            return markupsafe.Markup(code)

        def resources_include_js(name, module=False):
            """Get the resources include JS for a name. If module=True, import as ES module"""
            env = self.environment
            code = f"""<script {'type="module"' if module else ""}>\n{env.loader.get_source(env, name)[0]}</script>"""
            return markupsafe.Markup(code)

        def resources_include_url(name):
            """Get the resources include url for a name."""
            env = self.environment
            mime_type = mimetypes.guess_type(name)[0] or "application/octet-stream"
            try:
                # we try to load via the jinja loader, but that tries to load
                # as (encoded) text
                data = env.loader.get_source(env, name)[0].encode("utf8")
            except UnicodeDecodeError:
                # if that fails (for instance a binary file, png or ttf)
                # we mimic jinja2
                pieces = split_template_path(name)
                for searchpath in self.template_paths:
                    filename = os.path.join(searchpath, *pieces)
                    if os.path.exists(filename):
                        with open(filename, "rb") as f:
                            data = f.read()
                        break
                else:
                    # Report every searched path; the loop variable is unbound
                    # when there are no template paths at all.
                    msg = f"No file {name!r} found in {self.template_paths!r}"
                    raise ValueError(msg)
            encoded = base64.b64encode(data).decode("ascii")
            src = f"data:{mime_type};base64,{encoded}"
            return markupsafe.Markup(src)

        resources = super()._init_resources(resources)
        resources["theme"] = self.theme
        resources["include_css"] = resources_include_css
        resources["include_lab_theme"] = resources_include_lab_theme
        resources["include_js"] = resources_include_js
        resources["include_url"] = resources_include_url
        resources["require_js_url"] = self.require_js_url
        resources["mathjax_url"] = self.mathjax_url
        resources["mermaid_js_url"] = self.mermaid_js_url
        resources["mermaid_layout_elk_js_url"] = self.mermaid_layout_elk_js_url
        resources["jquery_url"] = self.jquery_url
        resources["jupyter_widgets_base_url"] = self.jupyter_widgets_base_url
        resources["widget_renderer_url"] = self.widget_renderer_url
        resources["html_manager_semver_range"] = self.html_manager_semver_range
        resources["should_sanitize_html"] = self.sanitize_html
        resources["language_code"] = self.language_code
        resources["should_not_encode_svg"] = self.skip_svg_encoding
        return resources