Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/exporters/html.py: 32%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

171 statements  

1"""HTML Exporter class""" 

2 

3# Copyright (c) Jupyter Development Team. 

4# Distributed under the terms of the Modified BSD License. 

5 

6import base64 

7import json 

8import mimetypes 

9import os 

10from pathlib import Path 

11from typing import Any, Optional 

12 

13import jinja2 

14import markupsafe 

15from bs4 import BeautifulSoup 

16from jupyter_core.paths import jupyter_path 

17from traitlets import Bool, Dict, Unicode, default, validate 

18from traitlets.config import Config 

19 

20if tuple(int(x) for x in jinja2.__version__.split(".")[:3]) < (3, 0, 0): 

21 from jinja2 import contextfilter # type:ignore[attr-defined] 

22else: 

23 from jinja2 import pass_context as contextfilter 

24 

25from jinja2.loaders import split_template_path 

26from nbformat import NotebookNode 

27 

28from nbconvert.filters.highlight import Highlight2HTML 

29from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath 

30from nbconvert.filters.widgetsdatatypefilter import WidgetsDataTypeFilter 

31from nbconvert.utils.iso639_1 import iso639_1 

32 

33from .templateexporter import TemplateExporter 

34 

35 

36def find_lab_theme(theme_name): 

37 """ 

38 Find a JupyterLab theme location by name. 

39 

40 Parameters 

41 ---------- 

42 theme_name : str 

43 The name of the labextension theme you want to find. 

44 

45 Raises 

46 ------ 

47 ValueError 

48 If the theme was not found, or if it was not specific enough. 

49 

50 Returns 

51 ------- 

52 theme_name: str 

53 Full theme name (with scope, if any) 

54 labextension_path : Path 

55 The path to the found labextension on the system. 

56 """ 

57 paths = jupyter_path("labextensions") 

58 

59 matching_themes = [] 

60 theme_path = None 

61 for path in paths: 

62 for dirpath, dirnames, filenames in os.walk(path): 

63 # If it's a federated labextension that contains themes 

64 if "package.json" in filenames and "themes" in dirnames: 

65 # TODO Find the theme name in the JS code instead? 

66 # TODO Find if it's a light or dark theme? 

67 with open(Path(dirpath) / "package.json", encoding="utf-8") as fobj: 

68 labext_name = json.loads(fobj.read())["name"] 

69 

70 if labext_name == theme_name or theme_name in labext_name.split("/"): 

71 matching_themes.append(labext_name) 

72 

73 full_theme_name = labext_name 

74 theme_path = Path(dirpath) / "themes" / labext_name 

75 

76 if len(matching_themes) == 0: 

77 msg = f'Could not find lab theme "{theme_name}"' 

78 raise ValueError(msg) 

79 

80 if len(matching_themes) > 1: 

81 msg = ( 

82 f'Found multiple themes matching "{theme_name}": {matching_themes}. ' 

83 "Please be more specific about which theme you want to use." 

84 ) 

85 raise ValueError(msg) 

86 

87 return full_theme_name, theme_path 

88 

89 

90class HTMLExporter(TemplateExporter): 

91 """ 

92 Exports a basic HTML document. This exporter assists with the export of 

93 HTML. Inherit from it if you are writing your own HTML template and need 

94 custom preprocessors/filters. If you don't need custom preprocessors/ 

95 filters, just change the 'template_file' config option. 

96 """ 

97 

98 export_from_notebook = "HTML" 

99 

100 anchor_link_text = Unicode("¶", help="The text used as the text for anchor links.").tag( 

101 config=True 

102 ) 

103 

104 exclude_anchor_links = Bool(False, help="If anchor links should be included or not.").tag( 

105 config=True 

106 ) 

107 

108 require_js_url = Unicode( 

109 "https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js", 

110 help=""" 

111 URL to load require.js from. 

112 

113 Defaults to loading from cdnjs. 

114 """, 

115 ).tag(config=True) 

116 

117 mathjax_url = Unicode( 

118 "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS_CHTML-full,Safe", 

119 help=""" 

120 URL to load Mathjax from. 

121 

122 Defaults to loading from cdnjs. 

123 """, 

124 ).tag(config=True) 

125 

126 mermaid_js_url = Unicode( 

127 "https://cdnjs.cloudflare.com/ajax/libs/mermaid/10.7.0/mermaid.esm.min.mjs", 

128 help=""" 

129 URL to load MermaidJS from. 

130 

131 Defaults to loading from cdnjs. 

132 """, 

133 ) 

134 

135 jquery_url = Unicode( 

136 "https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js", 

137 help=""" 

138 URL to load jQuery from. 

139 

140 Defaults to loading from cdnjs. 

141 """, 

142 ).tag(config=True) 

143 

144 jupyter_widgets_base_url = Unicode( 

145 "https://unpkg.com/", help="URL base for Jupyter widgets" 

146 ).tag(config=True) 

147 

148 widget_renderer_url = Unicode("", help="Full URL for Jupyter widgets").tag(config=True) 

149 

150 html_manager_semver_range = Unicode( 

151 "*", help="Semver range for Jupyter widgets HTML manager" 

152 ).tag(config=True) 

153 

154 @default("file_extension") 

155 def _file_extension_default(self): 

156 return ".html" 

157 

158 @default("template_name") 

159 def _template_name_default(self): 

160 return "lab" 

161 

162 theme = Unicode( 

163 "light", 

164 help="Template specific theme(e.g. the name of a JupyterLab CSS theme distributed as prebuilt extension for the lab template)", 

165 ).tag(config=True) 

166 

167 sanitize_html = Bool( 

168 False, 

169 help=( 

170 "Whether the HTML in Markdown cells and cell outputs should be sanitized." 

171 "This should be set to True by nbviewer or similar tools." 

172 ), 

173 ).tag(config=True) 

174 

175 skip_svg_encoding = Bool( 

176 False, 

177 help=("Whether the svg to image data attribute encoding should occur"), 

178 ).tag(config=True) 

179 

180 embed_images = Bool( 

181 False, help="Whether or not to embed images as base64 in markdown cells." 

182 ).tag(config=True) 

183 

184 output_mimetype = "text/html" 

185 

186 lexer_options = Dict( 

187 {}, 

188 help=( 

189 "Options to be passed to the pygments lexer for highlighting markdown code blocks. " 

190 "See https://pygments.org/docs/lexers/#available-lexers for available options." 

191 ), 

192 ).tag(config=True) 

193 

194 @property 

195 def default_config(self): 

196 c = Config( 

197 { 

198 "NbConvertBase": { 

199 "display_data_priority": [ 

200 "application/vnd.jupyter.widget-view+json", 

201 "application/javascript", 

202 "text/html", 

203 "text/markdown", 

204 "image/svg+xml", 

205 "text/vnd.mermaid", 

206 "text/latex", 

207 "image/png", 

208 "image/jpeg", 

209 "text/plain", 

210 ] 

211 }, 

212 "HighlightMagicsPreprocessor": {"enabled": True}, 

213 } 

214 ) 

215 if super().default_config: 

216 c2 = super().default_config.copy() 

217 c2.merge(c) 

218 c = c2 

219 return c 

220 

221 language_code = Unicode( 

222 "en", help="Language code of the content, should be one of the ISO639-1" 

223 ).tag(config=True) 

224 

225 @validate("language_code") 

226 def _valid_language_code(self, proposal): 

227 if self.language_code not in iso639_1: 

228 self.log.warning( 

229 '"%s" is not an ISO 639-1 language code. ' 

230 'It has been replaced by the default value "en".', 

231 self.language_code, 

232 ) 

233 return proposal["trait"].default_value 

234 return proposal["value"] 

235 

236 @contextfilter 

237 def markdown2html(self, context, source): 

238 """Markdown to HTML filter respecting the anchor_link_text setting""" 

239 cell = context.get("cell", {}) 

240 attachments = cell.get("attachments", {}) 

241 path = context.get("resources", {}).get("metadata", {}).get("path", "") 

242 

243 renderer = IPythonRenderer( 

244 escape=False, 

245 attachments=attachments, 

246 embed_images=self.embed_images, 

247 path=path, 

248 anchor_link_text=self.anchor_link_text, 

249 exclude_anchor_links=self.exclude_anchor_links, 

250 **self.lexer_options, 

251 ) 

252 return MarkdownWithMath(renderer=renderer).render(source) 

253 

254 def default_filters(self): 

255 """Get the default filters.""" 

256 yield from super().default_filters() 

257 yield ("markdown2html", self.markdown2html) 

258 

259 def from_notebook_node( # type:ignore[explicit-override, override] 

260 self, nb: NotebookNode, resources: Optional[dict[str, Any]] = None, **kw: Any 

261 ) -> tuple[str, dict[str, Any]]: 

262 """Convert from notebook node.""" 

263 langinfo = nb.metadata.get("language_info", {}) 

264 lexer = langinfo.get("pygments_lexer", langinfo.get("name", None)) 

265 highlight_code = self.filters.get( 

266 "highlight_code", Highlight2HTML(pygments_lexer=lexer, parent=self) 

267 ) 

268 

269 resources = self._init_resources(resources) 

270 

271 filter_data_type = WidgetsDataTypeFilter( 

272 notebook_metadata=self._nb_metadata, parent=self, resources=resources 

273 ) 

274 

275 self.register_filter("highlight_code", highlight_code) 

276 self.register_filter("filter_data_type", filter_data_type) 

277 html, resources = super().from_notebook_node(nb, resources, **kw) 

278 soup = BeautifulSoup(html, features="html.parser") 

279 # Add image's alternative text 

280 missing_alt = 0 

281 for elem in soup.select("img:not([alt])"): 

282 elem.attrs["alt"] = "No description has been provided for this image" 

283 missing_alt += 1 

284 if missing_alt: 

285 self.log.warning("Alternative text is missing on %s image(s).", missing_alt) 

286 # Set input and output focusable 

287 for elem in soup.select(".jp-Notebook div.jp-Cell-inputWrapper"): 

288 elem.attrs["tabindex"] = "0" 

289 for elem in soup.select(".jp-Notebook div.jp-OutputArea-output"): 

290 elem.attrs["tabindex"] = "0" 

291 

292 return str(soup), resources 

293 

294 def _init_resources(self, resources): 

295 def resources_include_css(name): 

296 env = self.environment 

297 code = """<style type="text/css">\n%s</style>""" % (env.loader.get_source(env, name)[0]) 

298 return markupsafe.Markup(code) 

299 

300 def resources_include_lab_theme(name): 

301 # Try to find the theme with the given name, looking through the labextensions 

302 _, theme_path = find_lab_theme(name) 

303 

304 with open(theme_path / "index.css") as file: 

305 data = file.read() 

306 

307 # Embed assets (fonts, images...) 

308 for asset in os.listdir(theme_path): 

309 local_url = f"url({Path(asset).as_posix()})" 

310 

311 if local_url in data: 

312 mime_type = mimetypes.guess_type(asset)[0] 

313 

314 # Replace asset url by a base64 dataurl 

315 with open(theme_path / asset, "rb") as assetfile: 

316 base64_data = base64.b64encode(assetfile.read()) 

317 base64_str = base64_data.replace(b"\n", b"").decode("ascii") 

318 

319 data = data.replace(local_url, f"url(data:{mime_type};base64,{base64_str})") 

320 

321 code = """<style type="text/css">\n%s</style>""" % data 

322 return markupsafe.Markup(code) 

323 

324 def resources_include_js(name, module=False): 

325 """Get the resources include JS for a name. If module=True, import as ES module""" 

326 env = self.environment 

327 code = f"""<script {'type="module"' if module else ""}>\n{env.loader.get_source(env, name)[0]}</script>""" 

328 return markupsafe.Markup(code) 

329 

330 def resources_include_url(name): 

331 """Get the resources include url for a name.""" 

332 env = self.environment 

333 mime_type, encoding = mimetypes.guess_type(name) 

334 try: 

335 # we try to load via the jinja loader, but that tries to load 

336 # as (encoded) text 

337 data = env.loader.get_source(env, name)[0].encode("utf8") 

338 except UnicodeDecodeError: 

339 # if that fails (for instance a binary file, png or ttf) 

340 # we mimic jinja2 

341 pieces = split_template_path(name) 

342 for searchpath in self.template_paths: 

343 filename = os.path.join(searchpath, *pieces) 

344 if os.path.exists(filename): 

345 with open(filename, "rb") as f: 

346 data = f.read() 

347 break 

348 else: 

349 msg = f"No file {name!r} found in {searchpath!r}" 

350 raise ValueError(msg) 

351 data = base64.b64encode(data) 

352 data = data.replace(b"\n", b"").decode("ascii") 

353 src = f"data:{mime_type};base64,{data}" 

354 return markupsafe.Markup(src) 

355 

356 resources = super()._init_resources(resources) 

357 resources["theme"] = self.theme 

358 resources["include_css"] = resources_include_css 

359 resources["include_lab_theme"] = resources_include_lab_theme 

360 resources["include_js"] = resources_include_js 

361 resources["include_url"] = resources_include_url 

362 resources["require_js_url"] = self.require_js_url 

363 resources["mathjax_url"] = self.mathjax_url 

364 resources["mermaid_js_url"] = self.mermaid_js_url 

365 resources["jquery_url"] = self.jquery_url 

366 resources["jupyter_widgets_base_url"] = self.jupyter_widgets_base_url 

367 resources["widget_renderer_url"] = self.widget_renderer_url 

368 resources["html_manager_semver_range"] = self.html_manager_semver_range 

369 resources["should_sanitize_html"] = self.sanitize_html 

370 resources["language_code"] = self.language_code 

371 resources["should_not_encode_svg"] = self.skip_svg_encoding 

372 return resources