Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/exporters/webpdf.py: 29%
68 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""Export to PDF via a headless browser"""
3# Copyright (c) IPython Development Team.
4# Distributed under the terms of the Modified BSD License.
6import asyncio
7import concurrent.futures
8import os
9import tempfile
10from importlib import util as importlib_util
12from traitlets import Bool, default
14from .html import HTMLExporter
# True when the optional ``pyppeteer`` dependency can be located. ``find_spec``
# is used so availability can be checked without importing the package here.
PYPPETEER_INSTALLED = importlib_util.find_spec("pyppeteer") is not None
class WebPDFExporter(HTMLExporter):
    """Writer designed to write to PDF files.

    This inherits from :class:`HTMLExporter`. It creates the HTML using the
    template machinery, and then runs pyppeteer to create a pdf.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help="""
        Split generated notebook into multiple pages.

        If False, a PDF with one long page will be generated.

        Set to True to match behavior of LaTeX based PDF generator
        """,
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        # The intermediate document is HTML; ``from_notebook_node`` switches
        # the output extension to ".pdf" once the PDF has been produced.
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help="""
        Disable chromium security sandbox when converting to PDF.

        WARNING: This could cause arbitrary code execution in specific circumstances,
        where JS in your notebook can execute serverside code! Please use with
        caution.

        ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``
        has more information.

        This is required for webpdf to work inside most container environments.
        """,
    ).tag(config=True)

    def _check_launch_reqs(self):
        """Verify that pyppeteer and a usable Chromium are available.

        Returns
        -------
        The ``pyppeteer.launch`` callable, imported lazily so that the module
        can be imported without pyppeteer installed.

        Raises
        ------
        RuntimeError
            If pyppeteer cannot be imported, or if no Chromium executable is
            found and ``allow_chromium_download`` is False.
        """
        try:
            from pyppeteer import launch  # type: ignore[import]
            from pyppeteer.util import check_chromium  # type:ignore
        except ModuleNotFoundError as e:
            msg = (
                "Pyppeteer is not installed to support Web PDF conversion. "
                "Please install `nbconvert[webpdf]` to enable."
            )
            raise RuntimeError(msg) from e
        if not self.allow_chromium_download and not check_chromium():
            msg = (
                "No suitable chromium executable found on the system. "
                "Please use '--allow-chromium-download' to allow downloading one."
            )
            raise RuntimeError(msg)
        return launch

    def run_pyppeteer(self, html):
        """Render an HTML document to PDF with a headless browser.

        Parameters
        ----------
        html : str
            The HTML document; it is written UTF-8 encoded to a temporary
            file that the browser then loads.

        Returns
        -------
        The PDF data produced by pyppeteer's ``page.pdf``.

        Raises
        ------
        RuntimeError
            Propagated from ``_check_launch_reqs`` when pyppeteer or Chromium
            is unavailable.
        """

        async def main(temp_file):
            """Launch the browser, load the temp file and print it to PDF."""
            args = ["--no-sandbox"] if self.disable_sandbox else []
            browser = await self._check_launch_reqs()(
                handleSIGINT=False, handleSIGTERM=False, handleSIGHUP=False, args=args
            )
            try:
                page = await browser.newPage()
                await page.emulateMedia("print")
                await page.waitFor(100)
                await page.goto(f"file://{temp_file.name}", waitUntil="networkidle0")
                await page.waitFor(100)

                pdf_params = {"printBackground": True}
                if not self.paginate:
                    # Floating point precision errors can cause the printed
                    # PDF to spill over onto a new page by a pixel fraction,
                    # so round up and add one pixel of slack.
                    dimensions = await page.evaluate(
                        """() => {
                        const rect = document.body.getBoundingClientRect();
                        return {
                            width: Math.ceil(rect.width) + 1,
                            height: Math.ceil(rect.height) + 1,
                        }
                    }"""
                    )
                    width = dimensions["width"]
                    height = dimensions["height"]
                    # 200 inches is the maximum size for Adobe Acrobat Reader.
                    pdf_params.update(
                        {
                            "width": min(width, 200 * 72),
                            "height": min(height, 200 * 72),
                        }
                    )
                return await page.pdf(pdf_params)
            finally:
                # Always shut the browser down, even when rendering fails, so
                # that no Chromium process is left behind.
                await browser.close()

        def run_coroutine(coro):
            """Run ``coro`` to completion on a fresh event loop."""
            # TODO: when dropping Python 3.6, use asyncio.run instead.
            # NOTE(review): the loop is deliberately not closed here; confirm
            # that pyppeteer no longer needs it (e.g. for exit-time cleanup)
            # before adding ``loop.close()``.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        try:
            with temp_file:
                temp_file.write(html.encode("utf-8"))
            # The coroutine runs in a worker thread with its own event loop
            # (presumably so this also works when the calling thread already
            # has a running loop - verify against callers). The context
            # manager shuts the pool down once the result is available.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                pdf_data = pool.submit(run_coroutine, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if writing it or pyppeteer
            # raises an exception.
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert a notebook node to PDF.

        The notebook is first exported to HTML by the parent exporter and
        that HTML is then rendered to PDF through ``run_pyppeteer``.

        Returns
        -------
        A ``(pdf_data, resources)`` tuple, where ``resources`` has its
        ``"output_extension"`` entry set to ``".pdf"``.

        Raises
        ------
        RuntimeError
            If the pyppeteer/Chromium requirements are not met; this is
            checked up front, before the HTML export is attempted.
        """
        self._check_launch_reqs()
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_pyppeteer(html)
        self.log.info("PDF successfully created")

        # convert output extension to pdf
        # the writer above required it to be html
        resources["output_extension"] = ".pdf"

        return pdf_data, resources