1"""Export to PDF via a headless browser"""
2
3# Copyright (c) IPython Development Team.
4# Distributed under the terms of the Modified BSD License.
5
6import asyncio
7import concurrent.futures
8import os
9import subprocess
10import sys
11import tempfile
12from importlib import util as importlib_util
13
14from traitlets import Bool, List, Unicode, default
15
16from .html import HTMLExporter
17
# True when an import spec for the optional ``playwright`` package can be found
# (the package itself is not imported here).
PLAYWRIGHT_INSTALLED = importlib_util.find_spec("playwright") is not None
# True when running on Windows (``os.name == "nt"``); used to choose the
# asyncio event loop implementation that drives playwright.
IS_WINDOWS = os.name == "nt"
20
21
class WebPDFExporter(HTMLExporter):
    """Writer designed to write to PDF files.

    This inherits from :class:`HTMLExporter`. It creates the HTML using the
    template machinery, and then runs playwright to create a PDF.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help="""
        Split generated notebook into multiple pages.

        If False, a PDF with one long page will be generated.

        Set to True to match behavior of LaTeX based PDF generator
        """,
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        """Return the default output extension (``.html``).

        :meth:`from_notebook_node` replaces it with ``.pdf`` in the returned
        resources once the PDF has been produced.
        """
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        """Return the name of the default template (``webpdf``)."""
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help="""
        Disable chromium security sandbox when converting to PDF.

        WARNING: This could cause arbitrary code execution in specific circumstances,
        where JS in your notebook can execute serverside code! Please use with
        caution.

        ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``
        has more information.

        This is required for webpdf to work inside most container environments.
        """,
    ).tag(config=True)

    browser_args = List(
        Unicode(),
        help="""
        Additional arguments to pass to the browser rendering to PDF.

        These arguments will be passed directly to the browser launch method
        and can be used to customize browser behavior beyond the default settings.
        """,
    ).tag(config=True)

    def run_playwright(self, html):
        """Render an HTML document to PDF using headless Chromium via playwright.

        The HTML is written to a temporary ``.html`` file, loaded in Chromium
        through a ``file://`` URL and printed with print media emulation. The
        temporary file is removed afterwards, whether or not rendering succeeded.

        Parameters
        ----------
        html : str
            The HTML document to render.

        Returns
        -------
        The PDF data returned by playwright's ``page.pdf()``.

        Raises
        ------
        RuntimeError
            If playwright is not installed, or if Chromium could not be launched.
        subprocess.CalledProcessError
            If ``allow_chromium_download`` is set and ``playwright install chromium``
            exits with a non-zero status.
        """

        async def print_pdf(browser, url):
            """Open ``url`` in a new page of ``browser`` and return it printed as PDF."""
            page = await browser.new_page()
            await page.emulate_media(media="print")
            await page.wait_for_timeout(100)
            await page.goto(url, wait_until="networkidle")
            await page.wait_for_timeout(100)

            pdf_params = {"print_background": True}
            if not self.paginate:
                # Size a single page to the document body. One extra pixel is
                # added because floating point precision errors can otherwise
                # cause the printed PDF to spill over onto a new page by a
                # fraction of a pixel.
                dimensions = await page.evaluate(
                    """() => {
                    const rect = document.body.getBoundingClientRect();
                    return {
                        width: Math.ceil(rect.width) + 1,
                        height: Math.ceil(rect.height) + 1,
                    }
                }"""
                )
                # 200 inches is the maximum size for Adobe Acrobat Reader.
                max_size = 200 * 72
                pdf_params.update(
                    {
                        "width": min(dimensions["width"], max_size),
                        "height": min(dimensions["height"], max_size),
                    }
                )
            return await page.pdf(**pdf_params)

        async def main(temp_file):
            """Run main playwright script."""

            try:
                from playwright.async_api import async_playwright  # type: ignore[import-not-found]
            except ModuleNotFoundError as e:
                msg = (
                    "Playwright is not installed to support Web PDF conversion. "
                    "Please install `nbconvert[webpdf]` to enable."
                )
                raise RuntimeError(msg) from e

            if self.allow_chromium_download:
                cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
                subprocess.check_call(cmd)  # noqa: S603

            # Work on a copy: appending to ``self.browser_args`` directly would
            # modify the configured trait value and accumulate one extra
            # ``--no-sandbox`` on every conversion.
            args = list(self.browser_args)
            if self.disable_sandbox and "--no-sandbox" not in args:
                args.append("--no-sandbox")

            playwright = await async_playwright().start()
            try:
                try:
                    browser = await playwright.chromium.launch(
                        handle_sigint=False,
                        handle_sigterm=False,
                        handle_sighup=False,
                        args=args,
                    )
                except Exception as e:
                    msg = (
                        "No suitable chromium executable found on the system. "
                        "Please use '--allow-chromium-download' to allow downloading one, "
                        "or install it using `playwright install chromium`."
                    )
                    raise RuntimeError(msg) from e

                try:
                    return await print_pdf(browser, f"file://{temp_file.name}")
                finally:
                    # Close the browser even when rendering failed so that no
                    # Chromium process is left behind.
                    await browser.close()
            finally:
                # Always stop the playwright driver, on success and on failure.
                await playwright.stop()

        def run_coroutine(coro):
            """Run an internal coroutine on a fresh event loop and close the loop."""
            loop = (
                asyncio.ProactorEventLoop()  # type:ignore[attr-defined]
                if IS_WINDOWS
                else asyncio.new_event_loop()
            )
            asyncio.set_event_loop(loop)
            try:
                return loop.run_until_complete(coro)
            finally:
                try:
                    loop.run_until_complete(loop.shutdown_asyncgens())
                finally:
                    asyncio.set_event_loop(None)
                    loop.close()

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        try:
            with temp_file:
                temp_file.write(html.encode("utf-8"))

            # The coroutine runs on its own event loop in a worker thread
            # (presumably so that this also works when the calling thread
            # already has a running event loop -- TODO confirm). The ``with``
            # block shuts the executor down once the result is available.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                pdf_data = pool.submit(run_coroutine, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if writing it or playwright
            # raises an exception.
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert from a notebook node.

        The notebook is first converted to HTML by the parent exporter, and
        that HTML is then rendered to PDF by :meth:`run_playwright`.

        Parameters
        ----------
        nb
            The notebook node to convert.
        resources : optional
            Resources forwarded to the parent exporter.
        **kw
            Extra keyword arguments forwarded to the parent exporter.

        Returns
        -------
        tuple
            ``(pdf_data, resources)``, where ``resources["output_extension"]``
            has been set to ``".pdf"``.
        """
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_playwright(html)
        self.log.info("PDF successfully created")

        # convert output extension to pdf
        # the writer above required it to be html
        resources["output_extension"] = ".pdf"

        return pdf_data, resources