1"""Export to PDF via a headless browser"""
2
3# Copyright (c) IPython Development Team.
4# Distributed under the terms of the Modified BSD License.
5
6import asyncio
7import concurrent.futures
8import os
9import subprocess
10import sys
11import tempfile
12from importlib import util as importlib_util
13
14from traitlets import Bool, default
15
16from .html import HTMLExporter
17
# True on Windows; used below to pick the event loop implementation.
IS_WINDOWS = os.name == "nt"

# True when the optional ``playwright`` package can be located on the import
# path. ``find_spec`` returns ``None`` for a missing top-level package, so the
# package is only looked up here, not imported.
PLAYWRIGHT_INSTALLED = bool(importlib_util.find_spec("playwright"))
20
21
class WebPDFExporter(HTMLExporter):
    """Writer designed to write to PDF files.

    This inherits from :class:`HTMLExporter`. It creates the HTML using the
    template machinery, and then runs playwright to create a PDF.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help="""
        Split generated notebook into multiple pages.

        If False, a PDF with one long page will be generated.

        Set to True to match behavior of LaTeX based PDF generator
        """,
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        # The intermediate document is HTML; ``from_notebook_node`` switches
        # the output extension to ".pdf" once the PDF has been produced.
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help="""
        Disable chromium security sandbox when converting to PDF.

        WARNING: This could cause arbitrary code execution in specific circumstances,
        where JS in your notebook can execute serverside code! Please use with
        caution.

        ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``
        has more information.

        This is required for webpdf to work inside most container environments.
        """,
    ).tag(config=True)

    def run_playwright(self, html):
        """Render ``html`` to PDF with a headless Chromium driven by playwright.

        The HTML is UTF-8 encoded into a temporary ``.html`` file, which
        Chromium loads through a ``file://`` URL. The temporary file is always
        removed before this method returns or raises.

        Parameters
        ----------
        html : str
            The HTML document to print.

        Returns
        -------
        The value returned by playwright's ``page.pdf`` (the PDF data).

        Raises
        ------
        RuntimeError
            If playwright cannot be imported, or if Chromium fails to launch.
        subprocess.CalledProcessError
            If ``allow_chromium_download`` is set and the
            ``playwright install chromium`` command fails.
        """

        async def main(temp_file):
            """Run main playwright script."""
            args = ["--no-sandbox"] if self.disable_sandbox else []
            try:
                from playwright.async_api import (  # type: ignore[import-not-found]
                    async_playwright,
                )
            except ModuleNotFoundError as e:
                msg = (
                    "Playwright is not installed to support Web PDF conversion. "
                    "Please install `nbconvert[webpdf]` to enable."
                )
                raise RuntimeError(msg) from e

            if self.allow_chromium_download:
                cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
                subprocess.check_call(cmd)  # noqa: S603

            playwright = await async_playwright().start()
            try:
                chromium = playwright.chromium

                try:
                    browser = await chromium.launch(
                        handle_sigint=False,
                        handle_sigterm=False,
                        handle_sighup=False,
                        args=args,
                    )
                except Exception as e:
                    msg = (
                        "No suitable chromium executable found on the system. "
                        "Please use '--allow-chromium-download' to allow downloading one, "
                        "or install it using `playwright install chromium`."
                    )
                    raise RuntimeError(msg) from e

                # From here on the browser must be closed even if rendering
                # fails, otherwise the Chromium process is left behind.
                try:
                    page = await browser.new_page()
                    await page.emulate_media(media="print")
                    await page.wait_for_timeout(100)
                    await page.goto(f"file://{temp_file.name}", wait_until="networkidle")
                    await page.wait_for_timeout(100)

                    pdf_params = {"print_background": True}
                    if not self.paginate:
                        # Floating point precision errors can cause the printed
                        # PDF to spill over onto a new page by a pixel fraction,
                        # so the measured size is rounded up and padded by one.
                        dimensions = await page.evaluate(
                            """() => {
                            const rect = document.body.getBoundingClientRect();
                            return {
                                width: Math.ceil(rect.width) + 1,
                                height: Math.ceil(rect.height) + 1,
                            }
                        }"""
                        )
                        width = dimensions["width"]
                        height = dimensions["height"]
                        # 200 inches is the maximum size for Adobe Acrobat Reader.
                        pdf_params.update(
                            {
                                "width": min(width, 200 * 72),
                                "height": min(height, 200 * 72),
                            }
                        )
                    return await page.pdf(**pdf_params)
                finally:
                    await browser.close()
            finally:
                # Always stop the playwright driver, whether launching or
                # rendering succeeded or failed.
                await playwright.stop()

        def run_coroutine(coro):
            """Run an internal coroutine on a fresh event loop and close the loop."""
            # TODO: when dropping Python 3.6, use
            # pdf_data = pool.submit(asyncio.run, main(temp_file)).result()
            loop = (
                asyncio.ProactorEventLoop()  # type:ignore[attr-defined]
                if IS_WINDOWS
                else asyncio.new_event_loop()
            )
            asyncio.set_event_loop(loop)
            try:
                return loop.run_until_complete(coro)
            finally:
                asyncio.set_event_loop(None)
                loop.close()

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        try:
            with temp_file:
                temp_file.write(html.encode("utf-8"))

            # The coroutine runs on its own event loop in a worker thread
            # (presumably so this also works when the calling thread already
            # has a running loop - confirm). The executor is shut down as soon
            # as the result is available.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                pdf_data = pool.submit(run_coroutine, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if writing it or playwright
            # raises an exception.
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert from a notebook node.

        The notebook is first rendered to HTML by the parent exporter, then
        printed to PDF with :meth:`run_playwright`.

        Returns
        -------
        tuple
            ``(pdf_data, resources)``, where ``resources["output_extension"]``
            has been set to ``".pdf"``.
        """
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_playwright(html)
        self.log.info("PDF successfully created")

        # convert output extension to pdf
        # the writer above required it to be html
        resources["output_extension"] = ".pdf"

        return pdf_data, resources