1"""Export to PDF via a headless browser"""
2
3# Copyright (c) IPython Development Team.
4# Distributed under the terms of the Modified BSD License.
5
6import asyncio
7import concurrent.futures
8import os
9import subprocess
10import sys
11import tempfile
12from importlib import util as importlib_util
13
14from traitlets import Bool, List, Unicode, default
15
16from .html import HTMLExporter
17
# True when the optional ``playwright`` package can be located for import.
PLAYWRIGHT_INSTALLED = bool(importlib_util.find_spec("playwright"))
# True when the interpreter reports the Windows ("nt") OS family.
IS_WINDOWS = "nt" == os.name
20
21
class WebPDFExporter(HTMLExporter):
    """Writer designed to write to PDF files.

    This inherits from :class:`HTMLExporter`. It creates the HTML using the
    template machinery, and then runs playwright to create a PDF.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help="""
        Split generated notebook into multiple pages.

        If False, a PDF with one long page will be generated.

        Set to True to match behavior of LaTeX based PDF generator
        """,
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        """Return the default file extension (``.html``, the intermediate format)."""
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        """Return the name of the template used to render the intermediate HTML."""
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help="""
        Disable chromium security sandbox when converting to PDF.

        WARNING: This could cause arbitrary code execution in specific circumstances,
        where JS in your notebook can execute serverside code! Please use with
        caution.

        ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``
        has more information.

        This is required for webpdf to work inside most container environments.
        """,
    ).tag(config=True)

    browser_args = List(
        Unicode(),
        help="""
        Additional arguments to pass to the browser rendering to PDF.

        These arguments will be passed directly to the browser launch method
        and can be used to customize browser behavior beyond the default settings.
        """,
    ).tag(config=True)

    def run_playwright(self, html):
        """Render an HTML document to PDF with headless Chromium via playwright.

        Parameters
        ----------
        html
            The HTML source to render; it is encoded as UTF-8 and written to
            a temporary ``.html`` file that the browser loads.

        Returns
        -------
        The value returned by playwright's ``page.pdf`` (the PDF data).

        Raises
        ------
        RuntimeError
            If playwright cannot be imported, or if Chromium fails to launch.
        subprocess.CalledProcessError
            If ``allow_chromium_download`` is set and the
            ``playwright install chromium`` command exits with an error.
        """

        async def main(temp_file):
            """Load ``temp_file`` in Chromium and return the printed PDF."""

            try:
                from playwright.async_api import (  # type: ignore[import-not-found] # noqa: PLC0415,
                    async_playwright,
                )
            except ModuleNotFoundError as e:
                msg = (
                    "Playwright is not installed to support Web PDF conversion. "
                    "Please install `nbconvert[webpdf]` to enable."
                )
                raise RuntimeError(msg) from e

            if self.allow_chromium_download:
                cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
                subprocess.check_call(cmd)  # noqa: S603

            playwright = await async_playwright().start()
            try:
                # Copy the configured arguments: appending to the trait value
                # itself would accumulate "--no-sandbox" on every conversion.
                args = list(self.browser_args)
                if self.disable_sandbox and "--no-sandbox" not in args:
                    args.append("--no-sandbox")

                try:
                    browser = await playwright.chromium.launch(
                        handle_sigint=False,
                        handle_sigterm=False,
                        handle_sighup=False,
                        args=args,
                    )
                except Exception as e:
                    msg = (
                        "No suitable chromium executable found on the system. "
                        "Please use '--allow-chromium-download' to allow downloading one, "
                        "or install it using `playwright install chromium`."
                    )
                    raise RuntimeError(msg) from e

                try:
                    page = await browser.new_page()
                    await page.emulate_media(media="print")
                    await page.wait_for_timeout(100)
                    await page.goto(f"file://{temp_file.name}", wait_until="networkidle")
                    await page.wait_for_timeout(100)

                    pdf_params = {"print_background": True}
                    if not self.paginate:
                        # Floating point precision errors can cause the printed
                        # PDF to spill over onto a new page by a pixel fraction,
                        # so round up and pad each dimension by one.
                        dimensions = await page.evaluate(
                            """() => {
                            const rect = document.body.getBoundingClientRect();
                            return {
                                width: Math.ceil(rect.width) + 1,
                                height: Math.ceil(rect.height) + 1,
                            }
                        }"""
                        )
                        width = dimensions["width"]
                        height = dimensions["height"]
                        # 200 inches is the maximum size for Adobe Acrobat Reader.
                        pdf_params.update(
                            {
                                "width": min(width, 200 * 72),
                                "height": min(height, 200 * 72),
                            }
                        )
                    return await page.pdf(**pdf_params)
                finally:
                    # Close the browser even when rendering fails.
                    await browser.close()
            finally:
                # Always stop the playwright driver, on success or failure.
                await playwright.stop()

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(  # noqa: SIM115
            suffix=".html", delete=False
        )
        try:
            with temp_file:
                temp_file.write(html.encode("utf-8"))
            # asyncio.run is executed on a worker thread; presumably so that
            # conversion also works when the calling thread already has a
            # running event loop. The context manager shuts the pool down.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                pdf_data = pool.submit(asyncio.run, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if writing or playwright raises
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert from a notebook node.

        The notebook is first exported to HTML by the parent exporter, and the
        HTML is then rendered to PDF with :meth:`run_playwright`.

        Returns
        -------
        A ``(pdf_data, resources)`` tuple, where ``resources`` has its
        ``"output_extension"`` entry set to ``".pdf"``.
        """
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_playwright(html)
        self.log.info("PDF successfully created")

        # convert output extension to pdf
        # the writer above required it to be html
        resources["output_extension"] = ".pdf"

        return pdf_data, resources