Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/exporters/webpdf.py: 32%

1"""Export to PDF via a headless browser"""

3# Copyright (c) IPython Development Team.

4# Distributed under the terms of the Modified BSD License.

6import asyncio

7import concurrent.futures

8import os

9import subprocess

10import sys

11import tempfile

12from importlib import util as importlib_util

14from traitlets import Bool, List, Unicode, default

16from .html import HTMLExporter

# True when a module spec for ``playwright`` can be found, i.e. the optional
# dependency is importable; ``find_spec`` is used so the check itself does not
# import the package.
18PLAYWRIGHT_INSTALLED = importlib_util.find_spec("playwright") is not None

# True when ``os.name`` reports "nt" (Windows).
19IS_WINDOWS = os.name == "nt"

class WebPDFExporter(HTMLExporter):
    """Writer designed to write to PDF files.

    This inherits from :class:`HTMLExporter`. It creates the HTML using the
    template machinery, and then runs playwright to create a PDF.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help="""
        Split generated notebook into multiple pages.

        If False, a PDF with one long page will be generated.

        Set to True to match behavior of LaTeX based PDF generator
        """,
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        """Default output extension; switched to ``.pdf`` in :meth:`from_notebook_node`."""
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        """Name of the template used to render the intermediate HTML."""
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help="""
        Disable chromium security sandbox when converting to PDF.

        WARNING: This could cause arbitrary code execution in specific circumstances,
        where JS in your notebook can execute serverside code! Please use with
        caution.

        ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``
        has more information.

        This is required for webpdf to work inside most container environments.
        """,
    ).tag(config=True)

    browser_args = List(
        Unicode(),
        help="""
        Additional arguments to pass to the browser rendering to PDF.

        These arguments will be passed directly to the browser launch method
        and can be used to customize browser behavior beyond the default settings.
        """,
    ).tag(config=True)

    def run_playwright(self, html):
        """Render an HTML document to PDF using Chromium driven by playwright.

        The HTML is written to a temporary ``.html`` file, loaded in a
        Chromium page with print media emulation, and printed to PDF. The
        temporary file is always removed afterwards.

        Parameters
        ----------
        html : str
            The HTML document to render.

        Returns
        -------
        The PDF data produced by ``page.pdf()``.

        Raises
        ------
        RuntimeError
            If playwright cannot be imported, or if Chromium fails to launch.
        subprocess.CalledProcessError
            If ``allow_chromium_download`` is set and the
            ``playwright install chromium`` command fails.
        """

        async def main(temp_file):
            """Run main playwright script."""
            try:
                from playwright.async_api import (  # type: ignore[import-not-found] # noqa: PLC0415,
                    async_playwright,
                )
            except ModuleNotFoundError as e:
                msg = (
                    "Playwright is not installed to support Web PDF conversion. "
                    "Please install `nbconvert[webpdf]` to enable."
                )
                raise RuntimeError(msg) from e

            if self.allow_chromium_download:
                cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
                subprocess.check_call(cmd)  # noqa: S603

            playwright = await async_playwright().start()
            try:
                # Work on a copy: appending to the trait value itself would
                # permanently alter the exporter's configuration and add one
                # more "--no-sandbox" on every conversion.
                args = list(self.browser_args)
                if self.disable_sandbox:
                    args.append("--no-sandbox")

                try:
                    browser = await playwright.chromium.launch(
                        handle_sigint=False,
                        handle_sigterm=False,
                        handle_sighup=False,
                        args=args,
                    )
                except Exception as e:
                    msg = (
                        "No suitable chromium executable found on the system. "
                        "Please use '--allow-chromium-download' to allow downloading one, "
                        "or install it using `playwright install chromium`."
                    )
                    raise RuntimeError(msg) from e

                try:
                    page = await browser.new_page()
                    await page.emulate_media(media="print")
                    await page.wait_for_timeout(100)
                    await page.goto(f"file://{temp_file.name}", wait_until="networkidle")
                    await page.wait_for_timeout(100)

                    pdf_params = {"print_background": True}
                    if not self.paginate:
                        # Floating point precision errors can make the printed
                        # PDF spill over onto a new page by a fraction of a
                        # pixel, so round up and pad each dimension by one.
                        dimensions = await page.evaluate(
                            """() => {
                            const rect = document.body.getBoundingClientRect();
                            return {
                                width: Math.ceil(rect.width) + 1,
                                height: Math.ceil(rect.height) + 1,
                            }
                        }"""
                        )
                        width = dimensions["width"]
                        height = dimensions["height"]
                        # 200 inches is the maximum size for Adobe Acrobat Reader.
                        pdf_params.update(
                            {
                                "width": min(width, 200 * 72),
                                "height": min(height, 200 * 72),
                            }
                        )
                    return await page.pdf(**pdf_params)
                finally:
                    # Close the browser even when rendering fails so that no
                    # Chromium process is left behind.
                    await browser.close()
            finally:
                # Stop the playwright driver on every exit path.
                await playwright.stop()

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(  # noqa: SIM115
            suffix=".html", delete=False
        )
        try:
            with temp_file:
                temp_file.write(html.encode("utf-8"))
            # asyncio.run is executed in a worker thread so it gets a fresh
            # event loop; the executor is shut down as soon as the job is done.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                pdf_data = pool.submit(asyncio.run, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if writing it or playwright
            # raises an exception.
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert from a notebook node.

        The notebook is first converted to HTML by the parent exporter, and
        that HTML is then rendered to PDF with :meth:`run_playwright`.

        Parameters
        ----------
        nb
            The notebook node to convert; passed through to the parent
            exporter.
        resources : optional
            Resources passed through to the parent exporter.
        **kw
            Extra keyword arguments passed through to the parent exporter.

        Returns
        -------
        tuple
            ``(pdf_data, resources)`` where ``resources["output_extension"]``
            has been set to ``".pdf"``.
        """
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_playwright(html)
        self.log.info("PDF successfully created")

        # convert output extension to pdf
        # the writer above required it to be html
        resources["output_extension"] = ".pdf"

        return pdf_data, resources