Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/exporters/webpdf.py: 29%

1"""Export to PDF via a headless browser"""

3# Copyright (c) IPython Development Team.

4# Distributed under the terms of the Modified BSD License.

6import asyncio

7import concurrent.futures

8import os

9import tempfile

10from importlib import util as importlib_util

12from traitlets import Bool, default

14from .html import HTMLExporter

# True when an import spec for the optional ``pyppeteer`` package can be found;
# the package itself is not imported here.
PYPPETEER_INSTALLED = importlib_util.find_spec("pyppeteer") is not None

class WebPDFExporter(HTMLExporter):
    """Writer designed to write to PDF files.

    This inherits from :class:`HTMLExporter`. It creates the HTML using the
    template machinery, and then runs pyppeteer to create a pdf.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help="""
        Split generated notebook into multiple pages.

        If False, a PDF with one long page will be generated.

        Set to True to match behavior of LaTeX based PDF generator
        """,
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        # The intermediate document is HTML; ``from_notebook_node`` switches
        # the output extension to ".pdf" once the PDF has been produced.
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help="""
        Disable chromium security sandbox when converting to PDF.

        WARNING: This could cause arbitrary code execution in specific circumstances,
        where JS in your notebook can execute serverside code! Please use with
        caution.

        ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``
        has more information.

        This is required for webpdf to work inside most container environments.
        """,
    ).tag(config=True)

    def _check_launch_reqs(self):
        """Verify that pyppeteer and a chromium binary are usable.

        Returns
        -------
        The ``pyppeteer.launch`` callable.

        Raises
        ------
        RuntimeError
            If pyppeteer cannot be imported, or if ``check_chromium()`` reports
            no chromium and ``allow_chromium_download`` is False.
        """
        try:
            from pyppeteer import launch  # type: ignore[import]
            from pyppeteer.util import check_chromium  # type:ignore
        except ModuleNotFoundError as e:
            msg = (
                "Pyppeteer is not installed to support Web PDF conversion. "
                "Please install `nbconvert[webpdf]` to enable."
            )
            raise RuntimeError(msg) from e
        if not self.allow_chromium_download and not check_chromium():
            msg = (
                "No suitable chromium executable found on the system. "
                "Please use '--allow-chromium-download' to allow downloading one."
            )
            raise RuntimeError(msg)
        return launch

    def run_pyppeteer(self, html):
        """Render an HTML string to PDF with a headless browser.

        The HTML is written to a temporary ``.html`` file, loaded in a browser
        started through pyppeteer, and printed to PDF. The temporary file is
        always removed, and the browser, event loop and worker thread are
        released even when rendering fails.

        Parameters
        ----------
        html : str
            The HTML document to render.

        Returns
        -------
        The value returned by pyppeteer's ``page.pdf`` (presumably the PDF as
        bytes -- confirm against the pyppeteer API).

        Raises
        ------
        RuntimeError
            Propagated from ``_check_launch_reqs`` when pyppeteer or chromium
            is unavailable.
        """

        async def main(temp_file):
            """Run main pyppeteer script."""
            args = ["--no-sandbox"] if self.disable_sandbox else []
            browser = await self._check_launch_reqs()(
                handleSIGINT=False, handleSIGTERM=False, handleSIGHUP=False, args=args
            )
            try:
                page = await browser.newPage()
                await page.emulateMedia("print")
                await page.waitFor(100)
                await page.goto(f"file://{temp_file.name}", waitUntil="networkidle0")
                await page.waitFor(100)

                pdf_params = {"printBackground": True}
                if not self.paginate:
                    # Floating point precision errors can make the printed PDF
                    # spill over onto a new page by a pixel fraction, so the
                    # measured size is rounded up and padded by one pixel.
                    dimensions = await page.evaluate(
                        """() => {
                        const rect = document.body.getBoundingClientRect();
                        return {
                        width: Math.ceil(rect.width) + 1,
                        height: Math.ceil(rect.height) + 1,
                        }
                    }"""
                    )
                    width = dimensions["width"]
                    height = dimensions["height"]
                    # 200 inches is the maximum size for Adobe Acrobat Reader.
                    pdf_params.update(
                        {
                            "width": min(width, 200 * 72),
                            "height": min(height, 200 * 72),
                        }
                    )
                return await page.pdf(pdf_params)
            finally:
                # Close the browser even if a page operation failed, so the
                # launched browser is not left open.
                await browser.close()

        def run_coroutine(coro):
            """Run an internal coroutine on a fresh event loop, then close the loop."""
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                return loop.run_until_complete(coro)
            finally:
                try:
                    loop.run_until_complete(loop.shutdown_asyncgens())
                finally:
                    asyncio.set_event_loop(None)
                    loop.close()

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        try:
            with temp_file:
                temp_file.write(html.encode("utf-8"))
            # The coroutine runs on a worker thread with its own event loop
            # (presumably so this also works when the calling thread already
            # has a running loop -- confirm). The ``with`` block shuts the
            # pool down so its thread does not outlive this call.
            with concurrent.futures.ThreadPoolExecutor() as pool:
                pdf_data = pool.submit(run_coroutine, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if writing the HTML or
            # pyppeteer raises an exception
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert from a notebook node.

        Checks the pyppeteer/chromium requirements first, renders the notebook
        to HTML through the parent exporter, then converts that HTML to PDF.

        Returns
        -------
        tuple
            ``(pdf_data, resources)`` where ``resources["output_extension"]``
            has been set to ``".pdf"``.
        """
        # Fail before doing the HTML export if the PDF backend is unusable.
        self._check_launch_reqs()
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_pyppeteer(html)
        self.log.info("PDF successfully created")

        # convert output extension to pdf
        # the writer above required it to be html
        resources["output_extension"] = ".pdf"

        return pdf_data, resources