Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/exporters/webpdf.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

79 statements  

1"""Export to PDF via a headless browser""" 

2 

3# Copyright (c) IPython Development Team. 

4# Distributed under the terms of the Modified BSD License. 

5 

6import asyncio 

7import concurrent.futures 

8import os 

9import subprocess 

10import sys 

11import tempfile 

12from importlib import util as importlib_util 

13 

14from traitlets import Bool, List, Unicode, default 

15 

16from .html import HTMLExporter 

17 

# True when the interpreter is running on Windows; ``run_playwright`` uses this
# to pick a ``ProactorEventLoop`` instead of the default new event loop.
IS_WINDOWS = os.name == "nt"

# True when the optional ``playwright`` package can be located on the import
# path. Only the module spec is looked up; the package is not imported here.
PLAYWRIGHT_INSTALLED = importlib_util.find_spec("playwright") is not None

20 

21 

class WebPDFExporter(HTMLExporter):
    """Writer designed to write to PDF files.

    This inherits from :class:`HTMLExporter`. It creates the HTML using the
    template machinery, and then runs playwright to create a PDF.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help="""
        Split generated notebook into multiple pages.

        If False, a PDF with one long page will be generated.

        Set to True to match behavior of LaTeX based PDF generator
        """,
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        # The HTML is rendered first; ``from_notebook_node`` switches the
        # output extension to ``.pdf`` once the PDF has been produced.
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help="""
        Disable chromium security sandbox when converting to PDF.

        WARNING: This could cause arbitrary code execution in specific circumstances,
        where JS in your notebook can execute serverside code! Please use with
        caution.

        ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``
        has more information.

        This is required for webpdf to work inside most container environments.
        """,
    ).tag(config=True)

    browser_args = List(
        Unicode(),
        help="""
        Additional arguments to pass to the browser rendering to PDF.

        These arguments will be passed directly to the browser launch method
        and can be used to customize browser behavior beyond the default settings.
        """,
    ).tag(config=True)

    def run_playwright(self, html):
        """Render an HTML document to PDF with headless Chromium via playwright.

        The HTML is written to a temporary ``.html`` file, loaded in Chromium
        with print media emulation, and printed to PDF. The playwright
        coroutine runs on a private event loop in a worker thread.

        Parameters
        ----------
        html : str
            The complete HTML document to render.

        Returns
        -------
        bytes
            The PDF data produced by playwright's ``page.pdf``.

        Raises
        ------
        RuntimeError
            If playwright is not installed, or if Chromium cannot be launched.
        subprocess.CalledProcessError
            If ``allow_chromium_download`` is enabled and
            ``playwright install chromium`` exits with a non-zero status.
        """

        async def main(temp_file):
            """Run main playwright script."""

            try:
                from playwright.async_api import async_playwright  # type: ignore[import-not-found]
            except ModuleNotFoundError as e:
                msg = (
                    "Playwright is not installed to support Web PDF conversion. "
                    "Please install `nbconvert[webpdf]` to enable."
                )
                raise RuntimeError(msg) from e

            if self.allow_chromium_download:
                cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
                subprocess.check_call(cmd)  # noqa: S603

            # Work on a copy: appending to ``self.browser_args`` directly would
            # mutate the configured trait value and accumulate one extra
            # ``--no-sandbox`` per conversion.
            args = list(self.browser_args)
            if self.disable_sandbox:
                args.append("--no-sandbox")

            playwright = await async_playwright().start()
            try:
                try:
                    browser = await playwright.chromium.launch(
                        handle_sigint=False, handle_sigterm=False, handle_sighup=False, args=args
                    )
                except Exception as e:
                    msg = (
                        "No suitable chromium executable found on the system. "
                        "Please use '--allow-chromium-download' to allow downloading one, "
                        "or install it using `playwright install chromium`."
                    )
                    raise RuntimeError(msg) from e

                try:
                    page = await browser.new_page()
                    await page.emulate_media(media="print")
                    await page.wait_for_timeout(100)
                    await page.goto(f"file://{temp_file.name}", wait_until="networkidle")
                    await page.wait_for_timeout(100)

                    pdf_params = {"print_background": True}
                    if not self.paginate:
                        # Floating point precision errors can cause the printed
                        # PDF to spill over onto a new page by a pixel fraction,
                        # hence the rounding up plus one extra pixel.
                        dimensions = await page.evaluate(
                            """() => {
                            const rect = document.body.getBoundingClientRect();
                            return {
                                width: Math.ceil(rect.width) + 1,
                                height: Math.ceil(rect.height) + 1,
                            }
                        }"""
                        )
                        width = dimensions["width"]
                        height = dimensions["height"]
                        # 200 inches is the maximum size for Adobe Acrobat Reader.
                        pdf_params.update(
                            {
                                "width": min(width, 200 * 72),
                                "height": min(height, 200 * 72),
                            }
                        )
                    return await page.pdf(**pdf_params)
                finally:
                    # Close the browser even when rendering fails so that no
                    # Chromium process is left behind.
                    await browser.close()
            finally:
                # Always stop the playwright driver, on success and on failure.
                await playwright.stop()

        def run_coroutine(coro):
            """Run an internal coroutine on a fresh event loop and close the loop."""
            loop = (
                asyncio.ProactorEventLoop()  # type:ignore[attr-defined]
                if IS_WINDOWS
                else asyncio.new_event_loop()
            )
            asyncio.set_event_loop(loop)
            try:
                return loop.run_until_complete(coro)
            finally:
                try:
                    loop.run_until_complete(loop.shutdown_asyncgens())
                finally:
                    asyncio.set_event_loop(None)
                    loop.close()

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        try:
            with temp_file:
                temp_file.write(html.encode("utf-8"))

            # The coroutine is driven from a worker thread with its own loop
            # (presumably so this also works when the calling thread already
            # has a running event loop -- confirm against callers). The ``with``
            # block shuts the executor down once the result is available.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                pdf_data = pool.submit(run_coroutine, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if writing it or playwright
            # raises an exception.
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert from a notebook node.

        Renders the notebook to HTML through the parent exporter, prints that
        HTML to PDF with :meth:`run_playwright`, and returns the PDF data
        together with ``resources`` whose ``output_extension`` is ``.pdf``.
        """
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_playwright(html)
        self.log.info("PDF successfully created")

        # convert output extension to pdf
        # the writer above required it to be html
        resources["output_extension"] = ".pdf"

        return pdf_data, resources