Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/exporters/webpdf.py: 35%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

78 statements  

1"""Export to PDF via a headless browser""" 

2 

3# Copyright (c) IPython Development Team. 

4# Distributed under the terms of the Modified BSD License. 

5 

6import asyncio 

7import concurrent.futures 

8import os 

9import subprocess 

10import sys 

11import tempfile 

12from importlib import util as importlib_util 

13 

14from traitlets import Bool, Int, List, Unicode, default 

15 

16from .html import HTMLExporter 

17 

# True when the interpreter reports a Windows host (``os.name`` is "nt").
IS_WINDOWS = os.name == "nt"

# True when a module spec for the optional ``playwright`` package can be
# found on the import path.
PLAYWRIGHT_INSTALLED = importlib_util.find_spec("playwright") is not None

20 

21 

class WebPDFExporter(HTMLExporter):
    """Exporter that writes notebooks to PDF through a headless browser.

    This inherits from :class:`HTMLExporter`. The notebook is first rendered
    to HTML with the template machinery, and playwright then drives Chromium
    to print that HTML to a PDF.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help="""
        Split generated notebook into multiple pages.

        If False, a PDF with one long page will be generated.

        Set to True to match behavior of LaTeX based PDF generator
        """,
    ).tag(config=True)

    page_render_timeout = Int(
        100,
        help="""
        Time to wait for the page to render before converting to PDF, in milliseconds.
        Increase this value if your notebook has a lot of complex JavaScript
        output that needs more time to load.
        """,
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        """Return the extension used for the exported file."""
        return ".pdf"

    @default("template_extension")
    def _template_extension_default(self):
        """Return the extension used when looking up the template."""
        # NOTE: we use .html.j2 so that the HTMLExporter can find the template
        return ".html.j2"

    @default("template_name")
    def _template_name_default(self):
        """Return the name of the default template."""
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help="""
        Disable chromium security sandbox when converting to PDF.

        WARNING: This could cause arbitrary code execution in specific circumstances,
        where JS in your notebook can execute serverside code! Please use with
        caution.

        ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``
        has more information.

        This is required for webpdf to work inside most container environments.
        """,
    ).tag(config=True)

    browser_args = List(
        Unicode(),
        help="""
        Additional arguments to pass to the browser rendering to PDF.

        These arguments will be passed directly to the browser launch method
        and can be used to customize browser behavior beyond the default settings.
        """,
    ).tag(config=True)

    def run_playwright(self, html):
        """Render an HTML document to PDF with playwright-driven Chromium.

        Parameters
        ----------
        html : str
            The HTML document to print. It is written to a temporary
            ``.html`` file that the browser then loads.

        Returns
        -------
        The PDF data returned by playwright's ``page.pdf``.

        Raises
        ------
        RuntimeError
            If playwright cannot be imported, or if launching Chromium fails.
        """

        async def main(temp_file):
            """Launch Chromium, load ``temp_file`` and print it to PDF."""

            try:
                from playwright.async_api import (  # type: ignore[import-not-found] # noqa: PLC0415,
                    async_playwright,
                )
            except ModuleNotFoundError as e:
                msg = (
                    "Playwright is not installed to support Web PDF conversion. "
                    "Please install `nbconvert[webpdf]` to enable."
                )
                raise RuntimeError(msg) from e

            if self.allow_chromium_download:
                cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
                subprocess.check_call(cmd)  # noqa: S603

            # Work on a copy: appending to ``self.browser_args`` directly would
            # mutate the configured trait value, so every conversion would add
            # another ``--no-sandbox`` flag to the exporter's configuration.
            args = list(self.browser_args)
            if self.disable_sandbox:
                args.append("--no-sandbox")

            playwright = await async_playwright().start()
            try:
                try:
                    browser = await playwright.chromium.launch(
                        handle_sigint=False,
                        handle_sigterm=False,
                        handle_sighup=False,
                        args=args,
                    )
                except Exception as e:
                    msg = (
                        "No suitable chromium executable found on the system. "
                        "Please use '--allow-chromium-download' to allow downloading one, "
                        "or install it using `playwright install chromium`."
                    )
                    raise RuntimeError(msg) from e

                try:
                    page = await browser.new_page()
                    await page.emulate_media(media="print")
                    await page.wait_for_timeout(100)
                    await page.goto(f"file://{temp_file.name}", wait_until="networkidle")
                    # Give JavaScript-driven output extra time to finish rendering.
                    await page.wait_for_timeout(self.page_render_timeout)

                    pdf_params = {"print_background": True}
                    if not self.paginate:
                        # Floating point precision errors can make the printed
                        # PDF spill over onto a new page by a pixel fraction,
                        # so round up and pad each dimension by one.
                        dimensions = await page.evaluate(
                            """() => {
                            const rect = document.body.getBoundingClientRect();
                            return {
                                width: Math.ceil(rect.width) + 1,
                                height: Math.ceil(rect.height) + 1,
                            }
                        }"""
                        )
                        width = dimensions["width"]
                        height = dimensions["height"]
                        # 200 inches is the maximum size for Adobe Acrobat Reader.
                        pdf_params.update(
                            {
                                "width": min(width, 200 * 72),
                                "height": min(height, 200 * 72),
                            }
                        )
                    return await page.pdf(**pdf_params)
                finally:
                    # Close the browser even when rendering fails, so no
                    # Chromium process is left behind.
                    await browser.close()
            finally:
                # Always stop the playwright driver, on success and on error.
                await playwright.stop()

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(  # noqa: SIM115
            suffix=".html", delete=False
        )
        try:
            with temp_file:
                temp_file.write(html.encode("utf-8"))
            # ``asyncio.run`` is executed on a worker thread rather than in the
            # calling thread; the executor is shut down once the job is done.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                pdf_data = pool.submit(asyncio.run, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if writing the HTML or running
            # playwright raises an exception.
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert a notebook node to PDF.

        The notebook is rendered to HTML by the parent exporter and the
        resulting HTML is printed to PDF by :meth:`run_playwright`.

        Returns
        -------
        tuple
            ``(pdf_data, resources)``, where ``resources`` is the value
            returned by the parent exporter.
        """
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_playwright(html)
        self.log.info("PDF successfully created")

        return pdf_data, resources