Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/exporters/webpdf.py: 32%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

75 statements  

1"""Export to PDF via a headless browser""" 

2 

3# Copyright (c) IPython Development Team. 

4# Distributed under the terms of the Modified BSD License. 

5 

6import asyncio 

7import concurrent.futures 

8import os 

9import subprocess 

10import sys 

11import tempfile 

12from importlib import util as importlib_util 

13 

14from traitlets import Bool, List, Unicode, default 

15 

16from .html import HTMLExporter 

17 

# True when the import system can locate a ``playwright`` package; the
# package itself is not imported here.
PLAYWRIGHT_INSTALLED = importlib_util.find_spec("playwright") is not None

# True when ``os.name`` reports the Windows ("nt") platform family.
IS_WINDOWS = os.name == "nt"

20 

21 

22class WebPDFExporter(HTMLExporter): 

23 """Writer designed to write to PDF files. 

24 

25 This inherits from :class:`HTMLExporter`. It creates the HTML using the 

26 template machinery, and then run playwright to create a pdf. 

27 """ 

28 

29 export_from_notebook = "PDF via HTML" 

30 

31 allow_chromium_download = Bool( 

32 False, 

33 help="Whether to allow downloading Chromium if no suitable version is found on the system.", 

34 ).tag(config=True) 

35 

36 paginate = Bool( 

37 True, 

38 help=""" 

39 Split generated notebook into multiple pages. 

40 

41 If False, a PDF with one long page will be generated. 

42 

43 Set to True to match behavior of LaTeX based PDF generator 

44 """, 

45 ).tag(config=True) 

46 

47 @default("file_extension") 

48 def _file_extension_default(self): 

49 return ".html" 

50 

51 @default("template_name") 

52 def _template_name_default(self): 

53 return "webpdf" 

54 

55 disable_sandbox = Bool( 

56 False, 

57 help=""" 

58 Disable chromium security sandbox when converting to PDF. 

59 

60 WARNING: This could cause arbitrary code execution in specific circumstances, 

61 where JS in your notebook can execute serverside code! Please use with 

62 caution. 

63 

64 ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox`` 

65 has more information. 

66 

67 This is required for webpdf to work inside most container environments. 

68 """, 

69 ).tag(config=True) 

70 

71 browser_args = List( 

72 Unicode(), 

73 help=""" 

74 Additional arguments to pass to the browser rendering to PDF. 

75 

76 These arguments will be passed directly to the browser launch method 

77 and can be used to customize browser behavior beyond the default settings. 

78 """, 

79 ).tag(config=True) 

80 

81 def run_playwright(self, html): 

82 """Run playwright.""" 

83 

84 async def main(temp_file): 

85 """Run main playwright script.""" 

86 

87 try: 

88 from playwright.async_api import ( # type: ignore[import-not-found] # noqa: PLC0415, 

89 async_playwright, 

90 ) 

91 except ModuleNotFoundError as e: 

92 msg = ( 

93 "Playwright is not installed to support Web PDF conversion. " 

94 "Please install `nbconvert[webpdf]` to enable." 

95 ) 

96 raise RuntimeError(msg) from e 

97 

98 if self.allow_chromium_download: 

99 cmd = [sys.executable, "-m", "playwright", "install", "chromium"] 

100 subprocess.check_call(cmd) # noqa: S603 

101 

102 playwright = await async_playwright().start() 

103 chromium = playwright.chromium 

104 

105 args = self.browser_args 

106 if self.disable_sandbox: 

107 args.append("--no-sandbox") 

108 

109 try: 

110 browser = await chromium.launch( 

111 handle_sigint=False, handle_sigterm=False, handle_sighup=False, args=args 

112 ) 

113 except Exception as e: 

114 msg = ( 

115 "No suitable chromium executable found on the system. " 

116 "Please use '--allow-chromium-download' to allow downloading one," 

117 "or install it using `playwright install chromium`." 

118 ) 

119 await playwright.stop() 

120 raise RuntimeError(msg) from e 

121 

122 page = await browser.new_page() 

123 await page.emulate_media(media="print") 

124 await page.wait_for_timeout(100) 

125 await page.goto(f"file://{temp_file.name}", wait_until="networkidle") 

126 await page.wait_for_timeout(100) 

127 

128 pdf_params = {"print_background": True} 

129 if not self.paginate: 

130 # Floating point precision errors cause the printed 

131 # PDF from spilling over a new page by a pixel fraction. 

132 dimensions = await page.evaluate( 

133 """() => { 

134 const rect = document.body.getBoundingClientRect(); 

135 return { 

136 width: Math.ceil(rect.width) + 1, 

137 height: Math.ceil(rect.height) + 1, 

138 } 

139 }""" 

140 ) 

141 width = dimensions["width"] 

142 height = dimensions["height"] 

143 # 200 inches is the maximum size for Adobe Acrobat Reader. 

144 pdf_params.update( 

145 { 

146 "width": min(width, 200 * 72), 

147 "height": min(height, 200 * 72), 

148 } 

149 ) 

150 pdf_data = await page.pdf(**pdf_params) 

151 

152 await browser.close() 

153 await playwright.stop() 

154 return pdf_data 

155 

156 pool = concurrent.futures.ThreadPoolExecutor() 

157 # Create a temporary file to pass the HTML code to Chromium: 

158 # Unfortunately, tempfile on Windows does not allow for an already open 

159 # file to be opened by a separate process. So we must close it first 

160 # before calling Chromium. We also specify delete=False to ensure the 

161 # file is not deleted after closing (the default behavior). 

162 temp_file = tempfile.NamedTemporaryFile( # noqa: SIM115 

163 suffix=".html", delete=False 

164 ) 

165 with temp_file: 

166 temp_file.write(html.encode("utf-8")) 

167 try: 

168 pdf_data = pool.submit(asyncio.run, main(temp_file)).result() 

169 finally: 

170 # Ensure the file is deleted even if playwright raises an exception 

171 os.unlink(temp_file.name) 

172 return pdf_data 

173 

174 def from_notebook_node(self, nb, resources=None, **kw): 

175 """Convert from a notebook node.""" 

176 html, resources = super().from_notebook_node(nb, resources=resources, **kw) 

177 

178 self.log.info("Building PDF") 

179 pdf_data = self.run_playwright(html) 

180 self.log.info("PDF successfully created") 

181 

182 # convert output extension to pdf 

183 # the writer above required it to be html 

184 resources["output_extension"] = ".pdf" 

185 

186 return pdf_data, resources