Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbconvert/exporters/webpdf.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

76 statements  

1"""Export to PDF via a headless browser""" 

2 

3# Copyright (c) IPython Development Team. 

4# Distributed under the terms of the Modified BSD License. 

5 

6import asyncio 

7import concurrent.futures 

8import os 

9import subprocess 

10import sys 

11import tempfile 

12from importlib import util as importlib_util 

13 

14from traitlets import Bool, default 

15 

16from .html import HTMLExporter 

17 

# True when the optional ``playwright`` package can be located by the import
# machinery (``find_spec`` returns ``None`` when it cannot be found).
PLAYWRIGHT_INSTALLED = importlib_util.find_spec("playwright") is not None
# True when running on Windows, where ``os.name`` is ``"nt"``.
IS_WINDOWS = os.name == "nt"

20 

21 

class WebPDFExporter(HTMLExporter):
    """Writer designed to write to PDF files.

    This inherits from :class:`HTMLExporter`. It creates the HTML using the
    template machinery, and then runs playwright to create a PDF.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help="""
        Split generated notebook into multiple pages.

        If False, a PDF with one long page will be generated.

        Set to True to match behavior of LaTeX based PDF generator
        """,
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        """Return the default file extension, ``.html``.

        ``from_notebook_node`` later overrides the output extension to
        ``.pdf`` through ``resources["output_extension"]``.
        """
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        """Return the name of the default template, ``webpdf``."""
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help="""
        Disable chromium security sandbox when converting to PDF.

        WARNING: This could cause arbitrary code execution in specific circumstances,
        where JS in your notebook can execute serverside code! Please use with
        caution.

        ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``
        has more information.

        This is required for webpdf to work inside most container environments.
        """,
    ).tag(config=True)

    def run_playwright(self, html):
        """Render an HTML document to PDF with headless Chromium via playwright.

        The HTML is written to a temporary ``.html`` file, opened in Chromium
        through a ``file://`` URL with print media emulation, and printed to
        PDF. The playwright coroutine is driven by a dedicated event loop in a
        worker thread. The temporary file, the browser, the playwright driver,
        the event loop and the worker thread are all released before this
        method returns, whether or not the conversion succeeded.

        Parameters
        ----------
        html : str
            The HTML document; it is encoded as UTF-8 before being written.

        Returns
        -------
        The PDF data produced by playwright's ``page.pdf()``.

        Raises
        ------
        RuntimeError
            If playwright cannot be imported, or if Chromium cannot be launched.
        subprocess.CalledProcessError
            If ``allow_chromium_download`` is set and the
            ``playwright install chromium`` command exits with a non-zero status.
        """

        async def main(temp_file):
            """Open ``temp_file`` in Chromium and return the printed PDF data."""
            args = ["--no-sandbox"] if self.disable_sandbox else []
            try:
                from playwright.async_api import async_playwright  # type: ignore[import-not-found]
            except ModuleNotFoundError as e:
                msg = (
                    "Playwright is not installed to support Web PDF conversion. "
                    "Please install `nbconvert[webpdf]` to enable."
                )
                raise RuntimeError(msg) from e

            if self.allow_chromium_download:
                cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
                subprocess.check_call(cmd)  # noqa: S603

            playwright = await async_playwright().start()
            try:
                try:
                    browser = await playwright.chromium.launch(
                        handle_sigint=False, handle_sigterm=False, handle_sighup=False, args=args
                    )
                except Exception as e:
                    msg = (
                        "No suitable chromium executable found on the system. "
                        "Please use '--allow-chromium-download' to allow downloading one, "
                        "or install it using `playwright install chromium`."
                    )
                    raise RuntimeError(msg) from e

                try:
                    page = await browser.new_page()
                    await page.emulate_media(media="print")
                    await page.wait_for_timeout(100)
                    await page.goto(f"file://{temp_file.name}", wait_until="networkidle")
                    await page.wait_for_timeout(100)

                    pdf_params = {"print_background": True}
                    if not self.paginate:
                        # Floating point precision errors can make the printed
                        # PDF spill over onto a new page by a pixel fraction,
                        # so round up and pad each dimension by one pixel.
                        dimensions = await page.evaluate(
                            """() => {
                            const rect = document.body.getBoundingClientRect();
                            return {
                                width: Math.ceil(rect.width) + 1,
                                height: Math.ceil(rect.height) + 1,
                            }
                        }"""
                        )
                        width = dimensions["width"]
                        height = dimensions["height"]
                        # 200 inches is the maximum size for Adobe Acrobat Reader.
                        pdf_params.update(
                            {
                                "width": min(width, 200 * 72),
                                "height": min(height, 200 * 72),
                            }
                        )
                    return await page.pdf(**pdf_params)
                finally:
                    # Close the browser even when a page operation fails, so no
                    # Chromium process is left behind.
                    await browser.close()
            finally:
                # Always stop the playwright driver, on success and on failure.
                await playwright.stop()

        def run_coroutine(coro):
            """Run ``coro`` to completion on a fresh event loop, then close the loop."""
            # NOTE(review): the explicit ProactorEventLoop on Windows is presumably
            # needed because playwright spawns a driver subprocess, which the
            # Windows selector event loop does not support - confirm before
            # replacing this with ``asyncio.run``.
            loop = (
                asyncio.ProactorEventLoop()  # type:ignore[attr-defined]
                if IS_WINDOWS
                else asyncio.new_event_loop()
            )
            asyncio.set_event_loop(loop)
            try:
                return loop.run_until_complete(coro)
            finally:
                try:
                    loop.run_until_complete(loop.shutdown_asyncgens())
                finally:
                    asyncio.set_event_loop(None)
                    loop.close()

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        try:
            with temp_file:
                temp_file.write(html.encode("utf-8"))

            # The coroutine runs in a worker thread with its own event loop.
            # The ``with`` block shuts the executor down once the result (or
            # the exception) is available.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                pdf_data = pool.submit(run_coroutine, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if writing it or playwright
            # raises an exception.
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert a notebook node to PDF.

        The notebook is first rendered to HTML by the parent exporter, and the
        HTML is then printed to PDF by :meth:`run_playwright`.

        Parameters
        ----------
        nb
            The notebook node to convert; passed through to the parent exporter.
        resources : optional
            Resources passed through to the parent exporter.
        **kw
            Extra keyword arguments passed through to the parent exporter.

        Returns
        -------
        tuple
            ``(pdf_data, resources)``, where ``resources["output_extension"]``
            has been set to ``".pdf"``.
        """
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_playwright(html)
        self.log.info("PDF successfully created")

        # convert output extension to pdf
        # the writer above required it to be html
        resources["output_extension"] = ".pdf"

        return pdf_data, resources