Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/exporters/webpdf.py: 29%

68 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1"""Export to PDF via a headless browser""" 

2 

3# Copyright (c) IPython Development Team. 

4# Distributed under the terms of the Modified BSD License. 

5 

6import asyncio 

7import concurrent.futures 

8import os 

9import tempfile 

10from importlib import util as importlib_util 

11 

12from traitlets import Bool, default 

13 

14from .html import HTMLExporter 

15 

# Probe for pyppeteer via its import spec rather than importing it, so this
# module can still be imported when the optional dependency is absent.
PYPPETEER_INSTALLED = importlib_util.find_spec("pyppeteer") is not None

17 

18 

class WebPDFExporter(HTMLExporter):
    """Writer designed to write to PDF files.

    This inherits from :class:`HTMLExporter`. It creates the HTML using the
    template machinery, and then runs pyppeteer to create a PDF.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help="""
        Split generated notebook into multiple pages.

        If False, a PDF with one long page will be generated.

        Set to True to match behavior of LaTeX based PDF generator
        """,
    ).tag(config=True)

    @default("file_extension")
    def _file_extension_default(self):
        # The intermediate document handed to the browser is HTML; the final
        # extension is switched to ".pdf" in ``from_notebook_node``.
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help="""
        Disable chromium security sandbox when converting to PDF.

        WARNING: This could cause arbitrary code execution in specific circumstances,
        where JS in your notebook can execute serverside code! Please use with
        caution.

        ``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``
        has more information.

        This is required for webpdf to work inside most container environments.
        """,
    ).tag(config=True)

    def _check_launch_reqs(self):
        """Verify that pyppeteer and a usable Chromium are available.

        Returns
        -------
        callable
            pyppeteer's ``launch`` coroutine function.

        Raises
        ------
        RuntimeError
            If pyppeteer cannot be imported, or if no Chromium is found and
            ``allow_chromium_download`` is False.
        """
        try:
            from pyppeteer import launch  # type: ignore[import]
            from pyppeteer.util import check_chromium  # type:ignore
        except ModuleNotFoundError as e:
            msg = (
                "Pyppeteer is not installed to support Web PDF conversion. "
                "Please install `nbconvert[webpdf]` to enable."
            )
            raise RuntimeError(msg) from e
        if not self.allow_chromium_download and not check_chromium():
            msg = (
                "No suitable chromium executable found on the system. "
                "Please use '--allow-chromium-download' to allow downloading one."
            )
            raise RuntimeError(msg)
        return launch

    def run_pyppeteer(self, html):
        """Render an HTML document to PDF with a headless browser.

        The HTML is written to a temporary ``.html`` file, which is loaded
        by a pyppeteer-launched browser on a worker thread and printed to PDF.
        The temporary file is always removed before returning.

        Parameters
        ----------
        html : str
            The HTML document to render.

        Returns
        -------
        The PDF data produced by pyppeteer's ``page.pdf``.

        Raises
        ------
        RuntimeError
            Propagated from ``_check_launch_reqs`` when pyppeteer or Chromium
            is unavailable.
        """
        from pathlib import Path

        async def main(temp_file):
            """Run main pyppeteer script."""
            args = ["--no-sandbox"] if self.disable_sandbox else []
            browser = await self._check_launch_reqs()(
                handleSIGINT=False, handleSIGTERM=False, handleSIGHUP=False, args=args
            )
            try:
                page = await browser.newPage()
                await page.emulateMedia("print")
                await page.waitFor(100)
                # Build a proper, percent-encoded file URI (also handles
                # Windows drive-letter paths) instead of string concatenation.
                await page.goto(Path(temp_file.name).as_uri(), waitUntil="networkidle0")
                await page.waitFor(100)

                pdf_params = {"printBackground": True}
                if not self.paginate:
                    # Floating point precision errors can cause the printed
                    # PDF to spill over onto a new page by a pixel fraction,
                    # hence the extra pixel added to each dimension.
                    dimensions = await page.evaluate(
                        """() => {
                        const rect = document.body.getBoundingClientRect();
                        return {
                            width: Math.ceil(rect.width) + 1,
                            height: Math.ceil(rect.height) + 1,
                        }
                    }"""
                    )
                    width = dimensions["width"]
                    height = dimensions["height"]
                    # 200 inches is the maximum size for Adobe Acrobat Reader.
                    pdf_params.update(
                        {
                            "width": min(width, 200 * 72),
                            "height": min(height, 200 * 72),
                        }
                    )
                return await page.pdf(pdf_params)
            finally:
                # Close the browser even when navigation or printing fails so
                # the Chromium process is not left running.
                await browser.close()

        # TODO: when dropping Python 3.6, use
        # pdf_data = pool.submit(asyncio.run, main(temp_file)).result()
        def run_coroutine(coro):
            """Run an internal coroutine on a fresh event loop."""
            # NOTE: the loop is intentionally left open; pyppeteer's launcher
            # may still reference it from its own exit-time cleanup.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        try:
            with temp_file:
                temp_file.write(html.encode("utf-8"))
            # Run on a worker thread so a private event loop can be used even
            # if the calling thread already has one running; the ``with``
            # block shuts the executor down once the result is available.
            with concurrent.futures.ThreadPoolExecutor() as pool:
                pdf_data = pool.submit(run_coroutine, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if writing it or pyppeteer
            # raises an exception.
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert from a notebook node.

        Renders the notebook to HTML through the parent exporter, prints that
        HTML to PDF with :meth:`run_pyppeteer`, and returns
        ``(pdf_data, resources)`` with ``resources["output_extension"]`` set
        to ``".pdf"``.
        """
        # Fail fast on missing requirements before doing the HTML export.
        self._check_launch_reqs()
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_pyppeteer(html)
        self.log.info("PDF successfully created")

        # convert output extension to pdf
        # the writer above required it to be html
        resources["output_extension"] = ".pdf"

        return pdf_data, resources