Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/preprocessors/extractoutput.py: 24%

59 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1"""A preprocessor that extracts all of the outputs from the 

2notebook file. The extracted outputs are returned in the 'resources' dictionary. 

3""" 

4 

5# Copyright (c) IPython Development Team. 

6# Distributed under the terms of the Modified BSD License. 

7 

8import json 

9import os 

10import sys 

11from binascii import a2b_base64 

12from mimetypes import guess_extension 

13from textwrap import dedent 

14 

15from traitlets import Set, Unicode 

16 

17from .base import Preprocessor 

18 

19 

def guess_extension_without_jpe(mimetype):
    """Guess a file extension for *mimetype*, never returning ``.jpe``.

    Works like ``mimetypes.guess_extension`` in every case but one: a
    guessed ``.jpe`` extension is swapped for ``.jpeg``, because jpeg
    images carrying a ``.jpe`` extension are not recognised by latex.
    Whatever else ``guess_extension`` returns is passed through unchanged.
    """
    guessed = guess_extension(mimetype)
    return ".jpeg" if guessed == ".jpe" else guessed

31 

32 

def platform_utf_8_encode(data):
    """Encode data based on platform.

    Values that are not ``str`` are handed back untouched. A ``str`` is
    encoded to UTF-8 bytes; when ``sys.platform`` is ``"win32"`` every
    line feed is first expanded to a carriage return + line feed pair.
    """
    # Anything that is not text (e.g. already-encoded bytes) passes through.
    if not isinstance(data, str):
        return data

    text = data.replace("\n", "\r\n") if sys.platform == "win32" else data
    return text.encode("utf-8")

40 

41 

class ExtractOutputPreprocessor(Preprocessor):
    """
    Extracts all of the outputs from the notebook file.  The extracted
    outputs are returned in the 'resources' dictionary.
    """

    # Template used to name an extracted file when the output carries no
    # "filename" metadata of its own. It is formatted with the keyword
    # arguments unique_key, cell_index, index and extension.
    output_filename_template = Unicode("{unique_key}_{cell_index}_{index}{extension}").tag(
        config=True
    )

    # Mime types whose data is pulled out of the cell outputs.
    extract_output_types = Set({"image/png", "image/jpeg", "image/svg+xml", "application/pdf"}).tag(
        config=True
    )

    def preprocess_cell(self, cell, resources, cell_index):  # noqa
        """
        Apply a transformation on each cell,

        For every ``display_data`` / ``execute_result`` output of the cell,
        the data of each mime type listed in ``extract_output_types`` is
        converted to bytes and stored in ``resources["outputs"]`` under a
        file name; the same file name is recorded on the output itself in
        ``metadata["filenames"][mime_type]``.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        cell_index : int
            Index of the cell being processed (see base.py)

        Returns
        -------
        tuple
            The ``(cell, resources)`` pair, both modified in place.

        Raises
        ------
        ValueError
            If a file name is already present in ``resources["outputs"]``.
        """

        # Get the unique key from the resource dict if it exists.  If it does not
        # exist, use 'output' as the default.  Also, get files directory if it
        # has been specified
        unique_key = resources.get("unique_key", "output")
        output_files_dir = resources.get("output_files_dir", None)

        # Make sure outputs key exists. ``get`` is used so that a mapping
        # without the key is initialised instead of raising ``KeyError``.
        if not isinstance(resources.get("outputs"), dict):
            resources["outputs"] = {}

        # Loop through all of the outputs in the cell
        for index, out in enumerate(cell.get("outputs", [])):
            if out.output_type not in {"display_data", "execute_result"}:
                continue
            if "text/html" in out.data:
                out["data"]["text/html"] = dedent(out["data"]["text/html"])
            # Get the output in data formats that the template needs extracted
            for mime_type in self.extract_output_types:
                if mime_type not in out.data:
                    continue
                data = out.data[mime_type]

                # Binary files are base64-encoded, SVG is already XML
                if mime_type in {"image/png", "image/jpeg", "application/pdf"}:
                    # data is b64-encoded as text (str, unicode),
                    # we want the original bytes
                    data = a2b_base64(data)
                elif mime_type == "application/json" or not isinstance(data, str):
                    # Data is either JSON-like and was parsed into a Python
                    # object according to the spec, or data is for sure
                    # JSON. In the latter case we want to go extra sure that
                    # we enclose a scalar string value into extra quotes by
                    # serializing it properly.
                    if isinstance(data, bytes):
                        # We need to guess the encoding in this
                        # instance. Some modules that return raw data like
                        # svg can leave the data in byte form instead of str
                        data = data.decode("utf-8")
                    data = platform_utf_8_encode(json.dumps(data))
                else:
                    # All other text_type data will fall into this path
                    data = platform_utf_8_encode(data)

                ext = guess_extension_without_jpe(mime_type)
                if ext is None:
                    # No known extension: fall back to the mime subtype.
                    ext = "." + mime_type.rsplit("/")[-1]
                if out.metadata.get("filename", ""):
                    # NOTE(review): this name comes straight from the output
                    # metadata and is joined to output_files_dir unchanged
                    # below -- confirm that callers treat it as trusted.
                    filename = out.metadata["filename"]
                    if not filename.endswith(ext):
                        filename += ext
                else:
                    filename = self.output_filename_template.format(
                        unique_key=unique_key, cell_index=cell_index, index=index, extension=ext
                    )

                # On the cell, make the figure available via
                #   cell.outputs[i].metadata.filenames['mime/type']
                # where
                #   cell.outputs[i].data['mime/type'] contains the data
                if output_files_dir is not None:
                    filename = os.path.join(output_files_dir, filename)
                out.metadata.setdefault("filenames", {})
                out.metadata["filenames"][mime_type] = filename

                if filename in resources["outputs"]:
                    msg = (
                        "Your outputs have filename metadata associated "
                        "with them. Nbconvert saves these outputs to "
                        "external files using this filename metadata. "
                        "Filenames need to be unique across the notebook, "
                        "or images will be overwritten. The filename {} is "
                        "associated with more than one output. The second "
                        "output associated with this filename is in cell "
                        "{}.".format(filename, cell_index)
                    )
                    raise ValueError(msg)
                # In the resources, make the figure available via
                #   resources['outputs']['filename'] = data
                resources["outputs"][filename] = data

        return cell, resources