Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/preprocessors/extractoutput.py: 24%

"""A preprocessor that extracts all of the outputs from the
notebook file. The extracted outputs are returned in the 'resources' dictionary.
"""

# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.

8import json

9import os

10import sys

11from binascii import a2b_base64

12from mimetypes import guess_extension

13from textwrap import dedent

15from traitlets import Set, Unicode

17from .base import Preprocessor

def guess_extension_without_jpe(mimetype):
    """
    Guess a file extension for ``mimetype``, never returning '.jpe'.

    This function fixes a problem with '.jpe' extensions
    of jpeg images which are then not recognised by latex.
    For any other case, the function works in the same way
    as mimetypes.guess_extension

    Parameters
    ----------
    mimetype : str
        MIME type to look up, e.g. ``"image/png"``.

    Returns
    -------
    str or None
        The extension including its leading dot, or ``None`` when
        ``mimetypes.guess_extension`` knows no extension for the type.
    """
    ext = guess_extension(mimetype)
    # Swap the '.jpe' spelling for '.jpeg'; every other result passes through.
    if ext == ".jpe":
        ext = ".jpeg"
    return ext

def platform_utf_8_encode(data):
    """Encode data based on platform.

    Parameters
    ----------
    data : object
        Value to encode. Only ``str`` instances are transformed.

    Returns
    -------
    bytes or object
        For ``str`` input, the UTF-8 encoded bytes; on ``win32`` every
        ``"\\n"`` is first replaced by ``"\\r\\n"``. Any non-``str`` input
        (for example ``bytes``) is returned unchanged.
    """
    if isinstance(data, str):
        if sys.platform == "win32":
            # NOTE: this rewrites every "\n", including one already preceded
            # by "\r", so pre-existing "\r\n" sequences become "\r\r\n".
            data = data.replace("\n", "\r\n")
        data = data.encode("utf-8")
    return data

class ExtractOutputPreprocessor(Preprocessor):
    """
    Extracts all of the outputs from the notebook file. The extracted
    outputs are returned in the 'resources' dictionary.
    """

    # Template for generated file names. It is formatted with the keyword
    # arguments unique_key, cell_index, index and extension.
    output_filename_template = Unicode("{unique_key}_{cell_index}_{index}{extension}").tag(
        config=True
    )

    # MIME types whose output data is pulled out of the cell into resources.
    extract_output_types = Set({"image/png", "image/jpeg", "image/svg+xml", "application/pdf"}).tag(
        config=True
    )

    def preprocess_cell(self, cell, resources, cell_index):  # noqa
        """
        Apply a transformation on each cell,

        For every ``display_data`` / ``execute_result`` output, each MIME
        type listed in ``extract_output_types`` is copied into
        ``resources["outputs"]`` under a file name, and that file name is
        recorded in ``out.metadata["filenames"][mime_type]``.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        cell_index : int
            Index of the cell being processed (see base.py)

        Returns
        -------
        tuple
            The ``(cell, resources)`` pair, both modified in place.

        Raises
        ------
        ValueError
            If a computed file name is already present in
            ``resources["outputs"]``.
        """

        # Get the unique key from the resource dict if it exists.  If it does not
        # exist, use 'output' as the default.  Also, get files directory if it
        # has been specified
        unique_key = resources.get("unique_key", "output")
        output_files_dir = resources.get("output_files_dir", None)

        # Make sure outputs key exists.  ``get`` is used so that a mapping
        # without an "outputs" entry is initialised instead of raising KeyError.
        if not isinstance(resources.get("outputs"), dict):
            resources["outputs"] = {}

        # Loop through all of the outputs in the cell
        for index, out in enumerate(cell.get("outputs", [])):
            if out.output_type not in {"display_data", "execute_result"}:
                continue
            if "text/html" in out.data:
                out["data"]["text/html"] = dedent(out["data"]["text/html"])
            # Get the output in data formats that the template needs extracted
            for mime_type in self.extract_output_types:
                if mime_type not in out.data:
                    continue
                data = out.data[mime_type]

                # Binary files are base64-encoded, SVG is already XML
                if mime_type in {"image/png", "image/jpeg", "application/pdf"}:
                    # data is b64-encoded as text (str, unicode),
                    # we want the original bytes
                    data = a2b_base64(data)
                elif mime_type == "application/json" or not isinstance(data, str):
                    # Data is either JSON-like and was parsed into a Python
                    # object according to the spec, or data is for sure
                    # JSON. In the latter case we want to go extra sure that
                    # we enclose a scalar string value into extra quotes by
                    # serializing it properly.
                    if isinstance(data, bytes):
                        # We need to guess the encoding in this
                        # instance. Some modules that return raw data like
                        # svg can leave the data in byte form instead of str
                        data = data.decode("utf-8")
                    data = platform_utf_8_encode(json.dumps(data))
                else:
                    # All other text_type data will fall into this path
                    data = platform_utf_8_encode(data)

                ext = guess_extension_without_jpe(mime_type)
                if ext is None:
                    # No known extension: fall back to the MIME subtype.
                    ext = "." + mime_type.rsplit("/")[-1]
                if out.metadata.get("filename", ""):
                    filename = out.metadata["filename"]
                    if not filename.endswith(ext):
                        filename += ext
                else:
                    filename = self.output_filename_template.format(
                        unique_key=unique_key, cell_index=cell_index, index=index, extension=ext
                    )

                # On the cell, make the figure available via
                #   cell.outputs[i].metadata.filenames['mime/type']
                # where
                #   cell.outputs[i].data['mime/type'] contains the data
                if output_files_dir is not None:
                    filename = os.path.join(output_files_dir, filename)
                out.metadata.setdefault("filenames", {})
                out.metadata["filenames"][mime_type] = filename

                if filename in resources["outputs"]:
                    msg = (
                        "Your outputs have filename metadata associated "
                        "with them. Nbconvert saves these outputs to "
                        "external files using this filename metadata. "
                        "Filenames need to be unique across the notebook, "
                        "or images will be overwritten. The filename {} is "
                        "associated with more than one output. The second "
                        "output associated with this filename is in cell "
                        "{}.".format(filename, cell_index)
                    )
                    raise ValueError(msg)
                # In the resources, make the figure available via
                #   resources['outputs']['filename'] = data
                resources["outputs"][filename] = data

        return cell, resources