Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/preprocessors/extractoutput.py: 24%
59 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""A preprocessor that extracts all of the outputs from the
2notebook file. The extracted outputs are returned in the 'resources' dictionary.
3"""
5# Copyright (c) IPython Development Team.
6# Distributed under the terms of the Modified BSD License.
8import json
9import os
10import sys
11from binascii import a2b_base64
12from mimetypes import guess_extension
13from textwrap import dedent
15from traitlets import Set, Unicode
17from .base import Preprocessor
def guess_extension_without_jpe(mimetype):
    """
    Guess a file extension for ``mimetype``, never returning '.jpe'.

    Works in the same way as mimetypes.guess_extension, except that a
    '.jpe' result (jpeg images) is replaced by '.jpeg', because '.jpe'
    files are not recognised by latex.
    """
    guessed = guess_extension(mimetype)
    return ".jpeg" if guessed == ".jpe" else guessed
def platform_utf_8_encode(data, *, platform=None):
    """Encode data based on platform.

    Parameters
    ----------
    data : str or any
        Text to encode. Anything that is not a ``str`` (for example
        ``bytes``) is returned unchanged.
    platform : str, optional
        Platform identifier to encode for, in ``sys.platform`` form.
        Defaults to ``sys.platform``.

    Returns
    -------
    bytes or any
        UTF-8 encoded bytes for ``str`` input, with newlines written as
        CR LF when the platform is ``"win32"``; otherwise ``data`` itself.
    """
    if not isinstance(data, str):
        return data
    if platform is None:
        platform = sys.platform
    if platform == "win32":
        # Collapse existing CR LF pairs first so that text which already
        # uses Windows line endings does not end up with CR CR LF.
        data = data.replace("\r\n", "\n").replace("\n", "\r\n")
    return data.encode("utf-8")
class ExtractOutputPreprocessor(Preprocessor):
    """
    Extracts all of the outputs from the notebook file. The extracted
    outputs are returned in the 'resources' dictionary.
    """

    # Template used to build a file name for an output that carries no
    # explicit 'filename' metadata. It is formatted with the keys
    # unique_key, cell_index, index and extension.
    output_filename_template = Unicode("{unique_key}_{cell_index}_{index}{extension}").tag(
        config=True
    )

    # Mime types whose data is pulled out of the cell outputs and stored
    # in resources['outputs'].
    extract_output_types = Set({"image/png", "image/jpeg", "image/svg+xml", "application/pdf"}).tag(
        config=True
    )

    def preprocess_cell(self, cell, resources, cell_index):  # noqa
        """
        Apply a transformation on each cell,

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        cell_index : int
            Index of the cell being processed (see base.py)

        Returns
        -------
        tuple
            The ``(cell, resources)`` pair. Each extracted output has its
            file name recorded in ``out.metadata['filenames'][mime_type]``
            and its data stored in ``resources['outputs'][filename]``.

        Raises
        ------
        ValueError
            If the file name computed for an output is already present in
            ``resources['outputs']``.
        """

        # Get the unique key from the resource dict if it exists.  If it does not
        # exist, use 'output' as the default.  Also, get files directory if it
        # has been specified
        unique_key = resources.get("unique_key", "output")
        output_files_dir = resources.get("output_files_dir", None)

        # Make sure outputs key exists. ``get`` is used instead of indexing
        # so that a plain dict without the key does not raise KeyError.
        if not isinstance(resources.get("outputs"), dict):
            resources["outputs"] = {}

        # Loop through all of the outputs in the cell
        for index, out in enumerate(cell.get("outputs", [])):
            if out.output_type not in {"display_data", "execute_result"}:
                continue
            if "text/html" in out.data:
                out["data"]["text/html"] = dedent(out["data"]["text/html"])
            # Get the output in data formats that the template needs extracted
            for mime_type in self.extract_output_types:
                if mime_type not in out.data:
                    continue
                data = out.data[mime_type]

                # Binary files are base64-encoded, SVG is already XML
                if mime_type in {"image/png", "image/jpeg", "application/pdf"}:
                    # data is b64-encoded as text (str, unicode),
                    # we want the original bytes
                    data = a2b_base64(data)
                elif mime_type == "application/json" or not isinstance(data, str):
                    # Data is either JSON-like and was parsed into a Python
                    # object according to the spec, or data is for sure
                    # JSON. In the latter case we want to go extra sure that
                    # we enclose a scalar string value into extra quotes by
                    # serializing it properly.
                    if isinstance(data, bytes):
                        # We need to guess the encoding in this
                        # instance. Some modules that return raw data like
                        # svg can leave the data in byte form instead of str
                        # NOTE(review): the decoded text is still passed
                        # through json.dumps below, so it is stored as a
                        # quoted JSON string - confirm this is intended.
                        data = data.decode("utf-8")
                    data = platform_utf_8_encode(json.dumps(data))
                else:
                    # All other text_type data will fall into this path
                    data = platform_utf_8_encode(data)

                ext = guess_extension_without_jpe(mime_type)
                if ext is None:
                    # No known extension: fall back to the mime subtype.
                    ext = "." + mime_type.rsplit("/")[-1]
                if out.metadata.get("filename", ""):
                    filename = out.metadata["filename"]
                    if not filename.endswith(ext):
                        filename += ext
                else:
                    filename = self.output_filename_template.format(
                        unique_key=unique_key, cell_index=cell_index, index=index, extension=ext
                    )

                # On the cell, make the figure available via
                #   cell.outputs[i].metadata.filenames['mime/type']
                # where
                #   cell.outputs[i].data['mime/type'] contains the data
                if output_files_dir is not None:
                    filename = os.path.join(output_files_dir, filename)
                out.metadata.setdefault("filenames", {})
                out.metadata["filenames"][mime_type] = filename

                if filename in resources["outputs"]:
                    msg = (
                        "Your outputs have filename metadata associated "
                        "with them. Nbconvert saves these outputs to "
                        "external files using this filename metadata. "
                        "Filenames need to be unique across the notebook, "
                        "or images will be overwritten. The filename {} is "
                        "associated with more than one output. The second "
                        "output associated with this filename is in cell "
                        "{}.".format(filename, cell_index)
                    )
                    raise ValueError(msg)
                # In the resources, make the figure available via
                #   resources['outputs']['filename'] = data
                resources["outputs"][filename] = data

        return cell, resources