1"""Base classes and utilities for readers and writers."""
2
3# Copyright (c) IPython Development Team.
4# Distributed under the terms of the Modified BSD License.
5from __future__ import annotations
6
7from base64 import decodebytes, encodebytes
8
9
def restore_bytes(nb):
    """Turn the text of ``png``/``jpeg`` outputs back into bytes.

    Walks every code cell of every worksheet and, for each output that
    has a ``png`` or ``jpeg`` entry, replaces the value with its ASCII
    encoding (characters outside ASCII become ``?``). No base64 decoding
    happens here: bytes in the notebook stay b64-encoded.

    The notebook is modified in place and also returned.

    Note: this is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != "code":
                # only code cells carry outputs
                continue
            for output in cell.outputs:
                for image_key in ("png", "jpeg"):
                    if image_key in output:
                        as_text = getattr(output, image_key)
                        setattr(output, image_key, as_text.encode("ascii", "replace"))
    return nb
27
28
29# output keys that are likely to have multiline values
30_multiline_outputs = ["text", "html", "svg", "latex", "javascript", "json"]
31
32
33# FIXME: workaround for old splitlines()
34def _join_lines(lines):
35 """join lines that have been written by splitlines()
36
37 Has logic to protect against `splitlines()`, which
38 should have been `splitlines(True)`
39 """
40 if lines and lines[0].endswith(("\n", "\r")):
41 # created by splitlines(True)
42 return "".join(lines)
43 # created by splitlines()
44 return "\n".join(lines)
45
46
def rejoin_lines(nb):
    """Collapse lists of lines in a notebook back into single strings.

    This undoes ``split_lines(nb)``. Only values that are actually lists
    are joined; anything else is left exactly as it is.

    For code cells the ``input`` and every multiline output key are
    handled; for every other cell type (text, heading) the ``source``
    and ``rendered`` keys are handled.

    Used when reading JSON files that may have been passed through
    split_lines. The notebook is modified in place and also returned.
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != "code":
                # text, heading cell
                for field in ["source", "rendered"]:
                    value = cell.get(field, None)
                    if isinstance(value, list):
                        cell[field] = _join_lines(value)
                continue

            if "input" in cell and isinstance(cell.input, list):
                cell.input = _join_lines(cell.input)
            for output in cell.outputs:
                for field in _multiline_outputs:
                    value = output.get(field, None)
                    if isinstance(value, list):
                        output[field] = _join_lines(value)
    return nb
73
74
def split_lines(nb):
    """Break likely-multiline strings in a notebook into lists of lines.

    Line endings are kept on each piece, which makes the written file
    friendlier to line-based VCS. ``rejoin_lines(nb)`` reverses this.

    For code cells the ``input`` and every multiline output key are
    handled; for every other cell type (text, heading) the ``source``
    and ``rendered`` keys are handled. Values that are not strings are
    left alone.

    Used when writing JSON files. The notebook is modified in place and
    also returned.
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != "code":
                # text, heading cell
                for field in ["source", "rendered"]:
                    value = cell.get(field, None)
                    if isinstance(value, str):
                        cell[field] = value.splitlines(keepends=True)
                continue

            if "input" in cell and isinstance(cell.input, str):
                cell.input = cell.input.splitlines(keepends=True)
            for output in cell.outputs:
                for field in _multiline_outputs:
                    value = output.get(field, None)
                    if isinstance(value, str):
                        output[field] = value.splitlines(keepends=True)
    return nb
99
100
101# b64 encode/decode are never actually used, because all bytes objects in
102# the notebook are already b64-encoded, and we don't need/want to double-encode
103
104
def base64_decode(nb):
    """Base64-decode the ``png``/``jpeg`` outputs of every code cell.

    A value that is a ``str`` is first encoded to ASCII bytes; the
    bytes are then passed through ``decodebytes`` and stored back on the
    output. The notebook is modified in place and also returned.

    Note: This is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != "code":
                continue
            for output in cell.outputs:
                for image_key in ("png", "jpeg"):
                    if image_key not in output:
                        continue
                    if isinstance(getattr(output, image_key), str):
                        # store the ASCII bytes before decoding
                        ascii_bytes = getattr(output, image_key).encode("ascii")
                        setattr(output, image_key, ascii_bytes)
                    setattr(output, image_key, decodebytes(getattr(output, image_key)))
    return nb
123
124
def base64_encode(nb):
    """Base64-encode the ``png``/``jpeg`` outputs of every code cell.

    Each value is run through ``encodebytes`` and the result is decoded
    as ASCII, so the stored value ends up a b64-encoded unicode string.
    The notebook is modified in place and also returned.

    Note: This is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != "code":
                continue
            for output in cell.outputs:
                for image_key in ("png", "jpeg"):
                    if image_key in output:
                        encoded = encodebytes(getattr(output, image_key))
                        setattr(output, image_key, encoded.decode("ascii"))
    return nb
141
142
def strip_transient(nb):
    """Remove transient values that must not end up in stored files.

    Drops ``orig_nbformat`` and ``orig_nbformat_minor`` from the
    notebook, and ``trusted`` from every cell and from every cell's
    ``metadata``. Missing keys are ignored. The notebook is modified in
    place and also returned.

    This should be called in *both* read and write.
    """
    for transient_key in ("orig_nbformat", "orig_nbformat_minor"):
        nb.pop(transient_key, None)

    for worksheet in nb["worksheets"]:
        for cell in worksheet["cells"]:
            metadata = cell.get("metadata", {})
            metadata.pop("trusted", None)
            # cell.trusted shouldn't be used, but it is where the transient
            # value used to be stored, so strip it as well.
            cell.pop("trusted", None)
    return nb
157
158
class NotebookReader:
    """A class for reading notebooks.

    Subclasses implement :meth:`reads`; :meth:`read` is built on top of it.
    """

    def reads(self, s, **kwargs):
        """Read a notebook from a string.

        Must be overridden by subclasses.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        # The message names this method ("reads"); it previously said "loads",
        # which is not a method of this class.
        msg = "reads must be implemented in a subclass"
        raise NotImplementedError(msg)

    def read(self, fp, **kwargs):
        """Read a notebook from a file like object.

        The whole content of ``fp`` is read with ``fp.read()`` and handed to
        :meth:`reads` together with any keyword arguments; the result of
        :meth:`reads` is returned.
        """
        nbs = fp.read()
        return self.reads(nbs, **kwargs)
171
172
class NotebookWriter:
    """A class for writing notebooks.

    Subclasses implement :meth:`writes`; :meth:`write` is built on top of it.
    """

    def writes(self, nb, **kwargs):
        """Write a notebook to a string.

        Must be overridden by subclasses.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        # The message names this method ("writes"); it previously said "loads",
        # which is not a method of this class.
        msg = "writes must be implemented in a subclass"
        raise NotImplementedError(msg)

    def write(self, nb, fp, **kwargs):
        """Write a notebook to a file like object.

        The notebook is serialized with :meth:`writes` (keyword arguments are
        forwarded) and the result is passed to ``fp.write``; whatever
        ``fp.write`` returns is returned.
        """
        nbs = self.writes(nb, **kwargs)
        return fp.write(nbs)