Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyzstd/_zstdfile.py: 21%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import io
2import warnings
3try:
4 from os import PathLike
5except ImportError:
6 # For Python 3.5
7 class PathLike:
8 pass
10from pyzstd import ZstdCompressor, _ZstdFileReader, \
11 _ZstdFileWriter, _ZSTD_DStreamSizes
13__all__ = ('ZstdFile', 'open')
class _ZstdDecompressReader(io.RawIOBase):
    """Raw, read-only stream that exposes a _ZstdFileReader through the
    io.RawIOBase API, so it can be wrapped in an io.BufferedReader."""

    def __init__(self, fp, zstd_dict, option, read_size):
        # Keep the underlying file object: .seekable() and rewinding in
        # .seek() are delegated to it.
        self._fp = fp
        self._decomp = _ZstdFileReader(fp, zstd_dict, option, read_size)

    def close(self):
        # Drop the reference to the reader object, then let RawIOBase
        # finish closing.
        self._decomp = None
        return super().close()

    def readable(self):
        return True

    # Not every file-like object implements .seekable(), so the underlying
    # object is only asked when this method is actually called.
    def seekable(self):
        return self._fp.seekable()

    def tell(self):
        # Position as tracked by the reader object.
        return self._decomp.pos

    def readinto(self, b):
        return self._decomp.readinto(b)

    def readall(self):
        return self._decomp.readall()

    # io.BufferedReader may satisfy a seek from its own buffer, in which
    # case this method is not invoked at all.
    def seek(self, offset, whence=0):
        """Move to a new position and return it.

        whence follows the usual io convention (0: start, 1: current
        position, 2: end). Any other value raises ValueError.
        """
        reader = self._decomp

        # Step 1: turn (offset, whence) into an absolute target position.
        if whence == io.SEEK_SET:
            target = offset
        elif whence == io.SEEK_CUR:
            target = reader.pos + offset
        elif whence == io.SEEK_END:
            if reader.size < 0:
                # Size not known yet: forward(None) is used to obtain it.
                reader.forward(None)
            target = reader.size + offset
        else:
            raise ValueError("Invalid whence value: {}".format(whence))

        # Step 2: work out how many bytes have to be skipped forward.
        if target < reader.pos:
            # Going backwards: restart from the beginning of the input and
            # skip forward to the absolute target.
            reader.eof = False
            reader.pos = 0
            reader.reset_session()
            self._fp.seek(0)
            skip = target
        else:
            skip = target - reader.pos

        # .forward() does nothing when skip <= 0.
        reader.forward(skip)

        return reader.pos
# Second entry of the sizes tuple imported from pyzstd. Used in this module
# as the io.BufferedReader buffer size and as the default read1() size.
_ZSTD_DStreamOutSize = _ZSTD_DStreamSizes[1]

# Internal state codes stored in ZstdFile._mode.
_MODE_CLOSED, _MODE_READ, _MODE_WRITE = range(3)
class _DeprecatedPlaceholder:
    """Type of the sentinel used as default value for deprecated
    parameters, so that "argument not passed" can be told apart from any
    real value. Its repr is what shows up in rendered signatures."""

    def __repr__(self):
        text = '<DEPRECATED>'
        return text


# The single shared sentinel instance.
_DEPRECATED_PLACEHOLDER = _DeprecatedPlaceholder()
class ZstdFile(io.BufferedIOBase):
    """A file object providing transparent zstd (de)compression.

    A ZstdFile can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that ZstdFile provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be an object that
    supports the Buffer Protocol.
    """
    FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK
    FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME

    # Raw reader class wrapped by io.BufferedReader in read mode. Kept as a
    # class attribute so that it can be replaced by subclasses.
    _READER_CLASS = _ZstdDecompressReader

    def __init__(self, filename, mode="r", *,
                 level_or_option=None, zstd_dict=None,
                 read_size=_DEPRECATED_PLACEHOLDER, write_size=_DEPRECATED_PLACEHOLDER):
        """Open a zstd compressed file in binary mode.

        filename can be either an actual file name (given as a str, bytes, or
        PathLike object), in which case the named file is opened, or it can be
        an existing file object to read from or write to.

        mode can be "r" for reading (default), "w" for (over)writing, "x" for
        creating exclusively, or "a" for appending. These can equivalently be
        given as "rb", "wb", "xb" and "ab" respectively.

        Parameters
        level_or_option: When it's an int object, it represents compression
            level. When it's a dict object, it contains advanced compression
            parameters. Note, in read mode (decompression), it can only be a
            dict object, that represents decompression option. It doesn't
            support int type compression level in this case.
        zstd_dict: A ZstdDict object, pre-trained dictionary for compression /
            decompression.
        read_size, write_size: Deprecated. Passing either one emits a
            DeprecationWarning.

        Raises
        TypeError: level_or_option or filename has an unsupported type.
        ValueError: mode is invalid, or read_size / write_size is given a
            non-default value in a mode it does not apply to.
        """
        # The placeholder is a sentinel object, so test identity rather than
        # equality: "==" would call an arbitrary argument's __eq__.
        if read_size is _DEPRECATED_PLACEHOLDER:
            read_size = 131075
        else:
            warnings.warn("pyzstd.ZstdFile()'s read_size parameter is deprecated", DeprecationWarning, stacklevel=2)
        if write_size is _DEPRECATED_PLACEHOLDER:
            write_size = 131591
        else:
            warnings.warn("pyzstd.ZstdFile()'s write_size parameter is deprecated", DeprecationWarning, stacklevel=2)

        # Initialise these first, so that .close() (also called by IOBase's
        # cleanup code) works even if the rest of this method fails.
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        # Read or write mode
        if mode in ("r", "rb"):
            if not isinstance(level_or_option, (type(None), dict)):
                raise TypeError(
                    ("In read mode (decompression), level_or_option argument "
                     "should be a dict object, that represents decompression "
                     "option. It doesn't support int type compression level "
                     "in this case."))
            if write_size != 131591:
                raise ValueError(
                    "write_size argument is only valid in write modes.")
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "a", "ab", "x", "xb"):
            if not isinstance(level_or_option, (type(None), int, dict)):
                raise TypeError(("level_or_option argument "
                                 "should be int or dict object."))
            if read_size != 131075:
                raise ValueError(
                    "read_size argument is only valid in read mode.")
            mode_code = _MODE_WRITE
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        # File object
        if isinstance(filename, (str, bytes, PathLike)):
            if "b" not in mode:
                mode += "b"
            self._fp = io.open(filename, mode)
            # The file was opened here, so .close() must close it too.
            self._closefp = True
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
        else:
            raise TypeError(("filename must be a str, bytes, "
                             "file or PathLike object"))

        # Set ._mode here for ._closefp in .close(). If the following code
        # fails, IOBase's cleanup code will call .close(), so that ._fp can
        # be closed.
        self._mode = mode_code

        # Reader or writer
        if mode_code == _MODE_READ:
            raw = self._READER_CLASS(
                self._fp,
                zstd_dict=zstd_dict,
                option=level_or_option,
                read_size=read_size)
            self._buffer = io.BufferedReader(raw, _ZSTD_DStreamOutSize)
        elif mode_code == _MODE_WRITE:
            # Number of uncompressed bytes written so far, reported by .tell()
            self._pos = 0
            self._writer = _ZstdFileWriter(
                self._fp,
                level_or_option=level_or_option,
                zstd_dict=zstd_dict,
                write_size=write_size)

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return

        try:
            # In .__init__ method, if fails after setting ._mode attribute,
            # these attributes don't exist.
            if hasattr(self, "_buffer"):
                try:
                    self._buffer.close()
                finally:
                    # Set to None for ._check_mode()
                    self._buffer = None
            elif hasattr(self, "_writer"):
                try:
                    self.flush(self.FLUSH_FRAME)
                finally:
                    # Set to None for ._check_mode()
                    self._writer = None
        finally:
            # Always release the underlying file and mark this object as
            # closed, even if closing the buffer / flushing raised.
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    # None argument means the file should be closed
    def _check_mode(self, expected_mode=None):
        """Raise the exception that matches the current state of the file.

        Raises ValueError if the file is closed, io.UnsupportedOperation if
        expected_mode is _MODE_READ / _MODE_WRITE and the file is open in the
        other mode. Otherwise the exception currently being handled is
        re-raised, so this method must only be called from an except block
        or when one of the two checks above is known to fail.
        """
        # If closed, raise ValueError.
        if self._mode == _MODE_CLOSED:
            raise ValueError("I/O operation on closed file")

        # Check _MODE_READ/_MODE_WRITE mode
        if expected_mode == _MODE_READ:
            if self._mode != _MODE_READ:
                raise io.UnsupportedOperation("File not open for reading")
        elif expected_mode == _MODE_WRITE:
            if self._mode != _MODE_WRITE:
                raise io.UnsupportedOperation("File not open for writing")

        # Re-raise other AttributeError exception
        raise

    # If modify this method, also modify SeekableZstdFile.write() method.
    def write(self, data):
        """Write a bytes-like object to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until .flush()
        or .close() is called.
        """
        # Compress & write
        try:
            input_size, _ = self._writer.write(data)
        except AttributeError:
            # ._writer is missing (read mode) or None (closed).
            self._check_mode(_MODE_WRITE)

        self._pos += input_size
        return input_size

    # If modify this method, also modify SeekableZstdFile.flush() method.
    def flush(self, mode=FLUSH_BLOCK):
        """Flush remaining data to the underlying stream.

        The mode argument can be ZstdFile.FLUSH_BLOCK, ZstdFile.FLUSH_FRAME.
        Abuse of this method will reduce compression ratio, use it only when
        necessary.

        If the program is interrupted afterwards, all data can be recovered.
        To ensure saving to disk, also need to use os.fsync(fd).

        This method does nothing in reading mode.
        """
        if self._mode != _MODE_WRITE:
            # Like IOBase.flush(), do nothing in reading mode.
            # TextIOWrapper.close() relies on this behavior.
            if self._mode == _MODE_READ:
                return
            # Closed, raise ValueError.
            self._check_mode()

        # Flush zstd block/frame, and write.
        self._writer.flush(mode)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        if size is None:
            size = -1
        try:
            return self._buffer.read(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b"" if the file is at EOF.
        """
        if size < 0:
            size = _ZSTD_DStreamOutSize

        try:
            return self._buffer.read1(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        try:
            return self._buffer.readinto(b)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readinto1(self, b):
        """Read bytes into b, while trying to avoid making multiple reads
        from the underlying stream.

        Returns the number of bytes read (0 for EOF).
        """
        try:
            return self._buffer.readinto1(b)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        if size is None:
            size = -1
        try:
            return self._buffer.readline(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the arguments,
        this operation may be extremely slow.
        """
        try:
            # BufferedReader.seek() checks seekable
            return self._buffer.seek(offset, whence)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        try:
            return self._buffer.peek(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def __iter__(self):
        # Check the mode explicitly. After .close() the ._buffer attribute
        # still exists (set to None), so probing for the attribute would let
        # a closed file through.
        if self._mode != _MODE_READ:
            self._check_mode(_MODE_READ)
        return self

    def __next__(self):
        try:
            line = self._buffer.readline()
        except AttributeError:
            # ._buffer is missing (write mode) or None (closed): raise
            # UnsupportedOperation / ValueError like the other read methods,
            # or re-raise an unrelated AttributeError.
            self._check_mode(_MODE_READ)
        else:
            if line:
                return line
            raise StopIteration

    def tell(self):
        """Return the current file position."""
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        elif self._mode == _MODE_WRITE:
            return self._pos

        # Closed, raise ValueError.
        self._check_mode()

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        try:
            return self._fp.fileno()
        except AttributeError:
            # Closed, raise ValueError.
            self._check_mode()

    @property
    def name(self):
        """Return the file name for the underlying file."""
        try:
            return self._fp.name
        except AttributeError:
            self._check_mode()

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def writable(self):
        """Return whether the file was opened for writing."""
        if self._mode == _MODE_WRITE:
            return True
        elif self._mode == _MODE_READ:
            return False

        # Closed, raise ValueError.
        self._check_mode()

    def readable(self):
        """Return whether the file was opened for reading."""
        if self._mode == _MODE_READ:
            return True
        elif self._mode == _MODE_WRITE:
            return False

        # Closed, raise ValueError.
        self._check_mode()

    def seekable(self):
        """Return whether the file supports seeking."""
        if self._mode == _MODE_READ:
            return self._buffer.seekable()
        elif self._mode == _MODE_WRITE:
            return False

        # Closed, raise ValueError.
        self._check_mode()
# Copied from lzma module
def open(filename, mode="rb", *, level_or_option=None, zstd_dict=None,
         encoding=None, errors=None, newline=None):
    """Open a zstd compressed file in binary or text mode.

    filename can be either an actual file name (given as a str, bytes, or
    PathLike object), in which case the named file is opened, or it can be an
    existing file object to read from or write to.

    The mode parameter can be "r", "rb" (default), "w", "wb", "x", "xb", "a",
    "ab" for binary mode, or "rt", "wt", "xt", "at" for text mode.

    The level_or_option and zstd_dict parameters specify the settings, as for
    ZstdCompressor, ZstdDecompressor and ZstdFile.

    When using read mode (decompression), the level_or_option parameter can
    only be a dict object, that represents decompression option. It doesn't
    support int type compression level in this case.

    For binary mode, this function is equivalent to the ZstdFile constructor:
    ZstdFile(filename, mode, ...). In this case, the encoding, errors and
    newline parameters must not be provided.

    For text mode, an ZstdFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).

    Raises ValueError if mode contains both "t" and "b", or if encoding,
    errors or newline is given in binary mode.
    """

    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # ZstdFile only understands binary modes, strip the text flag.
    zstd_mode = mode.replace("t", "")
    binary_file = ZstdFile(filename, zstd_mode,
                           level_or_option=level_or_option, zstd_dict=zstd_dict)

    if not text_mode:
        return binary_file

    try:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    except BaseException:
        # The wrapper could not be created (e.g. unknown encoding), so the
        # caller never receives an object to close. Close the ZstdFile here
        # instead of leaving it open, then propagate the error.
        binary_file.close()
        raise