Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/bz2file.py: 60%
Shortcuts on this page:
r m x  toggle line displays
j k    next/prev highlighted chunk
0 (zero)  top of page
1 (one)   first highlighted chunk
1"""Module for reading and writing bzip2-compressed files.
3This module contains a backport of Python 3.4's bz2.open() function and
4BZ2File class, adapted to work with earlier versions of Python.
5"""
7__all__ = ["BZ2File", "open"]
9__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
11import io
12import sys
13import warnings
15try:
16 from threading import RLock
17except ImportError:
18 from dummy_threading import RLock
20from bz2 import BZ2Compressor, BZ2Decompressor
23_MODE_CLOSED = 0
24_MODE_READ = 1
25_MODE_READ_EOF = 2
26_MODE_WRITE = 3
28_BUFFER_SIZE = 8192
30_STR_TYPES = (str, unicode) if (str is bytes) else (str, bytes)
32# The 'x' mode for open() was introduced in Python 3.3.
33_HAS_OPEN_X_MODE = sys.version_info[:2] >= (3, 3)
35_builtin_open = open
38class BZ2File(io.BufferedIOBase):
40 """A file object providing transparent bzip2 (de)compression.
42 A BZ2File can act as a wrapper for an existing file object, or refer
43 directly to a named file on disk.
45 Note that BZ2File provides a *binary* file interface - data read is
46 returned as bytes, and data to be written should be given as bytes.
47 """
49 def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
50 """Open a bzip2-compressed file.
52 If filename is a str, bytes or unicode object, it gives the name
53 of the file to be opened. Otherwise, it should be a file object,
54 which will be used to read or write the compressed data.
56 mode can be 'r' for reading (default), 'w' for (over)writing,
57 'x' for creating exclusively, or 'a' for appending. These can
58 equivalently be given as 'rb', 'wb', 'xb', and 'ab'.
60 buffering is ignored. Its use is deprecated.
62 If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
63 and 9 specifying the level of compression: 1 produces the least
64 compression, and 9 (default) produces the most compression.
66 If mode is 'r', the input file may be the concatenation of
67 multiple compressed streams.
68 """
69 # This lock must be recursive, so that BufferedIOBase's
70 # readline(), readlines() and writelines() don't deadlock.
71 self._lock = RLock()
72 self._fp = None
73 self._closefp = False
74 self._mode = _MODE_CLOSED
75 self._pos = 0
76 self._size = -1
78 if buffering is not None:
79 warnings.warn("Use of 'buffering' argument is deprecated",
80 DeprecationWarning)
82 if not (1 <= compresslevel <= 9):
83 raise ValueError("compresslevel must be between 1 and 9")
85 if mode in ("", "r", "rb"):
86 mode = "rb"
87 mode_code = _MODE_READ
88 self._decompressor = BZ2Decompressor()
89 self._buffer = b""
90 self._buffer_offset = 0
91 elif mode in ("w", "wb"):
92 mode = "wb"
93 mode_code = _MODE_WRITE
94 self._compressor = BZ2Compressor(compresslevel)
95 elif mode in ("x", "xb") and _HAS_OPEN_X_MODE:
96 mode = "xb"
97 mode_code = _MODE_WRITE
98 self._compressor = BZ2Compressor(compresslevel)
99 elif mode in ("a", "ab"):
100 mode = "ab"
101 mode_code = _MODE_WRITE
102 self._compressor = BZ2Compressor(compresslevel)
103 else:
104 raise ValueError("Invalid mode: %r" % (mode,))
106 if isinstance(filename, _STR_TYPES):
107 self._fp = _builtin_open(filename, mode)
108 self._closefp = True
109 self._mode = mode_code
110 elif hasattr(filename, "read") or hasattr(filename, "write"):
111 self._fp = filename
112 self._mode = mode_code
113 else:
114 raise TypeError("filename must be a %s or %s object, or a file" %
115 (_STR_TYPES[0].__name__, _STR_TYPES[1].__name__))
117 def close(self):
118 """Flush and close the file.
120 May be called more than once without error. Once the file is
121 closed, any other operation on it will raise a ValueError.
122 """
123 with self._lock:
124 if self._mode == _MODE_CLOSED:
125 return
126 try:
127 if self._mode in (_MODE_READ, _MODE_READ_EOF):
128 self._decompressor = None
129 elif self._mode == _MODE_WRITE:
130 self._fp.write(self._compressor.flush())
131 self._compressor = None
132 finally:
133 try:
134 if self._closefp:
135 self._fp.close()
136 finally:
137 self._fp = None
138 self._closefp = False
139 self._mode = _MODE_CLOSED
140 self._buffer = b""
141 self._buffer_offset = 0
143 @property
144 def closed(self):
145 """True if this file is closed."""
146 return self._mode == _MODE_CLOSED
148 def fileno(self):
149 """Return the file descriptor for the underlying file."""
150 self._check_not_closed()
151 return self._fp.fileno()
153 def seekable(self):
154 """Return whether the file supports seeking."""
155 return self.readable() and (self._fp.seekable()
156 if hasattr(self._fp, "seekable")
157 else hasattr(self._fp, "seek"))
159 def readable(self):
160 """Return whether the file was opened for reading."""
161 self._check_not_closed()
162 return self._mode in (_MODE_READ, _MODE_READ_EOF)
164 def writable(self):
165 """Return whether the file was opened for writing."""
166 self._check_not_closed()
167 return self._mode == _MODE_WRITE
169 # Mode-checking helper functions.
171 def _check_not_closed(self):
172 if self.closed:
173 raise ValueError("I/O operation on closed file")
175 def _check_can_read(self):
176 if self._mode not in (_MODE_READ, _MODE_READ_EOF):
177 self._check_not_closed()
178 raise io.UnsupportedOperation("File not open for reading")
180 def _check_can_write(self):
181 if self._mode != _MODE_WRITE:
182 self._check_not_closed()
183 raise io.UnsupportedOperation("File not open for writing")
185 def _check_can_seek(self):
186 if self._mode not in (_MODE_READ, _MODE_READ_EOF):
187 self._check_not_closed()
188 raise io.UnsupportedOperation("Seeking is only supported "
189 "on files open for reading")
190 if hasattr(self._fp, "seekable") and not self._fp.seekable():
191 raise io.UnsupportedOperation("The underlying file object "
192 "does not support seeking")
194 # Fill the readahead buffer if it is empty. Returns False on EOF.
195 def _fill_buffer(self):
196 if self._mode == _MODE_READ_EOF:
197 return False
198 # Depending on the input data, our call to the decompressor may not
199 # return any data. In this case, try again after reading another block.
200 while self._buffer_offset == len(self._buffer):
201 rawblock = (self._decompressor.unused_data or
202 self._fp.read(_BUFFER_SIZE))
204 if not rawblock:
205 try:
206 self._decompressor.decompress(b"")
207 except EOFError:
208 # End-of-stream marker and end of file. We're good.
209 self._mode = _MODE_READ_EOF
210 self._size = self._pos
211 return False
212 else:
213 # Problem - we were expecting more compressed data.
214 raise EOFError("Compressed file ended before the "
215 "end-of-stream marker was reached")
217 try:
218 self._buffer = self._decompressor.decompress(rawblock)
219 except EOFError:
220 # Continue to next stream.
221 self._decompressor = BZ2Decompressor()
222 try:
223 self._buffer = self._decompressor.decompress(rawblock)
224 except IOError:
225 # Trailing data isn't a valid bzip2 stream. We're done here.
226 self._mode = _MODE_READ_EOF
227 self._size = self._pos
228 return False
229 self._buffer_offset = 0
230 return True
232 # Read data until EOF.
233 # If return_data is false, consume the data without returning it.
234 def _read_all(self, return_data=True):
235 # The loop assumes that _buffer_offset is 0. Ensure that this is true.
236 self._buffer = self._buffer[self._buffer_offset:]
237 self._buffer_offset = 0
239 blocks = []
240 while self._fill_buffer():
241 if return_data:
242 blocks.append(self._buffer)
243 self._pos += len(self._buffer)
244 self._buffer = b""
245 if return_data:
246 return b"".join(blocks)
248 # Read a block of up to n bytes.
249 # If return_data is false, consume the data without returning it.
250 def _read_block(self, n, return_data=True):
251 # If we have enough data buffered, return immediately.
252 end = self._buffer_offset + n
253 if end <= len(self._buffer):
254 data = self._buffer[self._buffer_offset : end]
255 self._buffer_offset = end
256 self._pos += len(data)
257 return data if return_data else None
259 # The loop assumes that _buffer_offset is 0. Ensure that this is true.
260 self._buffer = self._buffer[self._buffer_offset:]
261 self._buffer_offset = 0
263 blocks = []
264 while n > 0 and self._fill_buffer():
265 if n < len(self._buffer):
266 data = self._buffer[:n]
267 self._buffer_offset = n
268 else:
269 data = self._buffer
270 self._buffer = b""
271 if return_data:
272 blocks.append(data)
273 self._pos += len(data)
274 n -= len(data)
275 if return_data:
276 return b"".join(blocks)
278 def peek(self, n=0):
279 """Return buffered data without advancing the file position.
281 Always returns at least one byte of data, unless at EOF.
282 The exact number of bytes returned is unspecified.
283 """
284 with self._lock:
285 self._check_can_read()
286 if not self._fill_buffer():
287 return b""
288 return self._buffer[self._buffer_offset:]
290 def read(self, size=-1):
291 """Read up to size uncompressed bytes from the file.
293 If size is negative or omitted, read until EOF is reached.
294 Returns b'' if the file is already at EOF.
295 """
296 if size is None:
297 raise TypeError()
298 with self._lock:
299 self._check_can_read()
300 if size == 0:
301 return b""
302 elif size < 0:
303 return self._read_all()
304 else:
305 return self._read_block(size)
307 def read1(self, size=-1):
308 """Read up to size uncompressed bytes, while trying to avoid
309 making multiple reads from the underlying stream.
311 Returns b'' if the file is at EOF.
312 """
313 # Usually, read1() calls _fp.read() at most once. However, sometimes
314 # this does not give enough data for the decompressor to make progress.
315 # In this case we make multiple reads, to avoid returning b"".
316 with self._lock:
317 self._check_can_read()
318 if (size == 0 or
319 # Only call _fill_buffer() if the buffer is actually empty.
320 # This gives a significant speedup if *size* is small.
321 (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
322 return b""
323 if size > 0:
324 data = self._buffer[self._buffer_offset :
325 self._buffer_offset + size]
326 self._buffer_offset += len(data)
327 else:
328 data = self._buffer[self._buffer_offset:]
329 self._buffer = b""
330 self._buffer_offset = 0
331 self._pos += len(data)
332 return data
334 def readinto(self, b):
335 """Read up to len(b) bytes into b.
337 Returns the number of bytes read (0 for EOF).
338 """
339 with self._lock:
340 return io.BufferedIOBase.readinto(self, b)
342 def readline(self, size=-1):
343 """Read a line of uncompressed bytes from the file.
345 The terminating newline (if present) is retained. If size is
346 non-negative, no more than size bytes will be read (in which
347 case the line may be incomplete). Returns b'' if already at EOF.
348 """
349 if not isinstance(size, int):
350 if not hasattr(size, "__index__"):
351 raise TypeError("Integer argument expected")
352 size = size.__index__()
353 with self._lock:
354 self._check_can_read()
355 # Shortcut for the common case - the whole line is in the buffer.
356 if size < 0:
357 end = self._buffer.find(b"\n", self._buffer_offset) + 1
358 if end > 0:
359 line = self._buffer[self._buffer_offset : end]
360 self._buffer_offset = end
361 self._pos += len(line)
362 return line
363 return io.BufferedIOBase.readline(self, size)
365 def readlines(self, size=-1):
366 """Read a list of lines of uncompressed bytes from the file.
368 size can be specified to control the number of lines read: no
369 further lines will be read once the total size of the lines read
370 so far equals or exceeds size.
371 """
372 if not isinstance(size, int):
373 if not hasattr(size, "__index__"):
374 raise TypeError("Integer argument expected")
375 size = size.__index__()
376 with self._lock:
377 return io.BufferedIOBase.readlines(self, size)
379 def write(self, data):
380 """Write a byte string to the file.
382 Returns the number of uncompressed bytes written, which is
383 always len(data). Note that due to buffering, the file on disk
384 may not reflect the data written until close() is called.
385 """
386 with self._lock:
387 self._check_can_write()
388 compressed = self._compressor.compress(data)
389 self._fp.write(compressed)
390 self._pos += len(data)
391 return len(data)
393 def writelines(self, seq):
394 """Write a sequence of byte strings to the file.
396 Returns the number of uncompressed bytes written.
397 seq can be any iterable yielding byte strings.
399 Line separators are not added between the written byte strings.
400 """
401 with self._lock:
402 return io.BufferedIOBase.writelines(self, seq)
404 # Rewind the file to the beginning of the data stream.
405 def _rewind(self):
406 self._fp.seek(0, 0)
407 self._mode = _MODE_READ
408 self._pos = 0
409 self._decompressor = BZ2Decompressor()
410 self._buffer = b""
411 self._buffer_offset = 0
413 def seek(self, offset, whence=0):
414 """Change the file position.
416 The new position is specified by offset, relative to the
417 position indicated by whence. Values for whence are:
419 0: start of stream (default); offset must not be negative
420 1: current stream position
421 2: end of stream; offset must not be positive
423 Returns the new file position.
425 Note that seeking is emulated, so depending on the parameters,
426 this operation may be extremely slow.
427 """
428 with self._lock:
429 self._check_can_seek()
431 # Recalculate offset as an absolute file position.
432 if whence == 0:
433 pass
434 elif whence == 1:
435 offset = self._pos + offset
436 elif whence == 2:
437 # Seeking relative to EOF - we need to know the file's size.
438 if self._size < 0:
439 self._read_all(return_data=False)
440 offset = self._size + offset
441 else:
442 raise ValueError("Invalid value for whence: %s" % (whence,))
444 # Make it so that offset is the number of bytes to skip forward.
445 if offset < self._pos:
446 self._rewind()
447 else:
448 offset -= self._pos
450 # Read and discard data until we reach the desired position.
451 self._read_block(offset, return_data=False)
453 return self._pos
455 def tell(self):
456 """Return the current file position."""
457 with self._lock:
458 self._check_not_closed()
459 return self._pos
def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str, bytes or unicode
    object), or an existing file object to read from or write to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or
    "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode.
    The default mode is "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the BZ2File
    constructor: BZ2File(filename, mode, compresslevel). In this case,
    the encoding, errors and newline arguments must not be provided.

    For text mode, a BZ2File object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).

    """
    text_mode = "t" in mode
    if text_mode:
        # "t" and "b" are mutually exclusive.
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # The text-layer arguments are meaningless for binary streams.
        for arg_name, arg_value in (("encoding", encoding),
                                    ("errors", errors),
                                    ("newline", newline)):
            if arg_value is not None:
                raise ValueError(
                    "Argument '%s' not supported in binary mode" % arg_name)

    raw_mode = mode.replace("t", "")
    bz_stream = BZ2File(filename, raw_mode, compresslevel=compresslevel)
    if text_mode:
        return io.TextIOWrapper(bz_stream, encoding, errors, newline)
    return bz_stream