1import lz4
2import io
3import os
4import builtins
5import sys
6from ._frame import ( # noqa: F401
7 compress,
8 decompress,
9 create_compression_context,
10 compress_begin,
11 compress_chunk,
12 compress_flush,
13 create_decompression_context,
14 reset_decompression_context,
15 decompress_chunk,
16 get_frame_info,
17 BLOCKSIZE_DEFAULT as _BLOCKSIZE_DEFAULT,
18 BLOCKSIZE_MAX64KB as _BLOCKSIZE_MAX64KB,
19 BLOCKSIZE_MAX256KB as _BLOCKSIZE_MAX256KB,
20 BLOCKSIZE_MAX1MB as _BLOCKSIZE_MAX1MB,
21 BLOCKSIZE_MAX4MB as _BLOCKSIZE_MAX4MB,
22 __doc__ as _doc
23)
24
25__doc__ = _doc
26
27try:
28 import _compression # Python 3.6 and later
29except ImportError:
30 from . import _compression
31
32
# Public block-size specifiers. Each name below simply re-exports the
# underscore-prefixed alias imported from the ``._frame`` module at the top of
# this file. The bare string that follows every assignment documents the
# constant defined immediately above it.
BLOCKSIZE_DEFAULT = _BLOCKSIZE_DEFAULT
"""Specifier for the default block size.

Specifying ``block_size=lz4.frame.BLOCKSIZE_DEFAULT`` will instruct the LZ4
library to use the default maximum blocksize. This is currently equivalent to
`lz4.frame.BLOCKSIZE_MAX64KB`

"""

BLOCKSIZE_MAX64KB = _BLOCKSIZE_MAX64KB
"""Specifier for a maximum block size of 64 kB.

Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX64KB`` will instruct the LZ4
library to create blocks containing a maximum of 64 kB of uncompressed data.

"""

BLOCKSIZE_MAX256KB = _BLOCKSIZE_MAX256KB
"""Specifier for a maximum block size of 256 kB.

Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX256KB`` will instruct the LZ4
library to create blocks containing a maximum of 256 kB of uncompressed data.

"""

BLOCKSIZE_MAX1MB = _BLOCKSIZE_MAX1MB
"""Specifier for a maximum block size of 1 MB.

Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX1MB`` will instruct the LZ4
library to create blocks containing a maximum of 1 MB of uncompressed data.

"""

BLOCKSIZE_MAX4MB = _BLOCKSIZE_MAX4MB
"""Specifier for a maximum block size of 4 MB.

Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX4MB`` will instruct the LZ4
library to create blocks containing a maximum of 4 MB of uncompressed data.

"""

# Convenience names for notable compression levels. These are plain integers
# defined here (not imported); ``COMPRESSIONLEVEL_MIN`` is also the default
# ``compression_level`` of the classes and functions defined in this module.
COMPRESSIONLEVEL_MIN = 0
"""Specifier for the minimum compression level.

Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MIN`` will
instruct the LZ4 library to use a compression level of 0

"""

COMPRESSIONLEVEL_MINHC = 3
"""Specifier for the minimum compression level for high compression mode.

Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MINHC`` will
instruct the LZ4 library to use a compression level of 3, the minimum for the
high compression mode.

"""

COMPRESSIONLEVEL_MAX = 16
"""Specifier for the maximum compression level.

Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MAX`` will
instruct the LZ4 library to use a compression level of 16, the highest
compression level available.

"""
99
100
class LZ4FrameCompressor(object):
    """Create a LZ4 frame compressor object.

    This object can be used to compress data incrementally: call `begin()`
    once to obtain the frame header, `compress()` any number of times, and
    `flush()` to obtain the remaining buffered data and the frame footer.

    Args:
        block_size (int): Specifies the maximum blocksize to use.
            Options:

            - `lz4.frame.BLOCKSIZE_DEFAULT`: the lz4 library default
            - `lz4.frame.BLOCKSIZE_MAX64KB`: 64 kB
            - `lz4.frame.BLOCKSIZE_MAX256KB`: 256 kB
            - `lz4.frame.BLOCKSIZE_MAX1MB`: 1 MB
            - `lz4.frame.BLOCKSIZE_MAX4MB`: 4 MB

            If unspecified, will default to `lz4.frame.BLOCKSIZE_DEFAULT` which
            is equal to `lz4.frame.BLOCKSIZE_MAX64KB`.
        block_linked (bool): Specifies whether to use block-linked
            compression. If ``True``, the compression ratio is improved,
            especially for small block sizes. If ``False`` the blocks are
            compressed independently. The default is ``True``.
        compression_level (int): Specifies the level of compression used.
            Values between 0-16 are valid, with 0 (default) being the
            lowest compression (0-2 are the same value), and 16 the highest.
            Values above 16 will be treated as 16.
            Values between 4-9 are recommended. 0 is the default.
            The following module constants are provided as a convenience:

            - `lz4.frame.COMPRESSIONLEVEL_MIN`: Minimum compression (0)
            - `lz4.frame.COMPRESSIONLEVEL_MINHC`: Minimum high-compression (3)
            - `lz4.frame.COMPRESSIONLEVEL_MAX`: Maximum compression (16)

        content_checksum (bool): Specifies whether to enable checksumming of
            the payload content. If ``True``, a checksum of the uncompressed
            data is stored at the end of the compressed frame which is checked
            during decompression. The default is ``False``.
        block_checksum (bool): Specifies whether to enable checksumming of
            the content of each block. If ``True`` a checksum of the
            uncompressed data in each block in the frame is stored at the end
            of each block. If present, these checksums will be used to
            validate the data during decompression. The default is ``False``,
            meaning block checksums are not calculated and stored. This
            functionality is only supported if the underlying LZ4 library has
            version >= 1.8.0. Attempting to set this value to ``True`` with a
            version of LZ4 < 1.8.0 will cause a ``RuntimeError`` to be raised.
        auto_flush (bool): When ``False``, the LZ4 library may buffer data
            until a block is full. When ``True`` no buffering occurs, and
            partially full blocks may be returned. The default is ``False``.
        return_bytearray (bool): When ``False`` a ``bytes`` object is returned
            from the calls to methods of this class. When ``True`` a
            ``bytearray`` object will be returned. The default is ``False``.

    Raises:
        RuntimeError: if ``block_checksum`` is true and
            ``lz4.library_version_number()`` reports a version below 10800.

    """

    def __init__(self,
                 block_size=BLOCKSIZE_DEFAULT,
                 block_linked=True,
                 compression_level=COMPRESSIONLEVEL_MIN,
                 content_checksum=False,
                 block_checksum=False,
                 auto_flush=False,
                 return_bytearray=False):
        self.block_size = block_size
        self.block_linked = block_linked
        self.compression_level = compression_level
        self.content_checksum = content_checksum
        if block_checksum and lz4.library_version_number() < 10800:
            # The two literals are concatenated by the parser, so the first
            # one must end with a space to keep the words apart.
            raise RuntimeError(
                'Attempt to set block_checksum to True with LZ4 library '
                'version < 10800'
            )
        self.block_checksum = block_checksum
        self.auto_flush = auto_flush
        self.return_bytearray = return_bytearray
        # The compression context is created lazily by begin().
        self._context = None
        self._started = False

    def __enter__(self):
        # All necessary initialization is done in __init__
        return self

    def __exit__(self, exception_type, exception, traceback):
        # Drop every setting and the context; nothing is returned, so an
        # exception raised inside the ``with`` body is not suppressed.
        self.block_size = None
        self.block_linked = None
        self.compression_level = None
        self.content_checksum = None
        self.block_checksum = None
        self.auto_flush = None
        self.return_bytearray = None
        self._context = None
        self._started = False

    def begin(self, source_size=0):
        """Begin a compression frame.

        The returned data contains frame header information. The data returned
        from subsequent calls to ``compress()`` should be concatenated with
        this header.

        Keyword Args:
            source_size (int): Optionally specify the total size of the
                uncompressed data. If specified, will be stored in the
                compressed frame header as an 8-byte field for later use
                during decompression. Default is 0 (no size stored).

        Returns:
            bytes or bytearray: frame header data

        Raises:
            RuntimeError: if a frame has already been started on this
                instance and not yet finished with ``flush()`` or
                ``reset()``.

        """
        if self._started is not False:
            raise RuntimeError(
                "LZ4FrameCompressor.begin() called after already initialized"
            )

        # A fresh context is created for every frame.
        self._context = create_compression_context()
        header = compress_begin(
            self._context,
            block_size=self.block_size,
            block_linked=self.block_linked,
            compression_level=self.compression_level,
            content_checksum=self.content_checksum,
            block_checksum=self.block_checksum,
            auto_flush=self.auto_flush,
            return_bytearray=self.return_bytearray,
            source_size=source_size,
        )
        # Only mark the frame as started once the header was produced.
        self._started = True
        return header

    def compress(self, data):  # noqa: F811
        """Compresses data and returns it.

        This compresses ``data`` (a ``bytes`` object), returning a bytes or
        bytearray object containing compressed data for the input.

        If ``auto_flush`` has been set to ``False``, some of ``data`` may be
        buffered internally, for use in later calls to
        `LZ4FrameCompressor.compress()` and `LZ4FrameCompressor.flush()`.

        The returned data should be concatenated with the output of any
        previous calls to `compress()` and a single call to `begin()`.

        Args:
            data (str, bytes or buffer-compatible object): data to compress

        Returns:
            bytes or bytearray: compressed data

        Raises:
            RuntimeError: if there is no compression context or the frame
                has not been started.

        """
        # The context is None both before begin() and after flush()/reset().
        if self._context is None:
            raise RuntimeError('compress called after flush()')

        if self._started is False:
            raise RuntimeError('compress called before compress_begin()')

        return compress_chunk(
            self._context, data,
            return_bytearray=self.return_bytearray
        )

    def flush(self):
        """Finish the compression process.

        This returns a ``bytes`` or ``bytearray`` object containing any data
        stored in the compressor's internal buffers and a frame footer.

        The LZ4FrameCompressor instance may be re-used after this method has
        been called to create a new frame of compressed data; `begin()` must
        be called again first because the context is discarded here.

        Returns:
            bytes or bytearray: compressed data and frame footer.

        """
        footer = compress_flush(
            self._context,
            end_frame=True,
            return_bytearray=self.return_bytearray
        )
        self._context = None
        self._started = False
        return footer

    def reset(self):
        """Reset the `LZ4FrameCompressor` instance.

        This allows the `LZ4FrameCompressor` instance to be re-used after an
        error. The current context is discarded without being flushed.

        """
        self._context = None
        self._started = False

    def has_context(self):
        """Return whether the compression context exists.

        Returns:
            bool: ``True`` if the compression context exists, ``False``
            otherwise.
        """
        return self._context is not None

    def started(self):
        """Return whether the compression frame has been started.

        Returns:
            bool: ``True`` if the compression frame has been started, ``False``
            otherwise.
        """
        return self._started
314
315
class LZ4FrameDecompressor(object):
    """Create a LZ4 frame decompressor object.

    This can be used to decompress data incrementally.

    For a more convenient way of decompressing an entire compressed frame at
    once, see `lz4.frame.decompress()`.

    Args:
        return_bytearray (bool): When ``False`` a bytes object is returned from
            the calls to methods of this class. When ``True`` a bytearray
            object will be returned. The default is ``False``.

    Attributes:
        eof (bool): ``True`` if the end-of-stream marker has been reached.
            ``False`` otherwise.
        unused_data (bytes): Data found after the end of the compressed stream.
            This is ``None`` until an end of frame is reached with input
            left over.
        needs_input (bool): ``False`` if the ``decompress()`` method can
            provide more decompressed data before requiring new compressed
            input. ``True`` otherwise.

    """

    def __init__(self, return_bytearray=False):
        self._context = create_decompression_context()
        self.eof = False
        self.needs_input = True
        self.unused_data = None
        # Input handed to decompress() that has not been consumed yet; it is
        # prepended to the data of the next call.
        self._unconsumed_data = b''
        self._return_bytearray = return_bytearray

    def __enter__(self):
        # All necessary initialization is done in __init__
        return self

    def __exit__(self, exception_type, exception, traceback):
        # Drop all state; nothing is returned, so an exception raised inside
        # the ``with`` body is not suppressed.
        self._context = None
        self.eof = None
        self.needs_input = None
        self.unused_data = None
        self._unconsumed_data = None
        self._return_bytearray = None

    def reset(self):
        """Reset the decompressor state.

        This is useful after an error occurs, allowing re-use of the instance.

        """
        reset_decompression_context(self._context)
        self.eof = False
        self.needs_input = True
        self.unused_data = None
        self._unconsumed_data = b''

    def decompress(self, data, max_length=-1):  # noqa: F811
        """Decompresses part or all of an LZ4 frame of compressed data.

        The returned data should be concatenated with the output of any
        previous calls to `decompress()`.

        If ``max_length`` is non-negative, returns at most ``max_length`` bytes
        of decompressed data. If this limit is reached and further output can
        be produced, the `needs_input` attribute will be set to ``False``. In
        this case, the next call to `decompress()` may provide data as
        ``b''`` to obtain more of the output. In all cases, any unconsumed data
        from previous calls will be prepended to the input data.

        If all of the input ``data`` was decompressed and returned (either
        because this was less than ``max_length`` bytes, or because
        ``max_length`` was negative), the `needs_input` attribute will be set
        to ``True``.

        If an end of frame marker is encountered in the data during
        decompression, decompression will stop at the end of the frame, and any
        data after the end of frame is available from the `unused_data`
        attribute. In this case, the `LZ4FrameDecompressor` instance is reset
        and can be used for further decompression.

        Args:
            data (bytes, bytearray or buffer-compatible object): compressed
                data to decompress. Anything that is not ``bytes`` or
                ``bytearray`` is copied to ``bytes`` through ``memoryview``.

        Keyword Args:
            max_length (int): If this is non-negative, this method returns at
                most ``max_length`` bytes of decompressed data.

        Returns:
            bytes or bytearray: Uncompressed data

        """
        if not isinstance(data, (bytes, bytearray)):
            data = memoryview(data).tobytes()

        if self._unconsumed_data:
            data = self._unconsumed_data + data

        decompressed, bytes_read, eoframe = decompress_chunk(
            self._context,
            data,
            max_length=max_length,
            return_bytearray=self._return_bytearray,
        )

        if bytes_read < len(data):
            leftover = data[bytes_read:]
            if eoframe:
                self.unused_data = leftover
                # Any input held back by an earlier call was part of ``data``
                # and is therefore either consumed or now in ``unused_data``.
                # Clear it so it is not prepended again to the input of the
                # next frame.
                self._unconsumed_data = b''
            else:
                self._unconsumed_data = leftover
                self.needs_input = False
        else:
            self._unconsumed_data = b''
            self.needs_input = True
            self.unused_data = None

        self.eof = eoframe

        return decompressed
435
436
# Internal state codes stored in ``LZ4FrameFile._mode``.
_MODE_CLOSED = 0
_MODE_READ = 1
# Value 2 no longer used
_MODE_WRITE = 3
441
442
class LZ4FrameFile(_compression.BaseStream):
    """A file object providing transparent LZ4F (de)compression.

    An LZ4FrameFile can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that LZ4FrameFile provides a *binary* file interface - data read is
    returned as bytes, and data to be written must be given as bytes.

    When opening a file for writing, the settings used by the compressor can be
    specified. The underlying compressor object is
    `lz4.frame.LZ4FrameCompressor`. See the docstrings for that class for
    details on compression options.

    Args:
        filename(str, bytes, PathLike, file object): can be either an actual
            file name (given as a str, bytes, or
            PathLike object), in which case the named file is opened, or it
            can be an existing file object to read from or write to.

    Keyword Args:
        mode(str): mode can be ``'r'`` for reading (default), ``'w'`` for
            (over)writing, ``'x'`` for creating exclusively, or ``'a'``
            for appending. These can equivalently be given as ``'rb'``,
            ``'wb'``, ``'xb'`` and ``'ab'`` respectively.
        return_bytearray (bool): When ``False`` a bytes object is returned from
            the calls to methods of this class. When ``True`` a ``bytearray``
            object will be returned. The default is ``False``.
        source_size (int): Optionally specify the total size of the
            uncompressed data. If specified, will be stored in the compressed
            frame header as an 8-byte field for later use during decompression.
            Default is ``0`` (no size stored). Only used for writing
            compressed files.
        block_size (int): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.
        block_linked (bool): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.
        compression_level (int): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.
        content_checksum (bool): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.
        block_checksum (bool): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.
        auto_flush (bool): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.

    Raises:
        ValueError: if ``mode`` is not one of the supported modes.
        TypeError: if ``filename`` is neither a path nor an object with a
            ``read`` or ``write`` attribute.

    """

    def __init__(self, filename=None, mode='r',
                 block_size=BLOCKSIZE_DEFAULT,
                 block_linked=True,
                 compression_level=COMPRESSIONLEVEL_MIN,
                 content_checksum=False,
                 block_checksum=False,
                 auto_flush=False,
                 return_bytearray=False,
                 source_size=0):

        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if mode in ('r', 'rb'):
            mode_code = _MODE_READ
        elif mode in ('w', 'wb', 'a', 'ab', 'x', 'xb'):
            mode_code = _MODE_WRITE
            # The compressor settings (including return_bytearray) are only
            # used on the write path.
            self._compressor = LZ4FrameCompressor(
                block_size=block_size,
                block_linked=block_linked,
                compression_level=compression_level,
                content_checksum=content_checksum,
                block_checksum=block_checksum,
                auto_flush=auto_flush,
                return_bytearray=return_bytearray,
            )
            # Count of uncompressed bytes written, reported by tell().
            self._pos = 0
        else:
            raise ValueError('Invalid mode: {!r}'.format(mode))

        # os.PathLike is only referenced when the interpreter is new enough.
        if sys.version_info > (3, 6):
            path_test = isinstance(filename, (str, bytes, os.PathLike))
        else:
            path_test = isinstance(filename, (str, bytes))

        if path_test is True:
            # The underlying file is always opened in binary mode.
            if 'b' not in mode:
                mode += 'b'
            self._fp = builtins.open(filename, mode)
            # We opened the file, so close() is responsible for closing it.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, 'read') or hasattr(filename, 'write'):
            # Caller-supplied file object: it is used but never closed here.
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError(
                'filename must be a str, bytes, file or PathLike object'
            )

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(self._fp, LZ4FrameDecompressor)
            self._buffer = io.BufferedReader(raw)

        if self._mode == _MODE_WRITE:
            # The frame header is written immediately on opening.
            self._source_size = source_size
            self._fp.write(self._compressor.begin(source_size=source_size))

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return
        try:
            if self._mode == _MODE_READ:
                self._buffer.close()
                self._buffer = None
            elif self._mode == _MODE_WRITE:
                self.flush()
                self._compressor = None
        finally:
            # Release the underlying file even if flushing failed, and always
            # leave the object in the closed state.
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    @property
    def closed(self):
        """Returns ``True`` if this file is closed.

        Returns:
            bool: ``True`` if the file is closed, ``False`` otherwise.

        """
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file.

        Returns:
            The value returned by the ``fileno()`` method of the underlying
            file object.

        """
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking.

        Returns:
            bool: ``True`` if the file supports seeking, ``False`` otherwise.

        """
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading.

        Returns:
            bool: ``True`` if the file was opened for reading, ``False``
            otherwise.

        """
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing.

        Returns:
            bool: ``True`` if the file was opened for writing, ``False``
            otherwise.

        """
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF. The exact
        number of bytes returned is unspecified.

        Returns:
            bytes: uncompressed data

        """
        self._check_can_read()
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        return self._buffer.peek(size)

    def readall(self):
        """Read and return all remaining uncompressed data.

        The data is gathered by calling ``read()`` repeatedly with
        ``io.DEFAULT_BUFFER_SIZE`` until it returns an empty result.

        Returns:
            bytes: uncompressed data

        """
        chunks = bytearray()

        while True:
            data = self.read(io.DEFAULT_BUFFER_SIZE)
            chunks += data
            if not data:
                break

        return bytes(chunks)

    def read(self, size=-1):
        """Read up to ``size`` uncompressed bytes from the file.

        If ``size`` is negative, ``None`` or omitted, read until ``EOF`` is
        reached. Returns ``b''`` if the file is already at ``EOF``.

        Args:
            size(int): If non-negative, specifies the maximum number of
                uncompressed bytes to return.

        Returns:
            bytes: uncompressed data

        """
        self._check_can_read()

        # ``None`` means "read everything" in the io API; normalise it so the
        # comparison below does not raise TypeError.
        if size is None:
            size = -1

        # NOTE(review): on Python >= 3.10 an unbounded read is served by the
        # chunked readall() above instead of BufferedReader.read(-1); the
        # reason is not visible here - confirm before changing.
        if size < 0 and sys.version_info >= (3, 10):
            return self.readall()
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to ``size`` uncompressed bytes.

        This method tries to avoid making multiple reads from the underlying
        stream.

        This method reads up to a buffer's worth of data if ``size`` is
        negative or ``None``.

        Returns ``b''`` if the file is at EOF.

        Args:
            size(int): If non-negative, specifies the maximum number of
                uncompressed bytes to return.

        Returns:
            bytes: uncompressed data

        """
        self._check_can_read()
        if size is None or size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which case the
        line may be incomplete). Returns b'' if already at EOF.

        Args:
            size(int): If non-negative, specifies the maximum number of
                uncompressed bytes to return.

        Returns:
            bytes: uncompressed data

        """
        self._check_can_read()
        return self._buffer.readline(size)

    def write(self, data):
        """Write a bytes object to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.

        Args:
            data(bytes): uncompressed data to compress and write to the file

        Returns:
            int: the number of uncompressed bytes written to the file

        """
        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        self._check_can_write()

        # flush() ends the current frame, so a write after a flush has to
        # open a new frame first.
        if not self._compressor.started():
            header = self._compressor.begin(source_size=self._source_size)
            self._fp.write(header)

        compressed = self._compressor.compress(data)
        self._fp.write(compressed)
        self._pos += length
        return length

    def flush(self):
        """Flush the file, keeping it open.

        In write mode this finishes the current compressed frame (if one is
        open) and writes it out before flushing the underlying file.

        May be called more than once without error. The file may continue
        to be used normally after flushing.
        """
        if self.writable() and self._compressor.has_context():
            self._fp.write(self._compressor.flush())
        self._fp.flush()

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by ``offset``, relative to the position
        indicated by ``whence``. Possible values for ``whence`` are:

        - ``io.SEEK_SET`` or 0: start of stream (default): offset must not be
          negative
        - ``io.SEEK_CUR`` or 1: current stream position
        - ``io.SEEK_END`` or 2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters, this
        operation may be extremely slow.

        Args:
            offset(int): new position in the file
            whence(int): position with which ``offset`` is measured. Allowed
                values are 0, 1, 2. The default is 0 (start of stream).

        Returns:
            int: new file position

        """
        self._check_can_seek()
        return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position.

        In read mode this is the position reported by the internal buffered
        reader; in write mode it is the number of uncompressed bytes written
        so far.

        Returns:
            int: file position

        """
        self._check_not_closed()
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        return self._pos
797
798
def open(filename, mode="rb",
         encoding=None,
         errors=None,
         newline=None,
         block_size=BLOCKSIZE_DEFAULT,
         block_linked=True,
         compression_level=COMPRESSIONLEVEL_MIN,
         content_checksum=False,
         block_checksum=False,
         auto_flush=False,
         return_bytearray=False,
         source_size=0):
    """Open an LZ4Frame-compressed file in binary or text mode.

    ``filename`` can be either an actual file name (given as a str, bytes, or
    PathLike object), in which case the named file is opened, or it can be an
    existing file object to read from or write to.

    The ``mode`` argument can be ``'r'``, ``'rb'`` (default), ``'w'``,
    ``'wb'``, ``'x'``, ``'xb'``, ``'a'``, or ``'ab'`` for binary mode, or
    ``'rt'``, ``'wt'``, ``'xt'``, or ``'at'`` for text mode.

    For binary mode, this function is equivalent to the `LZ4FrameFile`
    constructor: `LZ4FrameFile(filename, mode, ...)`.

    For text mode, an `LZ4FrameFile` object is created, and wrapped in an
    ``io.TextIOWrapper`` instance with the specified encoding, error handling
    behavior, and line ending(s).

    Args:
        filename (str, bytes, os.PathLike): file name or file object to open

    Keyword Args:
        mode (str): mode for opening the file
        encoding (str): the name of the encoding that will be used for
            encoding/decoding the stream. It defaults to
            ``locale.getpreferredencoding(False)``. See ``io.TextIOWrapper``
            for further details.
        errors (str): specifies how encoding and decoding errors are to be
            handled. See ``io.TextIOWrapper`` for further details.
        newline (str): controls how line endings are handled. See
            ``io.TextIOWrapper`` for further details.
        return_bytearray (bool): When ``False`` a bytes object is returned
            from the calls to methods of this class. When ``True`` a bytearray
            object will be returned. The default is ``False``.
        source_size (int): Optionally specify the total size of the
            uncompressed data. If specified, will be stored in the compressed
            frame header as an 8-byte field for later use during decompression.
            Default is 0 (no size stored). Only used for writing compressed
            files.
        block_size (int): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.
        block_linked (bool): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.
        compression_level (int): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.
        content_checksum (bool): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.
        block_checksum (bool): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.
        auto_flush (bool): Compressor setting. See
            `lz4.frame.LZ4FrameCompressor`.

    Raises:
        ValueError: if ``mode`` contains both ``'t'`` and ``'b'``, or if
            ``encoding``, ``errors`` or ``newline`` is given in binary mode.

    """
    text_mode = 't' in mode

    if text_mode:
        if 'b' in mode:
            raise ValueError('Invalid mode: %r' % (mode,))
    else:
        # The text-only arguments are rejected in binary mode, checked in
        # this order so the first offending argument is the one reported.
        text_only_arguments = (
            (encoding, "Argument 'encoding' not supported in binary mode"),
            (errors, "Argument 'errors' not supported in binary mode"),
            (newline, "Argument 'newline' not supported in binary mode"),
        )
        for value, message in text_only_arguments:
            if value is not None:
                raise ValueError(message)

    # The compressed stream itself is always binary; drop the 't' flag.
    compressed_stream = LZ4FrameFile(
        filename,
        mode=mode.replace('t', ''),
        block_size=block_size,
        block_linked=block_linked,
        compression_level=compression_level,
        content_checksum=content_checksum,
        block_checksum=block_checksum,
        auto_flush=auto_flush,
        return_bytearray=return_bytearray,
        source_size=source_size,
    )

    if not text_mode:
        return compressed_stream
    return io.TextIOWrapper(compressed_stream, encoding, errors, newline)