1# Copyright (c) 2017-present, Gregory Szorc
2# All rights reserved.
3#
4# This software may be modified and distributed under the terms
5# of the BSD license. See the LICENSE file for details.
6
7"""Python interface to the Zstandard (zstd) compression library."""
8
9from __future__ import absolute_import, unicode_literals
10
11# This module serves 2 roles:
12#
13# 1) Export the C or CFFI "backend" through a central module.
14# 2) Implement additional functionality built on top of C or CFFI backend.
15
16import builtins
17import io
18import os
19import platform
20
21from typing import ByteString
22
# Some Python implementations don't support C extensions. That's why we have
# a CFFI implementation in the first place. The code here imports one of our
# "backends" then re-exports the symbols from this module. For convenience,
# we support falling back to the CFFI backend if the C extension can't be
# imported. But for performance reasons, we only do this on unknown Python
# implementations. Notably, for CPython we require the C extension by default.
# Because someone will inevitably want special behavior, the behavior is
# configurable via an environment variable. A potentially better way to handle
# this is to import a special ``__importpolicy__`` module or something
# defining a variable and `setup.py` could write the file with whatever
# policy was specified at build time. Until someone needs it, we go with
# the hacky but simple environment variable approach.
#
# Recognized values of PYTHON_ZSTANDARD_IMPORT_POLICY:
#
#   default        CPython -> C extension, PyPy -> CFFI, anything else -> C
#                  extension with a fallback to CFFI.
#   cffi_fallback  C extension with a fallback to CFFI.
#   rust           Rust backend only.
#   cext           C extension only.
#   cffi           CFFI backend only.
#
# ``backend`` records which implementation was actually loaded.
_module_policy = os.environ.get("PYTHON_ZSTANDARD_IMPORT_POLICY", "default")

if _module_policy == "default":
    if platform.python_implementation() in ("CPython",):
        from .backend_c import *  # type: ignore

        backend = "cext"
    elif platform.python_implementation() in ("PyPy",):
        from .backend_cffi import *  # type: ignore

        backend = "cffi"
    else:
        # Unknown implementation: prefer the C extension, but tolerate its
        # absence by falling back to CFFI.
        try:
            from .backend_c import *

            backend = "cext"
        except ImportError:
            from .backend_cffi import *

            backend = "cffi"
elif _module_policy == "cffi_fallback":
    try:
        from .backend_c import *

        backend = "cext"
    except ImportError:
        from .backend_cffi import *

        backend = "cffi"
elif _module_policy == "rust":
    from .backend_rust import *  # type: ignore

    backend = "rust"
elif _module_policy == "cext":
    from .backend_c import *

    backend = "cext"
elif _module_policy == "cffi":
    from .backend_cffi import *

    backend = "cffi"
else:
    # The message lists every policy accepted by the chain above, including
    # "rust", so that users are told about all valid choices.
    raise ImportError(
        "unknown module import policy: %s; use default, cffi_fallback, "
        "rust, cext, or cffi" % _module_policy
    )
81
# Keep this in sync with python-zstandard.h, rust-ext/src/lib.rs, and debian/changelog.
__version__ = "0.23.0"

# Integer tags for the closed / read / write modes. They are not referenced
# in the visible part of this module -- presumably kept for other code in the
# package; confirm before removing.
_MODE_CLOSED, _MODE_READ, _MODE_WRITE = 0, 1, 2
88
89
def open(
    filename,
    mode="rb",
    cctx=None,
    dctx=None,
    encoding=None,
    errors=None,
    newline=None,
    closefd=None,
):
    """Create a file object with zstd (de)compression.

    The object returned from this function will be a
    :py:class:`ZstdDecompressionReader` if opened for reading in binary mode,
    a :py:class:`ZstdCompressionWriter` if opened for writing in binary mode,
    or an ``io.TextIOWrapper`` if opened for reading or writing in text mode.

    :param filename:
       ``bytes``, ``str``, or ``os.PathLike`` defining a file to open or a
       file object (with a ``read()`` or ``write()`` method).
    :param mode:
       ``str`` File open mode. One of ``r``, ``w``, ``a`` or ``x``,
       optionally combined with ``b`` (binary) or ``t`` (text). As with the
       builtin ``open()``, a mode without ``b`` is a text mode. Modes using
       ``+`` are not supported.
    :param cctx:
       ``ZstdCompressor`` to use for compression. If not specified and file
       is opened for writing, the default ``ZstdCompressor`` will be used.
    :param dctx:
       ``ZstdDecompressor`` to use for decompression. If not specified and file
       is opened for reading, the default ``ZstdDecompressor`` will be used.
    :param encoding:
        ``str`` that defines text encoding to use when file is opened in text
        mode.
    :param errors:
       ``str`` defining text encoding error handling mode.
    :param newline:
       ``str`` defining newline to use in text mode.
    :param closefd:
       ``bool`` whether to close the file when the returned object is closed.
        Only used if a file object is passed (where it defaults to not
        closing). If a filename is specified, the opened file is always
        closed when the returned object is closed.
    :raises ValueError:
       If ``mode`` is not one of the supported modes.
    :raises TypeError:
       If ``filename`` is neither a path nor an object with a ``read()`` or
       ``write()`` method.

    If ``filename`` is a path and wrapping the opened file fails (for
    example because ``encoding`` is unknown), the file opened here is closed
    before the exception propagates.
    """
    normalized_mode = mode.replace("t", "")

    if normalized_mode in ("r", "rb"):
        if dctx is None:
            dctx = ZstdDecompressor()
        reading = True
        raw_open_mode = "rb"
    elif normalized_mode in ("w", "wb", "a", "ab", "x", "xb"):
        if cctx is None:
            cctx = ZstdCompressor()
        reading = False
        # The underlying file always carries compressed bytes, so it is
        # opened in binary mode even when the caller asked for text.
        raw_open_mode = normalized_mode
        if not raw_open_mode.endswith("b"):
            raw_open_mode += "b"
    else:
        raise ValueError("Invalid mode: {!r}".format(mode))

    if isinstance(filename, (str, bytes, os.PathLike)):
        inner_fh = builtins.open(filename, raw_open_mode)
        closefd = True
        opened_here = True
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        inner_fh = filename
        closefd = bool(closefd)
        opened_here = False
    else:
        raise TypeError(
            "filename must be a str, bytes, file or PathLike object"
        )

    try:
        if reading:
            fh = dctx.stream_reader(inner_fh, closefd=closefd)
        else:
            fh = cctx.stream_writer(inner_fh, closefd=closefd)

        if "b" not in normalized_mode:
            return io.TextIOWrapper(
                fh, encoding=encoding, errors=errors, newline=newline
            )

        return fh
    except BaseException:
        # Nothing has been handed back to the caller yet, so a file opened
        # by this function would otherwise leak. Caller-supplied file objects
        # stay open: the caller still owns them.
        if opened_here:
            inner_fh.close()
        raise
175
176
def compress(data: ByteString, level: int = 3) -> bytes:
    """One-shot compression of ``data`` into the zstd format.

    A convenience wrapper that builds a fresh ``ZstdCompressor`` with the
    requested ``level`` and returns the result of its ``compress()`` method;
    it is equivalent to ``ZstdCompressor(level=level).compress(data)``.

    Because a new compressor is constructed on every call, code that
    compresses many inputs in a tight loop will perform better by creating
    one ``ZstdCompressor`` and calling ``compress()`` on it repeatedly.
    """
    return ZstdCompressor(level=level).compress(data)
193
194
def decompress(data: ByteString, max_output_size: int = 0) -> bytes:
    """One-shot decompression of a zstd frame back into its original data.

    A convenience wrapper that builds a fresh, default ``ZstdDecompressor``
    and returns the result of its ``decompress()`` method; it is equivalent
    to ``ZstdDecompressor().decompress(data, max_output_size=max_output_size)``.

    Because a new decompressor is constructed on every call, code that
    decompresses many inputs in a tight loop will perform better by creating
    one ``ZstdDecompressor`` and calling ``decompress()`` on it repeatedly.
    """
    return ZstdDecompressor().decompress(
        data, max_output_size=max_output_size
    )