1"""Python bindings to the Zstandard (zstd) compression library (RFC-8878)."""
2
3import sys
# Refuse to import on interpreters outside the supported window:
# Python 3.9 (inclusive) up to, but not including, 3.14.
if sys.version_info < (3, 9) or sys.version_info >= (3, 14):
    raise RuntimeError(f"Unsupported Python version: {sys.version}")
6
# Public API of the package, grouped by the module each name comes from.
__all__ = (
    # Defined in this module (backports.zstd)
    "COMPRESSION_LEVEL_DEFAULT",
    "compress",
    "CompressionParameter",
    "decompress",
    "DecompressionParameter",
    "finalize_dict",
    "get_frame_info",
    "Strategy",
    "train_dict",

    # backports.zstd._shutil (resolved lazily by the module __getattr__)
    "register_shutil",

    # backports.zstd._zstdfile
    "open",
    "ZstdFile",

    # backports.zstd._zstd
    # (zstd_version_info is computed in this module from
    # _zstd.zstd_version_number)
    "get_frame_size",
    "zstd_version",
    "zstd_version_info",
    "ZstdCompressor",
    "ZstdDecompressor",
    "ZstdDict",
    "ZstdError",
)
35
36import backports.zstd._zstd as _zstd
37import enum
38from backports.zstd._zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError,
39 get_frame_size, zstd_version)
40from backports.zstd._zstdfile import ZstdFile, open, _nbytes
41
# _zstd.zstd_version_number packs the runtime library version into a single
# integer as MAJOR * 100 * 100 + MINOR * 100 + RELEASE; split it back apart.
zstd_version_info = (
    _zstd.zstd_version_number // 10000,
    _zstd.zstd_version_number // 100 % 100,
    _zstd.zstd_version_number % 100,
)
"""Version number of the runtime zstd library as a tuple of integers."""

# Fail at import time when the runtime zstd library predates 1.4.5.
if not zstd_version_info >= (1, 4, 5):
    raise RuntimeError("zstd version is too old")

COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT
"""The default compression level for Zstandard, currently '3'."""
52
53
class FrameInfo:
    """Information about a Zstandard frame.

    Instances are read-only records with two attributes,
    'decompressed_size' and 'dictionary_id'. Once constructed, the
    attributes can be neither rebound nor deleted; both operations raise
    AttributeError.
    """

    __slots__ = 'decompressed_size', 'dictionary_id'

    def __init__(self, decompressed_size, dictionary_id):
        # Go through object.__setattr__ because this class's own
        # __setattr__ rejects every assignment.
        super().__setattr__('decompressed_size', decompressed_size)
        super().__setattr__('dictionary_id', dictionary_id)

    def __repr__(self):
        return (f'FrameInfo(decompressed_size={self.decompressed_size}, '
                f'dictionary_id={self.dictionary_id})')

    def __setattr__(self, name, _):
        raise AttributeError(f"can't set attribute {name!r}")

    def __delattr__(self, name):
        # Without this hook, 'del info.decompressed_size' would succeed on
        # the slotted attributes and defeat the read-only guarantee.
        raise AttributeError(f"can't delete attribute {name!r}")
69
70
def get_frame_info(frame_buffer):
    """Read the frame header of a Zstandard frame and describe it.

    *frame_buffer* is a bytes-like object positioned at the very start of
    a frame. It must contain at least the complete frame header
    (6 to 18 bytes).

    A FrameInfo object is returned, exposing two attributes:

    'decompressed_size' is the number of bytes the frame expands to when
    decompressed, or None if that size is unknown.

    'dictionary_id' is a non-negative int below 2**32. The special value 0
    means no dictionary ID was recorded in the frame header: the frame may
    or may not need a dictionary to be decoded, and the ID of such a
    dictionary is not specified.
    """
    header_fields = _zstd.get_frame_info(frame_buffer)
    return FrameInfo(*header_fields)
86
87
def train_dict(samples, dict_size):
    """Train a Zstandard dictionary on *samples* and return it as a ZstdDict.

    *samples* is an iterable of bytes-like objects, each one representing
    a file.

    *dict_size* is the maximum size of the dictionary, in bytes.

    Raises TypeError if *dict_size* is not an int, and ValueError if the
    samples hold no data at all.
    """
    if not isinstance(dict_size, int):
        ds_cls = type(dict_size).__qualname__
        raise TypeError(f'dict_size must be an int object, not {ds_cls!r}.')

    # Materialize the iterable once: it is walked twice below.
    sample_seq = tuple(samples)
    # The trainer receives one flat buffer plus the length of every sample.
    joined = b''.join(sample_seq)
    sizes = tuple(map(_nbytes, sample_seq))
    if not joined:
        raise ValueError("samples contained no data; can't train dictionary.")
    return ZstdDict(_zstd.train_dict(joined, sizes, dict_size))
107
108
def finalize_dict(zstd_dict, /, samples, dict_size, level):
    """Finalize *zstd_dict* and return the result as a new ZstdDict.

    Starting from custom content used as the basis of a dictionary, plus a
    set of samples, the dictionary is finalized by adding headers and
    statistics according to the Zstandard dictionary format.

    This lets you compose effective dictionary content by hand and then use
    some samples to turn it into a finished dictionary. The basis
    dictionary may be a "raw content" dictionary. See *is_raw* in ZstdDict.

    *samples* is an iterable of bytes-like objects, each one representing
    a file.
    *dict_size* is the maximum size of the dictionary, in bytes.
    *level* is the compression level the dictionary is expected to be used
    with. Statistics differ between compression levels, so tuning the
    dictionary to the level can provide improvements.

    Raises TypeError if an argument has the wrong type, and ValueError if
    the samples hold no data at all.
    """
    # Argument types are checked in signature order.
    if not isinstance(zstd_dict, ZstdDict):
        raise TypeError('zstd_dict argument should be a ZstdDict object.')
    if not isinstance(dict_size, int):
        raise TypeError('dict_size argument should be an int object.')
    if not isinstance(level, int):
        raise TypeError('level argument should be an int object.')

    # Materialize the iterable once: it is walked twice below.
    sample_seq = tuple(samples)
    # _zstd receives one flat buffer plus the length of every sample.
    joined = b''.join(sample_seq)
    sizes = tuple(map(_nbytes, sample_seq))
    if not joined:
        raise ValueError("The samples are empty content, can't finalize the "
                         "dictionary.")
    finalized = _zstd.finalize_dict(zstd_dict.dict_content, joined,
                                    sizes, dict_size, level)
    return ZstdDict(finalized)
144
145
def compress(data, level=None, options=None, zstd_dict=None):
    """Compress *data* with Zstandard in one shot and return the bytes.

    *level* is an int giving the compression level; it defaults to
    COMPRESSION_LEVEL_DEFAULT ('3').
    *options* is a dict object holding advanced compression parameters.
    See CompressionParameter for more on options.
    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary.
    See the function train_dict for how to train a ZstdDict on sample data.

    Use a ZstdCompressor instead when compressing incrementally.
    """
    # A fresh compressor is used per call; FLUSH_FRAME mode is requested so
    # the single compress() call carries the whole input.
    return ZstdCompressor(
        level=level, options=options, zstd_dict=zstd_dict,
    ).compress(data, mode=ZstdCompressor.FLUSH_FRAME)
160
161
def decompress(data, zstd_dict=None, options=None):
    """Decompress *data*, which holds one or more Zstandard frames.

    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary.
    See the function train_dict for how to train a ZstdDict on sample data.
    *options* is a dict object holding advanced decompression parameters.
    See DecompressionParameter for more on options.

    Raises ZstdError if the input ends before a frame is complete.

    Use a ZstdDecompressor instead when decompressing incrementally.
    """
    pieces = []
    remaining = data
    while True:
        # A new decompressor is created for every pass; whatever it leaves
        # in unused_data is fed to the next one.
        decoder = ZstdDecompressor(options=options, zstd_dict=zstd_dict)
        pieces.append(decoder.decompress(remaining))
        if not decoder.eof:
            raise ZstdError('Compressed data ended before the '
                            'end-of-stream marker was reached')
        remaining = decoder.unused_data
        if not remaining:
            return b''.join(pieces)
183
184
class CompressionParameter(enum.IntEnum):
    """Advanced compression parameters.

    Every member's value is the matching ZSTD_c_* constant exported by the
    _zstd extension module.
    """

    # General compression tuning.
    compression_level = _zstd.ZSTD_c_compressionLevel
    window_log = _zstd.ZSTD_c_windowLog
    hash_log = _zstd.ZSTD_c_hashLog
    chain_log = _zstd.ZSTD_c_chainLog
    search_log = _zstd.ZSTD_c_searchLog
    min_match = _zstd.ZSTD_c_minMatch
    target_length = _zstd.ZSTD_c_targetLength
    strategy = _zstd.ZSTD_c_strategy

    # Long-distance matching (LDM).
    enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching
    ldm_hash_log = _zstd.ZSTD_c_ldmHashLog
    ldm_min_match = _zstd.ZSTD_c_ldmMinMatch
    ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog
    ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog

    # Flags (content size, checksum, dictionary ID).
    content_size_flag = _zstd.ZSTD_c_contentSizeFlag
    checksum_flag = _zstd.ZSTD_c_checksumFlag
    dict_id_flag = _zstd.ZSTD_c_dictIDFlag

    # Worker / job settings.
    nb_workers = _zstd.ZSTD_c_nbWorkers
    job_size = _zstd.ZSTD_c_jobSize
    overlap_log = _zstd.ZSTD_c_overlapLog

    def bounds(self):
        """Return the (lower, upper) int bounds of a compression parameter.

        Both the lower and upper bounds are inclusive.
        """
        param_id = self.value
        return _zstd.get_param_bounds(param_id, is_compress=True)
217
218
class DecompressionParameter(enum.IntEnum):
    """Advanced decompression parameters.

    Every member's value is the matching ZSTD_d_* constant exported by the
    _zstd extension module.
    """

    window_log_max = _zstd.ZSTD_d_windowLogMax

    def bounds(self):
        """Return the (lower, upper) int bounds of a decompression parameter.

        Both the lower and upper bounds are inclusive.
        """
        param_id = self.value
        return _zstd.get_param_bounds(param_id, is_compress=False)
230
231
class Strategy(enum.IntEnum):
    """Compression strategies, ordered from fastest to strongest.

    More strategies may be added in the future. Only the relative order
    (from fast to strong) is guaranteed; the numeric value of a member
    might change.
    """

    # Members are declared in fast-to-strong order; each value is the
    # matching ZSTD_* constant exported by the _zstd extension module.
    fast = _zstd.ZSTD_fast
    dfast = _zstd.ZSTD_dfast
    greedy = _zstd.ZSTD_greedy
    lazy = _zstd.ZSTD_lazy
    lazy2 = _zstd.ZSTD_lazy2
    btlazy2 = _zstd.ZSTD_btlazy2
    btopt = _zstd.ZSTD_btopt
    btultra = _zstd.ZSTD_btultra
    btultra2 = _zstd.ZSTD_btultra2
249
250
# Hand the two parameter enum classes defined in this module to the _zstd
# extension module at import time, so it can check the validity of the
# CompressionParameter & DecompressionParameter types.
# NOTE(review): the function name suggests _zstd also keeps these classes for
# later use -- confirm against the extension source.
_zstd.set_parameter_types(CompressionParameter, DecompressionParameter)
253
254
255# Lazy loading
def __getattr__(name):
    """Resolve module attributes that are imported only on first use.

    'register_shutil' is the only lazily provided name; it is imported from
    backports.zstd._shutil when requested. Any other name raises
    AttributeError.
    """
    if name != "register_shutil":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    from backports.zstd._shutil import register_shutil
    return register_shutil