Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/backports/zstd/__init__.py: 56%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

107 statements  

1"""Python bindings to the Zstandard (zstd) compression library (RFC-8878).""" 

2 

3import sys 

# Refuse to import on interpreters outside the supported range:
# 3.9 is the lowest accepted version and 3.14 is the first one rejected.
if sys.version_info < (3, 9) or sys.version_info >= (3, 14):
    raise RuntimeError(f"Unsupported Python version: {sys.version}")

6 

# Names exported by ``from backports.zstd import *``, grouped by the module
# that provides each one.
__all__ = (
    # Provided by backports.zstd itself
    "COMPRESSION_LEVEL_DEFAULT",
    "compress",
    "CompressionParameter",
    "decompress",
    "DecompressionParameter",
    "finalize_dict",
    "get_frame_info",
    "Strategy",
    "train_dict",

    # Provided by backports.zstd._shutil
    "register_shutil",

    # Provided by backports.zstd._zstdfile
    "open",
    "ZstdFile",

    # Provided by backports.zstd._zstd
    "get_frame_size",
    "zstd_version",
    "zstd_version_info",
    "ZstdCompressor",
    "ZstdDecompressor",
    "ZstdDict",
    "ZstdError",
)

35 

36import backports.zstd._zstd as _zstd 

37import enum 

38from backports.zstd._zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError, 

39 get_frame_size, zstd_version) 

40from backports.zstd._zstdfile import ZstdFile, open, _nbytes 

41 

# _zstd.zstd_version_number packs the library version into one integer as
# MAJOR * 100 * 100 + MINOR * 100 + RELEASE; split it back into three fields.
zstd_version_info = (
    _zstd.zstd_version_number // 10000,
    _zstd.zstd_version_number // 100 % 100,
    _zstd.zstd_version_number % 100,
)
"""Version number of the runtime zstd library as a tuple of integers."""

# Tuple comparison: anything older than 1.4.5 is rejected at import time.
if (1, 4, 5) > zstd_version_info:
    raise RuntimeError("zstd version is too old")

COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT
"""The default compression level for Zstandard, currently '3'."""

52 

53 

class FrameInfo:
    """Read-only record describing a Zstandard frame.

    An instance carries exactly two attributes, 'decompressed_size' and
    'dictionary_id'. Both are stored by the constructor; any later attribute
    assignment raises AttributeError.
    """

    __slots__ = ('decompressed_size', 'dictionary_id')

    def __init__(self, decompressed_size, dictionary_id):
        # This class's own __setattr__ rejects every assignment, so the
        # initial values are written through the parent implementation.
        store = super().__setattr__
        store('decompressed_size', decompressed_size)
        store('dictionary_id', dictionary_id)

    def __repr__(self):
        size = self.decompressed_size
        dict_id = self.dictionary_id
        return f'FrameInfo(decompressed_size={size}, dictionary_id={dict_id})'

    def __setattr__(self, name, _):
        # Unconditional: instances cannot be modified through assignment.
        raise AttributeError(f"can't set attribute {name!r}")

69 

70 

def get_frame_info(frame_buffer):
    """Read the header of a Zstandard frame and return a FrameInfo.

    *frame_buffer* is a bytes-like object that starts at the beginning of a
    frame and contains at least the whole frame header (6 to 18 bytes).

    The returned FrameInfo object has two attributes:

    'decompressed_size' is the size in bytes of the frame's data once
    decompressed, or None when that size is unknown.

    'dictionary_id' is a non-negative int below 2**32. The special value 0
    means the dictionary ID was not recorded in the frame header: the frame
    may or may not need a dictionary to be decoded, and the ID of such a
    dictionary is not specified.
    """
    header_fields = _zstd.get_frame_info(frame_buffer)
    return FrameInfo(*header_fields)

86 

87 

def train_dict(samples, dict_size):
    """Train a Zstandard dictionary on *samples* and return it as a ZstdDict.

    *samples* is an iterable of samples, each one a bytes-like object
    representing a file.

    *dict_size* is the maximum size of the dictionary, in bytes.

    Raises TypeError when *dict_size* is not an int, and ValueError when the
    samples contain no data at all.
    """
    if not isinstance(dict_size, int):
        raise TypeError(
            f'dict_size must be an int object, '
            f'not {type(dict_size).__qualname__!r}.'
        )

    # Materialise the iterable first: it is consumed twice, once to build the
    # concatenated buffer and once to record each sample's length.
    sample_seq = tuple(samples)
    joined = b''.join(sample_seq)
    sizes = tuple(map(_nbytes, sample_seq))
    if not joined:
        raise ValueError("samples contained no data; can't train dictionary.")
    return ZstdDict(_zstd.train_dict(joined, sizes, dict_size))

107 

108 

def finalize_dict(zstd_dict, /, samples, dict_size, level):
    """Finalize a basis dictionary and return the result as a ZstdDict.

    Starting from custom content used as the basis of a dictionary, plus a
    set of samples, *zstd_dict* is finalized by adding the headers and
    statistics required by the Zstandard dictionary format.

    This makes it possible to compose effective dictionary content by hand
    and then use samples to turn it into a complete dictionary. The basis
    dictionary may be a "raw content" dictionary. See *is_raw* in ZstdDict.

    *samples* is an iterable of samples, each one a bytes-like object
    representing a file.
    *dict_size* is the maximum size of the dictionary, in bytes.
    *level* is the expected compression level. Statistics differ between
    compression levels, so tuning the dictionary to the level that will be
    used can provide improvements.

    Raises TypeError when an argument has the wrong type, and ValueError
    when the samples contain no data.
    """
    # Argument checks run in signature order, before any sample is consumed.
    if not isinstance(zstd_dict, ZstdDict):
        raise TypeError('zstd_dict argument should be a ZstdDict object.')
    if not isinstance(dict_size, int):
        raise TypeError('dict_size argument should be an int object.')
    if not isinstance(level, int):
        raise TypeError('level argument should be an int object.')

    # The samples are walked twice (concatenation, then per-sample sizes),
    # so a one-shot iterable has to be materialised first.
    sample_seq = tuple(samples)
    joined = b''.join(sample_seq)
    sizes = tuple(map(_nbytes, sample_seq))
    if not joined:
        raise ValueError("The samples are empty content, can't finalize the "
                         "dictionary.")
    finalized = _zstd.finalize_dict(
        zstd_dict.dict_content, joined, sizes, dict_size, level
    )
    return ZstdDict(finalized)

144 

145 

def compress(data, level=None, options=None, zstd_dict=None):
    """Compress *data* in one shot and return the Zstandard output as bytes.

    *level* is an int giving the compression level; it defaults to
    COMPRESSION_LEVEL_DEFAULT ('3').
    *options* is a dict object holding advanced compression parameters. See
    CompressionParameter for more on options.
    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary. See
    the function train_dict for how to train a ZstdDict on sample data.

    Use a ZstdCompressor instead when compressing incrementally.
    """
    # A fresh compressor is used per call, and FLUSH_FRAME is requested so
    # the single compress() call is made in frame-flushing mode.
    return ZstdCompressor(
        level=level, options=options, zstd_dict=zstd_dict
    ).compress(data, mode=ZstdCompressor.FLUSH_FRAME)

160 

161 

def decompress(data, zstd_dict=None, options=None):
    """Decompress *data*, which holds one or more Zstandard frames.

    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary. See
    the function train_dict for how to train a ZstdDict on sample data.
    *options* is a dict object holding advanced decompression parameters.
    See DecompressionParameter for more on options.

    Raises ZstdError when the input stops before a frame is complete.

    Use a ZstdDecompressor instead when decompressing incrementally.
    """
    pieces = []
    pending = data
    more_input = True
    while more_input:
        # A new decompressor is created for every frame; whatever it leaves
        # in unused_data is fed to the next one.
        decoder = ZstdDecompressor(options=options, zstd_dict=zstd_dict)
        pieces.append(decoder.decompress(pending))
        if not decoder.eof:
            raise ZstdError('Compressed data ended before the '
                            'end-of-stream marker was reached')
        pending = decoder.unused_data
        more_input = bool(pending)
    return b''.join(pieces)

183 

184 

class CompressionParameter(enum.IntEnum):
    """Identifiers of the advanced compression parameters.

    Each member's value is the matching ZSTD_c_* constant exposed by the
    _zstd extension module.
    """

    # General compression tuning
    compression_level = _zstd.ZSTD_c_compressionLevel
    window_log = _zstd.ZSTD_c_windowLog
    hash_log = _zstd.ZSTD_c_hashLog
    chain_log = _zstd.ZSTD_c_chainLog
    search_log = _zstd.ZSTD_c_searchLog
    min_match = _zstd.ZSTD_c_minMatch
    target_length = _zstd.ZSTD_c_targetLength
    strategy = _zstd.ZSTD_c_strategy

    # Long-distance matching (ldm)
    enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching
    ldm_hash_log = _zstd.ZSTD_c_ldmHashLog
    ldm_min_match = _zstd.ZSTD_c_ldmMinMatch
    ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog
    ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog

    # Flags
    content_size_flag = _zstd.ZSTD_c_contentSizeFlag
    checksum_flag = _zstd.ZSTD_c_checksumFlag
    dict_id_flag = _zstd.ZSTD_c_dictIDFlag

    # Workers and jobs
    nb_workers = _zstd.ZSTD_c_nbWorkers
    job_size = _zstd.ZSTD_c_jobSize
    overlap_log = _zstd.ZSTD_c_overlapLog

    def bounds(self):
        """Return the (lower, upper) int bounds of this compression parameter.

        Both ends of the range are inclusive.
        """
        limits = _zstd.get_param_bounds(self.value, is_compress=True)
        return limits

217 

218 

class DecompressionParameter(enum.IntEnum):
    """Identifiers of the advanced decompression parameters.

    Each member's value is the matching ZSTD_d_* constant exposed by the
    _zstd extension module.
    """

    window_log_max = _zstd.ZSTD_d_windowLogMax

    def bounds(self):
        """Return the (lower, upper) int bounds of this decompression parameter.

        Both ends of the range are inclusive.
        """
        limits = _zstd.get_param_bounds(self.value, is_compress=False)
        return limits

230 

231 

class Strategy(enum.IntEnum):
    """Compression strategies, ordered from fastest to strongest.

    More strategies might be added in the future. Only the ordering (from
    fast to strong) is guaranteed; the numeric values might change.
    """

    # Members are declared in fast-to-strong order.
    fast = _zstd.ZSTD_fast
    dfast = _zstd.ZSTD_dfast
    greedy = _zstd.ZSTD_greedy
    lazy = _zstd.ZSTD_lazy
    lazy2 = _zstd.ZSTD_lazy2
    btlazy2 = _zstd.ZSTD_btlazy2
    btopt = _zstd.ZSTD_btopt
    btultra = _zstd.ZSTD_btultra
    btultra2 = _zstd.ZSTD_btultra2

249 

250 

# Hand both parameter enum classes to the C extension at import time.
# NOTE(review): presumably _zstd uses them to validate the keys of *options*
# dicts -- confirm against the extension's set_parameter_types.
_zstd.set_parameter_types(CompressionParameter, DecompressionParameter)

253 

254 

# Lazy loading
def __getattr__(name):
    """Resolve module attributes that are imported only on first use.

    Called when normal lookup of *name* on this module fails. Only
    'register_shutil' is provided this way: it is imported from
    backports.zstd._shutil at the moment it is requested. Every other name
    raises AttributeError.
    """
    if name != "register_shutil":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    # Deferred so that backports.zstd._shutil is not imported until needed.
    from backports.zstd._shutil import register_shutil
    return register_shutil