/src/libzip/lib/zip_algorithm_xz.c
Line | Count | Source |
1 | | /* |
2 | | zip_algorithm_xz.c -- LZMA/XZ (de)compression routines |
3 | | Based on zip_algorithm_deflate.c -- deflate (de)compression routines |
4 | | Copyright (C) 2017-2025 Dieter Baron and Thomas Klausner |
5 | | |
6 | | This file is part of libzip, a library to manipulate ZIP archives. |
7 | | The authors can be contacted at <info@libzip.org> |
8 | | |
9 | | Redistribution and use in source and binary forms, with or without |
10 | | modification, are permitted provided that the following conditions |
11 | | are met: |
12 | | 1. Redistributions of source code must retain the above copyright |
13 | | notice, this list of conditions and the following disclaimer. |
14 | | 2. Redistributions in binary form must reproduce the above copyright |
15 | | notice, this list of conditions and the following disclaimer in |
16 | | the documentation and/or other materials provided with the |
17 | | distribution. |
18 | | 3. The names of the authors may not be used to endorse or promote |
19 | | products derived from this software without specific prior |
20 | | written permission. |
21 | | |
22 | | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS |
23 | | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
24 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 | | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
26 | | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 | | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
28 | | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
29 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
30 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
31 | | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN |
32 | | IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
33 | | */ |
34 | | |
35 | | #include "zipint.h" |
36 | | |
37 | | #include <limits.h> |
38 | | #include <lzma.h> |
39 | | #include <stdlib.h> |
40 | | #include <zlib.h> |
41 | | |
/* Progress of the zip LZMA header (magic + lzma parameters) that precedes
 * the compressed data of a ZIP_CM_LZMA entry. Other methods start in DONE. */
enum header_state {
    INCOMPLETE = 0, /* header not yet fully read (decompression) or not yet generated (compression) */
    OUTPUT = 1,     /* compression only: header generated, still being copied to the caller */
    DONE = 2        /* header handled; remaining data goes straight through liblzma */
};
47 | | |
/* Layout of ctx->header: magic, lzma parameters, uncompressed size. */
#define HEADER_MAGIC_LENGTH 4
#define HEADER_PARAMETERS_LENGTH 5
#define HEADER_SIZE_LENGTH 8
/* offsets of the two 2-byte halves of the magic */
#define HEADER_MAGIC1_OFFSET 0
#define HEADER_MAGIC2_OFFSET 2
/* the part stored in the zip archive: magic + lzma parameters */
#define HEADER_BYTES_ZIP (HEADER_MAGIC_LENGTH + HEADER_PARAMETERS_LENGTH)
/* the uncompressed size follows directly after the lzma parameters */
#define HEADER_SIZE_OFFSET (HEADER_MAGIC_LENGTH + HEADER_PARAMETERS_LENGTH)
/* the part handed to liblzma: lzma parameters + uncompressed size */
#define HEADER_LZMA_ALONE_LENGTH (HEADER_PARAMETERS_LENGTH + HEADER_SIZE_LENGTH)
56 | | |
57 | | struct ctx { |
58 | | zip_error_t *error; |
59 | | bool compress; |
60 | | zip_uint32_t compression_flags; |
61 | | bool end_of_input; |
62 | | lzma_stream zstr; |
63 | | zip_uint16_t method; |
64 | | /* header member is used for converting from zip to "lzma alone" |
65 | | * format |
66 | | * |
67 | | * "lzma alone" file format starts with: |
68 | | * 5 bytes lzma parameters |
69 | | * 8 bytes uncompressed size |
70 | | * compressed data |
71 | | * |
72 | | * zip archive on-disk format starts with |
73 | | * 4 bytes magic (first two bytes vary, e.g. 0x0914 or 0x1002, next bytes are 0x0500) |
74 | | * 5 bytes lzma parameters |
75 | | * compressed data |
76 | | * |
77 | | * we read the data into a header of the form |
78 | | * 4 bytes magic |
79 | | * 5 bytes lzma parameters |
80 | | * 8 bytes uncompressed size |
81 | | */ |
82 | | zip_uint8_t header[HEADER_MAGIC_LENGTH + HEADER_LZMA_ALONE_LENGTH]; |
83 | | zip_uint8_t header_bytes_offset; |
84 | | enum header_state header_state; |
85 | | zip_uint64_t uncompresssed_size; |
86 | | }; |
87 | | |
88 | | |
89 | 0 | static zip_uint64_t maximum_compressed_size(zip_uint64_t uncompressed_size) { |
90 | | /* |
91 | | According to https://sourceforge.net/p/sevenzip/discussion/45797/thread/b6bd62f8/ |
92 | | |
93 | | 1) you can use |
94 | | outSize = 1.10 * originalSize + 64 KB. |
95 | | in most cases outSize is less then 1.02 from originalSize. |
96 | | 2) You can try LZMA2, where |
97 | | outSize can be = 1.001 * originalSize + 1 KB. |
98 | | */ |
99 | | /* 13 bytes added for lzma alone header */ |
100 | 0 | zip_uint64_t compressed_size = (zip_uint64_t)((double)uncompressed_size * 1.1) + 64 * 1024 + 13; |
101 | |
|
102 | 0 | if (compressed_size < uncompressed_size) { |
103 | 0 | return ZIP_UINT64_MAX; |
104 | 0 | } |
105 | 0 | return compressed_size; |
106 | 0 | } |
107 | | |
108 | | |
109 | 0 | static void *allocate(bool compress, zip_uint32_t compression_flags, zip_error_t *error, zip_uint16_t method) { |
110 | 0 | struct ctx *ctx; |
111 | |
|
112 | 0 | if ((ctx = (struct ctx *)malloc(sizeof(*ctx))) == NULL) { |
113 | 0 | zip_error_set(error, ZIP_ER_MEMORY, 0); |
114 | 0 | return NULL; |
115 | 0 | } |
116 | | |
117 | 0 | ctx->error = error; |
118 | 0 | ctx->compress = compress; |
119 | 0 | if (compression_flags <= 9) { |
120 | 0 | ctx->compression_flags = compression_flags; |
121 | 0 | } |
122 | 0 | else { |
123 | 0 | ctx->compression_flags = 6; /* default value */ |
124 | 0 | } |
125 | 0 | ctx->compression_flags |= LZMA_PRESET_EXTREME; |
126 | 0 | ctx->end_of_input = false; |
127 | 0 | memset(ctx->header, 0, sizeof(ctx->header)); |
128 | 0 | ctx->header_bytes_offset = 0; |
129 | 0 | if (method == ZIP_CM_LZMA) { |
130 | 0 | ctx->header_state = INCOMPLETE; |
131 | 0 | } |
132 | 0 | else { |
133 | 0 | ctx->header_state = DONE; |
134 | 0 | } |
135 | 0 | memset(&ctx->zstr, 0, sizeof(ctx->zstr)); |
136 | 0 | ctx->method = method; |
137 | 0 | return ctx; |
138 | 0 | } |
139 | | |
140 | | |
141 | 0 | static void *compress_allocate(zip_uint16_t method, zip_uint32_t compression_flags, zip_error_t *error) { |
142 | 0 | return allocate(true, compression_flags, error, method); |
143 | 0 | } |
144 | | |
145 | | |
146 | 0 | static void *decompress_allocate(zip_uint16_t method, zip_uint32_t compression_flags, zip_error_t *error) { |
147 | 0 | return allocate(false, compression_flags, error, method); |
148 | 0 | } |
149 | | |
150 | | |
/* Release the memory of a context created by allocate(). Only the context
 * itself is freed here; the liblzma stream is released by end(). */
static void deallocate(void *ud) {
    free(ud);
}
155 | | |
156 | | |
157 | 0 | static zip_uint16_t general_purpose_bit_flags(void *ud) { |
158 | 0 | struct ctx *ctx = (struct ctx *)ud; |
159 | |
|
160 | 0 | if (!ctx->compress) { |
161 | 0 | return 0; |
162 | 0 | } |
163 | | |
164 | 0 | if (ctx->method == ZIP_CM_LZMA) { |
165 | | /* liblzma always returns an EOS/EOPM marker, see |
166 | | * https://sourceforge.net/p/lzmautils/discussion/708858/thread/84c5dbb9/#a5e4/3764 */ |
167 | 0 | return 1 << 1; |
168 | 0 | } |
169 | 0 | return 0; |
170 | 0 | } |
171 | | |
172 | 0 | static int map_error(lzma_ret ret) { |
173 | 0 | switch (ret) { |
174 | 0 | case LZMA_DATA_ERROR: |
175 | 0 | case LZMA_UNSUPPORTED_CHECK: |
176 | 0 | return ZIP_ER_COMPRESSED_DATA; |
177 | | |
178 | 0 | case LZMA_MEM_ERROR: |
179 | 0 | return ZIP_ER_MEMORY; |
180 | | |
181 | 0 | case LZMA_OPTIONS_ERROR: |
182 | 0 | return ZIP_ER_INVAL; |
183 | | |
184 | 0 | default: |
185 | 0 | return ZIP_ER_INTERNAL; |
186 | 0 | } |
187 | 0 | } |
188 | | |
189 | | |
190 | 0 | static bool start(void *ud, zip_stat_t *st, zip_file_attributes_t *attributes) { |
191 | 0 | struct ctx *ctx = (struct ctx *)ud; |
192 | 0 | lzma_ret ret; |
193 | |
|
194 | 0 | lzma_options_lzma opt_lzma; |
195 | 0 | lzma_lzma_preset(&opt_lzma, ctx->compression_flags); |
196 | 0 | lzma_filter filters[] = { |
197 | 0 | {.id = (ctx->method == ZIP_CM_LZMA ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2), .options = &opt_lzma}, |
198 | 0 | {.id = LZMA_VLI_UNKNOWN, .options = NULL}, |
199 | 0 | }; |
200 | |
|
201 | 0 | ctx->zstr.avail_in = 0; |
202 | 0 | ctx->zstr.next_in = NULL; |
203 | 0 | ctx->zstr.avail_out = 0; |
204 | 0 | ctx->zstr.next_out = NULL; |
205 | |
|
206 | 0 | if (ctx->compress) { |
207 | 0 | if (ctx->method == ZIP_CM_LZMA) { |
208 | 0 | ret = lzma_alone_encoder(&ctx->zstr, filters[0].options); |
209 | 0 | } |
210 | 0 | else { |
211 | 0 | ret = lzma_stream_encoder(&ctx->zstr, filters, LZMA_CHECK_CRC64); |
212 | 0 | } |
213 | 0 | } |
214 | 0 | else { |
215 | 0 | if (ctx->method == ZIP_CM_LZMA) { |
216 | 0 | ret = lzma_alone_decoder(&ctx->zstr, UINT64_MAX); |
217 | 0 | } |
218 | 0 | else { |
219 | 0 | ret = lzma_stream_decoder(&ctx->zstr, UINT64_MAX, LZMA_CONCATENATED); |
220 | 0 | } |
221 | 0 | } |
222 | |
|
223 | 0 | if (ret != LZMA_OK) { |
224 | 0 | zip_error_set(ctx->error, map_error(ret), 0); |
225 | 0 | return false; |
226 | 0 | } |
227 | | |
228 | | /* If general purpose bits 1 & 2 are both zero, write real uncompressed size in header. */ |
229 | 0 | if ((attributes->valid & ZIP_FILE_ATTRIBUTES_GENERAL_PURPOSE_BIT_FLAGS) && (attributes->general_purpose_bit_mask & 0x6) == 0x6 && (attributes->general_purpose_bit_flags & 0x06) == 0 && (st->valid & ZIP_STAT_SIZE)) { |
230 | 0 | ctx->uncompresssed_size = st->size; |
231 | 0 | } |
232 | 0 | else { |
233 | 0 | ctx->uncompresssed_size = ZIP_UINT64_MAX; |
234 | 0 | } |
235 | |
|
236 | 0 | return true; |
237 | 0 | } |
238 | | |
239 | | |
240 | 0 | static bool end(void *ud) { |
241 | 0 | struct ctx *ctx = (struct ctx *)ud; |
242 | |
|
243 | 0 | lzma_end(&ctx->zstr); |
244 | 0 | return true; |
245 | 0 | } |
246 | | |
247 | | |
248 | 0 | static bool input(void *ud, zip_uint8_t *data, zip_uint64_t length) { |
249 | 0 | struct ctx *ctx = (struct ctx *)ud; |
250 | |
|
251 | 0 | if (length > UINT_MAX || ctx->zstr.avail_in > 0) { |
252 | 0 | zip_error_set(ctx->error, ZIP_ER_INVAL, 0); |
253 | 0 | return false; |
254 | 0 | } |
255 | | |
256 | | /* For decompression of LZMA1: Have we read the full "lzma alone" header yet? */ |
257 | 0 | if (ctx->method == ZIP_CM_LZMA && !ctx->compress && ctx->header_state == INCOMPLETE) { |
258 | | /* if not, get more of the data */ |
259 | 0 | zip_uint8_t got = (zip_uint8_t)ZIP_MIN(HEADER_BYTES_ZIP - ctx->header_bytes_offset, length); |
260 | 0 | (void)memcpy_s(ctx->header + ctx->header_bytes_offset, sizeof(ctx->header) - ctx->header_bytes_offset, data, got); |
261 | 0 | ctx->header_bytes_offset += got; |
262 | 0 | length -= got; |
263 | 0 | data += got; |
264 | | /* Do we have a complete header now? */ |
265 | 0 | if (ctx->header_bytes_offset == HEADER_BYTES_ZIP) { |
266 | 0 | Bytef empty_buffer[1]; |
267 | 0 | zip_buffer_t *buffer; |
268 | | /* check magic */ |
269 | 0 | if (ctx->header[HEADER_MAGIC2_OFFSET] != 0x05 || ctx->header[HEADER_MAGIC2_OFFSET + 1] != 0x00) { |
270 | | /* magic does not match */ |
271 | 0 | zip_error_set(ctx->error, ZIP_ER_COMPRESSED_DATA, 0); |
272 | 0 | return false; |
273 | 0 | } |
274 | | /* set size of uncompressed data in "lzma alone" header to "unknown" */ |
275 | 0 | if ((buffer = _zip_buffer_new(ctx->header + HEADER_SIZE_OFFSET, HEADER_SIZE_LENGTH)) == NULL) { |
276 | 0 | zip_error_set(ctx->error, ZIP_ER_MEMORY, 0); |
277 | 0 | return false; |
278 | 0 | } |
279 | 0 | _zip_buffer_put_64(buffer, ctx->uncompresssed_size); |
280 | 0 | _zip_buffer_free(buffer); |
281 | | /* Feed header into "lzma alone" decoder, for |
282 | | * initialization; this should not produce output. */ |
283 | 0 | ctx->zstr.next_in = (void *)(ctx->header + HEADER_MAGIC_LENGTH); |
284 | 0 | ctx->zstr.avail_in = HEADER_LZMA_ALONE_LENGTH; |
285 | 0 | ctx->zstr.total_in = 0; |
286 | 0 | ctx->zstr.next_out = empty_buffer; |
287 | 0 | ctx->zstr.avail_out = sizeof(*empty_buffer); |
288 | 0 | ctx->zstr.total_out = 0; |
289 | | /* this just initializes the decoder and does not produce output, so it consumes the complete header */ |
290 | 0 | if (lzma_code(&ctx->zstr, LZMA_RUN) != LZMA_OK || ctx->zstr.total_out > 0) { |
291 | 0 | zip_error_set(ctx->error, ZIP_ER_COMPRESSED_DATA, 0); |
292 | 0 | return false; |
293 | 0 | } |
294 | 0 | ctx->header_state = DONE; |
295 | 0 | } |
296 | 0 | } |
297 | 0 | ctx->zstr.avail_in = (uInt)length; |
298 | 0 | ctx->zstr.next_in = (Bytef *)data; |
299 | |
|
300 | 0 | return true; |
301 | 0 | } |
302 | | |
303 | | |
304 | 0 | static bool end_of_input(void *ud) { |
305 | 0 | struct ctx *ctx = (struct ctx *)ud; |
306 | |
|
307 | 0 | ctx->end_of_input = true; |
308 | 0 | return ctx->zstr.avail_in != 0; |
309 | 0 | } |
310 | | |
311 | | |
312 | 0 | static zip_compression_status_t process(void *ud, zip_uint8_t *data, zip_uint64_t *length) { |
313 | 0 | struct ctx *ctx = (struct ctx *)ud; |
314 | 0 | uInt avail_out; |
315 | 0 | lzma_ret ret; |
316 | | /* for compression of LZMA1 */ |
317 | 0 | if (ctx->method == ZIP_CM_LZMA && ctx->compress) { |
318 | 0 | if (ctx->header_state == INCOMPLETE) { |
319 | | /* write magic to output buffer */ |
320 | 0 | ctx->header[0] = 0x09; |
321 | 0 | ctx->header[1] = 0x14; |
322 | 0 | ctx->header[2] = 0x05; |
323 | 0 | ctx->header[3] = 0x00; |
324 | | /* generate lzma parameters into output buffer */ |
325 | 0 | ctx->zstr.avail_out = HEADER_LZMA_ALONE_LENGTH; |
326 | 0 | ctx->zstr.next_out = ctx->header + HEADER_MAGIC_LENGTH; |
327 | 0 | ret = lzma_code(&ctx->zstr, LZMA_RUN); |
328 | 0 | if (ret != LZMA_OK || ctx->zstr.avail_out != 0) { |
329 | | /* assume that the whole header will be provided with the first call to lzma_code */ |
330 | 0 | return ZIP_COMPRESSION_ERROR; |
331 | 0 | } |
332 | 0 | ctx->header_state = OUTPUT; |
333 | 0 | } |
334 | 0 | if (ctx->header_state == OUTPUT) { |
335 | | /* write header */ |
336 | 0 | zip_uint8_t write_len = (zip_uint8_t)ZIP_MIN(HEADER_BYTES_ZIP - ctx->header_bytes_offset, *length); |
337 | 0 | (void)memcpy_s(data, *length, ctx->header + ctx->header_bytes_offset, write_len); |
338 | 0 | ctx->header_bytes_offset += write_len; |
339 | 0 | *length = write_len; |
340 | 0 | if (ctx->header_bytes_offset == HEADER_BYTES_ZIP) { |
341 | 0 | ctx->header_state = DONE; |
342 | 0 | } |
343 | 0 | return ZIP_COMPRESSION_OK; |
344 | 0 | } |
345 | 0 | } |
346 | | |
347 | 0 | avail_out = (uInt)ZIP_MIN(UINT_MAX, *length); |
348 | 0 | ctx->zstr.avail_out = avail_out; |
349 | 0 | ctx->zstr.next_out = (Bytef *)data; |
350 | |
|
351 | 0 | ret = lzma_code(&ctx->zstr, ctx->end_of_input ? LZMA_FINISH : LZMA_RUN); |
352 | 0 | *length = avail_out - ctx->zstr.avail_out; |
353 | |
|
354 | 0 | switch (ret) { |
355 | 0 | case LZMA_OK: |
356 | 0 | return ZIP_COMPRESSION_OK; |
357 | | |
358 | 0 | case LZMA_STREAM_END: |
359 | 0 | return ZIP_COMPRESSION_END; |
360 | | |
361 | 0 | case LZMA_BUF_ERROR: |
362 | 0 | if (ctx->zstr.avail_in == 0) { |
363 | 0 | return ZIP_COMPRESSION_NEED_DATA; |
364 | 0 | } |
365 | | |
366 | | /* fallthrough */ |
367 | 0 | default: |
368 | 0 | zip_error_set(ctx->error, map_error(ret), 0); |
369 | 0 | return ZIP_COMPRESSION_ERROR; |
370 | 0 | } |
371 | 0 | } |
372 | | |
373 | | /* Version Required should be set to 63 (6.3) because this compression |
374 | | method was only defined in appnote.txt version 6.3.8, but Winzip |
375 | | does not unpack it if the value is not 20. */ |
376 | | |
377 | | /* clang-format off */ |
378 | | |
379 | | zip_compression_algorithm_t zip_algorithm_xz_compress = { |
380 | | maximum_compressed_size, |
381 | | compress_allocate, |
382 | | deallocate, |
383 | | general_purpose_bit_flags, |
384 | | 20, |
385 | | start, |
386 | | end, |
387 | | input, |
388 | | end_of_input, |
389 | | process |
390 | | }; |
391 | | |
392 | | |
393 | | zip_compression_algorithm_t zip_algorithm_xz_decompress = { |
394 | | maximum_compressed_size, |
395 | | decompress_allocate, |
396 | | deallocate, |
397 | | general_purpose_bit_flags, |
398 | | 20, |
399 | | start, |
400 | | end, |
401 | | input, |
402 | | end_of_input, |
403 | | process |
404 | | }; |
405 | | |
406 | | /* clang-format on */ |