/src/libzip/lib/zip_algorithm_xz.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | zip_algorithm_xz.c -- LZMA/XZ (de)compression routines |
3 | | Based on zip_algorithm_deflate.c -- deflate (de)compression routines |
4 | | Copyright (C) 2017-2023 Dieter Baron and Thomas Klausner |
5 | | |
6 | | This file is part of libzip, a library to manipulate ZIP archives. |
7 | | The authors can be contacted at <info@libzip.org> |
8 | | |
9 | | Redistribution and use in source and binary forms, with or without |
10 | | modification, are permitted provided that the following conditions |
11 | | are met: |
12 | | 1. Redistributions of source code must retain the above copyright |
13 | | notice, this list of conditions and the following disclaimer. |
14 | | 2. Redistributions in binary form must reproduce the above copyright |
15 | | notice, this list of conditions and the following disclaimer in |
16 | | the documentation and/or other materials provided with the |
17 | | distribution. |
18 | | 3. The names of the authors may not be used to endorse or promote |
19 | | products derived from this software without specific prior |
20 | | written permission. |
21 | | |
22 | | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS |
23 | | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
24 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 | | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
26 | | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 | | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
28 | | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
29 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
30 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
31 | | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN |
32 | | IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
33 | | */ |
34 | | |
35 | | #include "zipint.h" |
36 | | |
37 | | #include <limits.h> |
38 | | #include <lzma.h> |
39 | | #include <stdlib.h> |
40 | | #include <zlib.h> |
41 | | /* State of the ZIP LZMA header: INCOMPLETE = not yet fully read (decompression) or not yet generated (compression), OUTPUT = generated and being copied out (compression only), DONE = nothing left to do. */ |
42 | | enum header_state { INCOMPLETE, OUTPUT, DONE }; |
43 | | /* Sizes and offsets describing the layout of the header buffer in struct ctx. */ |
44 | 14.2k | #define HEADER_BYTES_ZIP 9 /* header bytes stored in the ZIP entry: 4 bytes magic + 5 bytes lzma parameters */ |
45 | 1.64k | #define HEADER_MAGIC_LENGTH 4 |
46 | | #define HEADER_MAGIC1_OFFSET 0 /* first two magic bytes (these vary) */ |
47 | 16.8k | #define HEADER_MAGIC2_OFFSET 2 /* last two magic bytes, expected to be 0x05 0x00 */ |
48 | 1.64k | #define HEADER_SIZE_OFFSET 9 /* where the 8-byte uncompressed size is inserted, right after the lzma parameters */ |
49 | 3.29k | #define HEADER_SIZE_LENGTH 8 |
50 | 1.64k | #define HEADER_PARAMETERS_LENGTH 5 |
51 | 1.64k | #define HEADER_LZMA_ALONE_LENGTH (HEADER_PARAMETERS_LENGTH + HEADER_SIZE_LENGTH) /* size of a complete "lzma alone" header */ |
52 | | /* Per-stream coder state shared by the compression and decompression callbacks in this file. */ |
53 | | struct ctx { |
54 | | zip_error_t *error; /* where failures are reported with zip_error_set() */ |
55 | | bool compress; /* true when compressing, false when decompressing */ |
56 | | zip_uint32_t compression_flags; /* preset handed to lzma_lzma_preset(): level 0-9 combined with LZMA_PRESET_EXTREME */ |
57 | | bool end_of_input; /* set by end_of_input(); process() then runs lzma_code() with LZMA_FINISH */ |
58 | | lzma_stream zstr; /* liblzma stream state */ |
59 | | zip_uint16_t method; /* ZIP_CM_LZMA selects the "lzma alone" coder, any other value the xz stream coder */ |
60 | | /* header member is used for converting from zip to "lzma alone" |
61 | | * format |
62 | | * |
63 | | * "lzma alone" file format starts with: |
64 | | * 5 bytes lzma parameters |
65 | | * 8 bytes uncompressed size |
66 | | * compressed data |
67 | | * |
68 | | * zip archive on-disk format starts with |
69 | | * 4 bytes magic (first two bytes vary, e.g. 0x0914 or 0x1002, next bytes are 0x0500) |
70 | | * 5 bytes lzma parameters |
71 | | * compressed data |
72 | | * |
73 | | * we read the data into a header of the form |
74 | | * 4 bytes magic |
75 | | * 5 bytes lzma parameters |
76 | | * 8 bytes uncompressed size |
77 | | */ |
78 | | zip_uint8_t header[HEADER_MAGIC_LENGTH + HEADER_LZMA_ALONE_LENGTH]; |
79 | | zip_uint8_t header_bytes_offset; /* number of header bytes read so far (decompression) or written out so far (compression) */ |
80 | | enum header_state header_state; /* progress of the header handling, see enum header_state */ |
81 | | zip_uint64_t uncompresssed_size; /* set by start(): size stored in the "lzma alone" header when decompressing, ZIP_UINT64_MAX if not known */ |
82 | | }; |
83 | | |
84 | | /* Estimate an upper bound for the compressed size of uncompressed_size bytes (1.1 * size + 64 KiB + 13 header bytes); returns ZIP_UINT64_MAX when the result is smaller than the input, i.e. the sum wrapped around. */ |
85 | | static zip_uint64_t |
86 | 0 | maximum_compressed_size(zip_uint64_t uncompressed_size) { |
87 | | /* |
88 | | According to https://sourceforge.net/p/sevenzip/discussion/45797/thread/b6bd62f8/ |
89 | | |
90 | | 1) you can use |
91 | | outSize = 1.10 * originalSize + 64 KB. |
92 | | in most cases outSize is less than 1.02 from originalSize. |
93 | | 2) You can try LZMA2, where |
94 | | outSize can be = 1.001 * originalSize + 1 KB. |
95 | | */ |
96 | | /* 13 bytes added for lzma alone header */ /* NOTE(review): for sizes close to ZIP_UINT64_MAX the product below can exceed the range of zip_uint64_t before the cast; converting an out-of-range double to an integer is undefined behavior in C, so the wrap-around check may not be reliable there - confirm and consider integer arithmetic. */ |
97 | 0 | zip_uint64_t compressed_size = (zip_uint64_t)((double)uncompressed_size * 1.1) + 64 * 1024 + 13; |
98 | |
|
99 | 0 | if (compressed_size < uncompressed_size) { |
100 | 0 | return ZIP_UINT64_MAX; |
101 | 0 | } |
102 | 0 | return compressed_size; |
103 | 0 | } |
104 | | |
105 | | /* Create and initialize a coder context for the given method and direction; returns NULL and sets ZIP_ER_MEMORY in error when malloc() fails. */ |
106 | | static void * |
107 | 11.6k | allocate(bool compress, zip_uint32_t compression_flags, zip_error_t *error, zip_uint16_t method) { |
108 | 11.6k | struct ctx *ctx; |
109 | | |
110 | 11.6k | if ((ctx = (struct ctx *)malloc(sizeof(*ctx))) == NULL) { |
111 | 0 | zip_error_set(error, ZIP_ER_MEMORY, 0); |
112 | 0 | return NULL; |
113 | 0 | } |
114 | | /* uncompresssed_size is not initialized here; start() assigns it */ |
115 | 11.6k | ctx->error = error; |
116 | 11.6k | ctx->compress = compress; |
117 | 11.6k | if (compression_flags <= 9) { /* levels 0-9 are used as given, anything larger selects the default */ |
118 | 11.6k | ctx->compression_flags = compression_flags; |
119 | 11.6k | } else { |
120 | 0 | ctx->compression_flags = 6; /* default value */ |
121 | 0 | } |
122 | 11.6k | ctx->compression_flags |= LZMA_PRESET_EXTREME; /* the extreme variant of the preset is always requested */ |
123 | 11.6k | ctx->end_of_input = false; |
124 | 11.6k | memset(ctx->header, 0, sizeof(ctx->header)); |
125 | 11.6k | ctx->header_bytes_offset = 0; |
126 | 11.6k | if (method == ZIP_CM_LZMA) { /* only ZIP_CM_LZMA has a ZIP-specific header to read or generate */ |
127 | 10.6k | ctx->header_state = INCOMPLETE; |
128 | 10.6k | } |
129 | 1.01k | else { |
130 | 1.01k | ctx->header_state = DONE; |
131 | 1.01k | } |
132 | 11.6k | memset(&ctx->zstr, 0, sizeof(ctx->zstr)); |
133 | 11.6k | ctx->method = method; |
134 | 11.6k | return ctx; |
135 | 11.6k | } |
136 | | |
137 | | /* Allocate callback of zip_algorithm_xz_compress: creates a context in compression mode via allocate(). */ |
138 | | static void * |
139 | 0 | compress_allocate(zip_uint16_t method, zip_uint32_t compression_flags, zip_error_t *error) { |
140 | 0 | return allocate(true, compression_flags, error, method); |
141 | 0 | } |
142 | | |
143 | | /* Allocate callback of zip_algorithm_xz_decompress: creates a context in decompression mode via allocate(). */ |
144 | | static void * |
145 | 11.6k | decompress_allocate(zip_uint16_t method, zip_uint32_t compression_flags, zip_error_t *error) { |
146 | 11.6k | return allocate(false, compression_flags, error, method); |
147 | 11.6k | } |
148 | | |
149 | | /* Free a context created by allocate(); this only frees the struct, lzma_end() on the stream is called from end(). */ |
150 | | static void |
151 | 11.6k | deallocate(void *ud) { |
152 | 11.6k | struct ctx *ctx = (struct ctx *)ud; |
153 | 11.6k | free(ctx); |
154 | 11.6k | } |
155 | | |
156 | | /* Report the general purpose bit flags to set for an entry: bit 1 when compressing with ZIP_CM_LZMA, 0 in every other case (including decompression). */ |
157 | | static zip_uint16_t |
158 | 0 | general_purpose_bit_flags(void *ud) { |
159 | 0 | struct ctx *ctx = (struct ctx *)ud; |
160 | |
|
161 | 0 | if (!ctx->compress) { |
162 | 0 | return 0; |
163 | 0 | } |
164 | | |
165 | 0 | if (ctx->method == ZIP_CM_LZMA) { |
166 | | /* liblzma always returns an EOS/EOPM marker, see |
167 | | * https://sourceforge.net/p/lzmautils/discussion/708858/thread/84c5dbb9/#a5e4/3764 */ |
168 | 0 | return 1 << 1; |
169 | 0 | } |
170 | 0 | return 0; |
171 | 0 | } |
172 | | /* Translate a liblzma return code into a libzip error code; every code not listed explicitly becomes ZIP_ER_INTERNAL. */ |
173 | | static int |
174 | 1.61k | map_error(lzma_ret ret) { |
175 | 1.61k | switch (ret) { |
176 | 636 | case LZMA_DATA_ERROR: |
177 | 636 | case LZMA_UNSUPPORTED_CHECK: |
178 | 636 | return ZIP_ER_COMPRESSED_DATA; |
179 | | |
180 | 0 | case LZMA_MEM_ERROR: |
181 | 0 | return ZIP_ER_MEMORY; |
182 | | |
183 | 0 | case LZMA_OPTIONS_ERROR: |
184 | 0 | return ZIP_ER_INVAL; |
185 | | |
186 | 977 | default: |
187 | 977 | return ZIP_ER_INTERNAL; |
188 | 1.61k | } |
189 | 1.61k | } |
190 | | /* Set up the lzma stream as encoder or decoder for ctx->method and decide which uncompressed size input() will put into the "lzma alone" header. */ |
191 | | /* st and attributes are only consulted for that size decision; returns false with ctx->error set when liblzma cannot initialize the coder, true otherwise. */ |
192 | | static bool |
193 | 9.02k | start(void *ud, zip_stat_t *st, zip_file_attributes_t *attributes) { |
194 | 9.02k | struct ctx *ctx = (struct ctx *)ud; |
195 | 9.02k | lzma_ret ret; |
196 | | /* The preset options and the filter chain are only consumed by the encoder branches below. NOTE(review): the return value of lzma_lzma_preset() is ignored - confirm that a level of 0-9 with LZMA_PRESET_EXTREME can never be rejected. */ |
197 | 9.02k | lzma_options_lzma opt_lzma; |
198 | 9.02k | lzma_lzma_preset(&opt_lzma, ctx->compression_flags); |
199 | 9.02k | lzma_filter filters[] = { |
200 | 9.02k | {.id = (ctx->method == ZIP_CM_LZMA ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2), .options = &opt_lzma}, |
201 | 9.02k | {.id = LZMA_VLI_UNKNOWN, .options = NULL}, |
202 | 9.02k | }; |
203 | | /* start with empty input and output buffers */ |
204 | 9.02k | ctx->zstr.avail_in = 0; |
205 | 9.02k | ctx->zstr.next_in = NULL; |
206 | 9.02k | ctx->zstr.avail_out = 0; |
207 | 9.02k | ctx->zstr.next_out = NULL; |
208 | | /* ZIP_CM_LZMA uses the "lzma alone" coder, every other method the xz stream coder */ |
209 | 9.02k | if (ctx->compress) { |
210 | 0 | if (ctx->method == ZIP_CM_LZMA) |
211 | 0 | ret = lzma_alone_encoder(&ctx->zstr, filters[0].options); |
212 | 0 | else |
213 | 0 | ret = lzma_stream_encoder(&ctx->zstr, filters, LZMA_CHECK_CRC64); |
214 | 0 | } |
215 | 9.02k | else { |
216 | 9.02k | if (ctx->method == ZIP_CM_LZMA) |
217 | 8.16k | ret = lzma_alone_decoder(&ctx->zstr, UINT64_MAX); /* UINT64_MAX is passed as the decoder memory limit; presumably this disables the limit - see the liblzma documentation */ |
218 | 862 | else |
219 | 862 | ret = lzma_stream_decoder(&ctx->zstr, UINT64_MAX, LZMA_CONCATENATED); |
220 | 9.02k | } |
221 | | |
222 | 9.02k | if (ret != LZMA_OK) { |
223 | 0 | zip_error_set(ctx->error, map_error(ret), 0); |
224 | 0 | return false; |
225 | 0 | } |
226 | | |
227 | | /* If the general purpose bit flags are available, bits 1 & 2 are both covered by the mask and both zero, and the size in st is valid, use the real uncompressed size for the header; otherwise use ZIP_UINT64_MAX. */ |
228 | 9.02k | if ((attributes->valid & ZIP_FILE_ATTRIBUTES_GENERAL_PURPOSE_BIT_FLAGS) && (attributes->general_purpose_bit_mask & 0x6) == 0x6 && (attributes->general_purpose_bit_flags & 0x06) == 0 && (st->valid & ZIP_STAT_SIZE)) { |
229 | 5.05k | ctx->uncompresssed_size = st->size; |
230 | 5.05k | } |
231 | 3.97k | else { |
232 | 3.97k | ctx->uncompresssed_size = ZIP_UINT64_MAX; |
233 | 3.97k | } |
234 | | |
235 | 9.02k | return true; |
236 | 9.02k | } |
237 | | |
238 | | /* Shut down the lzma stream with lzma_end(); always returns true. */ |
239 | | static bool |
240 | 9.02k | end(void *ud) { |
241 | 9.02k | struct ctx *ctx = (struct ctx *)ud; |
242 | | |
243 | 9.02k | lzma_end(&ctx->zstr); |
244 | 9.02k | return true; |
245 | 9.02k | } |
246 | | /* Hand the next chunk of input (data, length) to the coder; returns false with ctx->error set on failure, true otherwise. */ |
247 | | /* When decompressing ZIP_CM_LZMA, the first HEADER_BYTES_ZIP bytes (possibly spread over several calls) are diverted into ctx->header, turned into an "lzma alone" header and fed to the decoder before any other data. */ |
248 | | static bool |
249 | 15.4k | input(void *ud, zip_uint8_t *data, zip_uint64_t length) { |
250 | 15.4k | struct ctx *ctx = (struct ctx *)ud; |
251 | | /* reject chunks larger than UINT_MAX and calls made while earlier input is still unconsumed */ |
252 | 15.4k | if (length > UINT_MAX || ctx->zstr.avail_in > 0) { |
253 | 0 | zip_error_set(ctx->error, ZIP_ER_INVAL, 0); |
254 | 0 | return false; |
255 | 0 | } |
256 | | |
257 | | /* For decompression of LZMA1: Have we read the full "lzma alone" header yet? */ |
258 | 15.4k | if (ctx->method == ZIP_CM_LZMA && !ctx->compress && ctx->header_state == INCOMPLETE) { |
259 | | /* if not, get more of the data */ |
260 | 14.2k | zip_uint8_t got = (zip_uint8_t)ZIP_MIN(HEADER_BYTES_ZIP - ctx->header_bytes_offset, length); /* at most the header bytes that are still missing */ |
261 | 14.2k | (void)memcpy_s(ctx->header + ctx->header_bytes_offset, sizeof(ctx->header) - ctx->header_bytes_offset, data, got); |
262 | 14.2k | ctx->header_bytes_offset += got; |
263 | 14.2k | length -= got; |
264 | 14.2k | data += got; |
265 | | /* Do we have a complete header now? */ |
266 | 14.2k | if (ctx->header_bytes_offset == HEADER_BYTES_ZIP) { |
267 | 13.6k | Bytef empty_buffer[1]; |
268 | 13.6k | zip_buffer_t *buffer; |
269 | | /* check magic (only the last two of the four magic bytes are compared) */ |
270 | 13.6k | if (ctx->header[HEADER_MAGIC2_OFFSET] != 0x05 || ctx->header[HEADER_MAGIC2_OFFSET + 1] != 0x00) { |
271 | | /* magic does not match */ |
272 | 12.0k | zip_error_set(ctx->error, ZIP_ER_COMPRESSED_DATA, 0); |
273 | 12.0k | return false; |
274 | 12.0k | } |
275 | | /* set size of uncompressed data in "lzma alone" header to the value chosen in start(): the real size, or ZIP_UINT64_MAX for "unknown" */ |
276 | 1.64k | if ((buffer = _zip_buffer_new(ctx->header + HEADER_SIZE_OFFSET, HEADER_SIZE_LENGTH)) == NULL) { |
277 | 0 | zip_error_set(ctx->error, ZIP_ER_MEMORY, 0); |
278 | 0 | return false; |
279 | 0 | } |
280 | 1.64k | _zip_buffer_put_64(buffer, ctx->uncompresssed_size); |
281 | 1.64k | _zip_buffer_free(buffer); |
282 | | /* Feed header into "lzma alone" decoder, for |
283 | | * initialization; this should not produce output. */ |
284 | 1.64k | ctx->zstr.next_in = (void *)(ctx->header + HEADER_MAGIC_LENGTH); /* skip the ZIP magic; the decoder only sees parameters + size */ |
285 | 1.64k | ctx->zstr.avail_in = HEADER_LZMA_ALONE_LENGTH; |
286 | 1.64k | ctx->zstr.total_in = 0; |
287 | 1.64k | ctx->zstr.next_out = empty_buffer; /* dummy output space; any byte written to it is treated as an error below */ |
288 | 1.64k | ctx->zstr.avail_out = sizeof(*empty_buffer); |
289 | 1.64k | ctx->zstr.total_out = 0; |
290 | | /* this just initializes the decoder and does not produce output, so it consumes the complete header */ |
291 | 1.64k | if (lzma_code(&ctx->zstr, LZMA_RUN) != LZMA_OK || ctx->zstr.total_out > 0) { |
292 | 408 | zip_error_set(ctx->error, ZIP_ER_COMPRESSED_DATA, 0); |
293 | 408 | return false; |
294 | 408 | } |
295 | 1.24k | ctx->header_state = DONE; |
296 | 1.24k | } |
297 | 14.2k | } |
298 | 3.00k | ctx->zstr.avail_in = (uInt)length; /* what is left of this chunk after any header bytes were taken out (may be 0) */ |
299 | 3.00k | ctx->zstr.next_in = (Bytef *)data; |
300 | | |
301 | 3.00k | return true; |
302 | 15.4k | } |
303 | | |
304 | | /* Record that no further input will be supplied, so process() finishes the stream; returns true if previously supplied input is still unconsumed. */ |
305 | 2.31k | static bool end_of_input(void *ud) { |
306 | 2.31k | struct ctx *ctx = (struct ctx *)ud; |
307 | | |
308 | 2.31k | ctx->end_of_input = true; |
309 | 2.31k | return ctx->zstr.avail_in != 0; |
310 | 2.31k | } |
311 | | /* Run the coder: write at most *length bytes to data and set *length to the number of bytes produced; returns ZIP_COMPRESSION_OK, ZIP_COMPRESSION_END, ZIP_COMPRESSION_NEED_DATA or ZIP_COMPRESSION_ERROR. */ |
312 | | /* When compressing ZIP_CM_LZMA, the ZIP header (4 bytes magic + 5 bytes lzma parameters) is emitted first; the 8-byte size field that liblzma generates after the parameters is not written out. */ |
313 | | static zip_compression_status_t |
314 | 42.6k | process(void *ud, zip_uint8_t *data, zip_uint64_t *length) { |
315 | 42.6k | struct ctx *ctx = (struct ctx *)ud; |
316 | 42.6k | uInt avail_out; |
317 | 42.6k | lzma_ret ret; |
318 | | /* for compression of LZMA1 */ |
319 | 42.6k | if (ctx->method == ZIP_CM_LZMA && ctx->compress) { |
320 | 0 | if (ctx->header_state == INCOMPLETE) { |
321 | | /* write magic to output buffer */ |
322 | 0 | ctx->header[0] = 0x09; |
323 | 0 | ctx->header[1] = 0x14; |
324 | 0 | ctx->header[2] = 0x05; |
325 | 0 | ctx->header[3] = 0x00; |
326 | | /* generate lzma parameters into output buffer */ |
327 | 0 | ctx->zstr.avail_out = HEADER_LZMA_ALONE_LENGTH; /* room for exactly the "lzma alone" header, placed right after the magic */ |
328 | 0 | ctx->zstr.next_out = ctx->header + HEADER_MAGIC_LENGTH; |
329 | 0 | ret = lzma_code(&ctx->zstr, LZMA_RUN); |
330 | 0 | if (ret != LZMA_OK || ctx->zstr.avail_out != 0) { |
331 | | /* assume that the whole header will be provided with the first call to lzma_code */ |
332 | 0 | return ZIP_COMPRESSION_ERROR; /* NOTE(review): ctx->error is not set on this path, unlike the error return at the end of this function */ |
333 | 0 | } |
334 | 0 | ctx->header_state = OUTPUT; |
335 | 0 | } |
336 | 0 | if (ctx->header_state == OUTPUT) { |
337 | | /* write header */ |
338 | 0 | zip_uint8_t write_len = (zip_uint8_t)ZIP_MIN(HEADER_BYTES_ZIP - ctx->header_bytes_offset, *length); /* the caller's buffer may be smaller than the rest of the header */ |
339 | 0 | (void)memcpy_s(data, *length, ctx->header + ctx->header_bytes_offset, write_len); |
340 | 0 | ctx->header_bytes_offset += write_len; |
341 | 0 | *length = write_len; |
342 | 0 | if (ctx->header_bytes_offset == HEADER_BYTES_ZIP) { |
343 | 0 | ctx->header_state = DONE; |
344 | 0 | } |
345 | 0 | return ZIP_COMPRESSION_OK; /* this call returns header bytes only; compressed data follows in later calls */ |
346 | 0 | } |
347 | 0 | } |
348 | | /* regular path: let liblzma write into the caller's buffer (at most UINT_MAX bytes per call) */ |
349 | 42.6k | avail_out = (uInt)ZIP_MIN(UINT_MAX, *length); |
350 | 42.6k | ctx->zstr.avail_out = avail_out; |
351 | 42.6k | ctx->zstr.next_out = (Bytef *)data; |
352 | | |
353 | 42.6k | ret = lzma_code(&ctx->zstr, ctx->end_of_input ? LZMA_FINISH : LZMA_RUN); |
354 | 42.6k | *length = avail_out - ctx->zstr.avail_out; /* number of bytes actually written to data */ |
355 | | |
356 | 42.6k | switch (ret) { |
357 | 15.8k | case LZMA_OK: |
358 | 15.8k | return ZIP_COMPRESSION_OK; |
359 | | |
360 | 401 | case LZMA_STREAM_END: |
361 | 401 | return ZIP_COMPRESSION_END; |
362 | | |
363 | 24.8k | case LZMA_BUF_ERROR: |
364 | 24.8k | if (ctx->zstr.avail_in == 0) { |
365 | 24.8k | return ZIP_COMPRESSION_NEED_DATA; |
366 | 24.8k | } |
367 | | /* LZMA_BUF_ERROR while input is still pending is reported as an error (ZIP_ER_INTERNAL via map_error()) */ |
368 | | /* fallthrough */ |
369 | 1.61k | default: |
370 | 1.61k | zip_error_set(ctx->error, map_error(ret), 0); |
371 | 1.61k | return ZIP_COMPRESSION_ERROR; |
372 | 42.6k | } |
373 | 42.6k | } |
374 | | |
375 | | /* Version Required should be set to 63 (6.3) because this compression |
376 | | method was only defined in appnote.txt version 6.3.8, but Winzip |
377 | | does not unpack it if the value is not 20. */ |
378 | | |
379 | | /* clang-format off */ |
380 | | /* Callback table for compression; it differs from zip_algorithm_xz_decompress only in the allocate callback. */ |
381 | | zip_compression_algorithm_t zip_algorithm_xz_compress = { |
382 | | maximum_compressed_size, |
383 | | compress_allocate, |
384 | | deallocate, |
385 | | general_purpose_bit_flags, |
386 | | 20, /* see the "Version Required" comment above */ |
387 | | start, |
388 | | end, |
389 | | input, |
390 | | end_of_input, |
391 | | process |
392 | | }; |
393 | | |
394 | | /* Callback table for decompression; it differs from zip_algorithm_xz_compress only in the allocate callback. */ |
395 | | zip_compression_algorithm_t zip_algorithm_xz_decompress = { |
396 | | maximum_compressed_size, |
397 | | decompress_allocate, |
398 | | deallocate, |
399 | | general_purpose_bit_flags, |
400 | | 20, /* same value as in the compression table */ |
401 | | start, |
402 | | end, |
403 | | input, |
404 | | end_of_input, |
405 | | process |
406 | | }; |
407 | | |
408 | | /* clang-format on */ |