/src/xz/src/liblzma/common/stream_decoder.c
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////////////// |
2 | | // |
3 | | /// \file stream_decoder.c |
4 | | /// \brief Decodes .xz Streams |
5 | | // |
6 | | // Author: Lasse Collin |
7 | | // |
8 | | // This file has been put into the public domain. |
9 | | // You can do whatever you want with this file. |
10 | | // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #include "stream_decoder.h" |
14 | | #include "block_decoder.h" |
15 | | #include "index.h" |
16 | | /* Private state of the .xz Stream decoder; one instance is allocated by lzma_stream_decoder_init() and released by stream_decoder_end(). */ |
17 | | /* "sequence" records which part of the Stream is expected next; stream_decode() switches on it. */ |
18 | | typedef struct { |
19 | | enum { |
20 | | SEQ_STREAM_HEADER, /* buffering and decoding the Stream Header */ |
21 | | SEQ_BLOCK_HEADER, /* telling the Index apart from a Block Header and buffering the latter */ |
22 | | SEQ_BLOCK_INIT, /* decoding the Block Header, checking memlimit, initializing the Block decoder */ |
23 | | SEQ_BLOCK_RUN, /* running the Block decoder until it reports LZMA_STREAM_END */ |
24 | | SEQ_INDEX, /* decoding the Index through index_hash */ |
25 | | SEQ_STREAM_FOOTER, /* buffering the Stream Footer and checking it against the Index and the Stream Header */ |
26 | | SEQ_STREAM_PADDING, /* skipping zero bytes after a Stream; reached only when "concatenated" is true */ |
27 | | } sequence; |
28 | | |
29 | | /// Block decoder |
30 | | lzma_next_coder block_decoder; |
31 | | |
32 | | /// Block options decoded by the Block Header decoder and used by |
33 | | /// the Block decoder. |
34 | | lzma_block block_options; |
35 | | |
36 | | /// Stream Flags from Stream Header |
37 | | lzma_stream_flags stream_flags; |
38 | | |
39 | | /// Index is hashed so that it can be compared to the sizes of Blocks |
40 | | /// with O(1) memory usage. |
41 | | lzma_index_hash *index_hash; |
42 | | |
43 | | /// Memory usage limit |
44 | | uint64_t memlimit; |
45 | | |
46 | | /// Memory needed by the most recently validated filter chain (only an estimate); starts at LZMA_MEMUSAGE_BASE |
47 | | uint64_t memusage; |
48 | | |
49 | | /// If true, LZMA_NO_CHECK is returned if the Stream has |
50 | | /// no integrity check. |
51 | | bool tell_no_check; |
52 | | |
53 | | /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has |
54 | | /// an integrity check that isn't supported by this liblzma build. |
55 | | bool tell_unsupported_check; |
56 | | |
57 | | /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. |
58 | | bool tell_any_check; |
59 | | |
60 | | /// If true, we will tell the Block decoder to skip calculating |
61 | | /// and verifying the integrity check. |
62 | | bool ignore_check; |
63 | | |
64 | | /// If true, we will decode concatenated Streams that possibly have |
65 | | /// Stream Padding between or after them. LZMA_STREAM_END is returned |
66 | | /// once the application isn't giving us any new input (LZMA_FINISH), |
67 | | /// and we aren't in the middle of a Stream, and possible |
68 | | /// Stream Padding is a multiple of four bytes. |
69 | | bool concatenated; |
70 | | |
71 | | /// When decoding concatenated Streams, this is true as long as we |
72 | | /// are decoding the first Stream. This is needed to avoid misleading |
73 | | /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic |
74 | | /// bytes. |
75 | | bool first_stream; |
76 | | |
77 | | /// Write position in buffer[]; while in Stream Padding, the number of padding bytes seen modulo four |
78 | | size_t pos; |
79 | | |
80 | | /// Buffer to hold Stream Header, Block Header, and Stream Footer. |
81 | | /// Block Header has biggest maximum size. |
82 | | uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; |
83 | | } lzma_stream_coder; |
84 | | /* Prepares the coder for the start of a Stream: (re)initializes the Index hash and rewinds the state machine to SEQ_STREAM_HEADER with pos = 0. */ |
85 | | /* Returns LZMA_MEM_ERROR if lzma_index_hash_init() yields NULL, otherwise LZMA_OK. The option fields set by lzma_stream_decoder_init() are not touched here. */ |
86 | | static lzma_ret |
87 | | stream_decoder_reset(lzma_stream_coder *coder, const lzma_allocator *allocator) |
88 | 59.1k | { |
89 | | // (Re)initialize the Index hash used to verify the Index; the current pointer (NULL right after allocation) is handed to the initializer. |
90 | 59.1k | coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); |
91 | 59.1k | if (coder->index_hash == NULL) |
92 | 0 | return LZMA_MEM_ERROR; |
93 | | |
94 | | // Reset the rest of the per-Stream variables. |
95 | 59.1k | coder->sequence = SEQ_STREAM_HEADER; |
96 | 59.1k | coder->pos = 0; |
97 | | |
98 | 59.1k | return LZMA_OK; |
99 | 59.1k | } |
100 | | /* Decoding callback (installed as next->code): a state machine over coder->sequence that consumes in[] starting at *in_pos and lets the Block decoder write to out[] at *out_pos. */ |
101 | | /* Returns LZMA_OK when a stage cannot continue without more input (or whatever the Block decoder returns while a Block is running), LZMA_STREAM_END at the end of the Stream(s), LZMA_NO_CHECK / LZMA_UNSUPPORTED_CHECK / LZMA_GET_CHECK when requested by the flags, or the error code of the failing stage. */ |
102 | | static lzma_ret |
103 | | stream_decode(void *coder_ptr, const lzma_allocator *allocator, |
104 | | const uint8_t *restrict in, size_t *restrict in_pos, |
105 | | size_t in_size, uint8_t *restrict out, |
106 | | size_t *restrict out_pos, size_t out_size, lzma_action action) |
107 | 49.5k | { |
108 | 49.5k | lzma_stream_coder *coder = coder_ptr; |
109 | | |
110 | | // When decoding the actual Block, it may be able to produce more |
111 | | // output even if we don't give it any new input. |
112 | 49.5k | while (true) /* a "break" out of the switch below lands here and re-dispatches on coder->sequence */ |
113 | 419k | switch (coder->sequence) { |
114 | 59.2k | case SEQ_STREAM_HEADER: { |
115 | | // Copy the Stream Header to the internal buffer. |
116 | 59.2k | lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, |
117 | 59.2k | LZMA_STREAM_HEADER_SIZE); |
118 | | |
119 | | // Return if we didn't get the whole Stream Header yet. |
120 | 59.2k | if (coder->pos < LZMA_STREAM_HEADER_SIZE) |
121 | 96 | return LZMA_OK; |
122 | | |
123 | 59.1k | coder->pos = 0; |
124 | | |
125 | | // Decode the Stream Header. |
126 | 59.1k | const lzma_ret ret = lzma_stream_header_decode( |
127 | 59.1k | &coder->stream_flags, coder->buffer); |
128 | 59.1k | if (ret != LZMA_OK) |
129 | 80 | return ret == LZMA_FORMAT_ERROR && !coder->first_stream |
130 | 80 | ? LZMA_DATA_ERROR : ret; |
131 | | |
132 | | // If we are decoding concatenated Streams, and the later |
133 | | // Streams have invalid Header Magic Bytes, we give |
134 | | // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR. |
135 | 59.0k | coder->first_stream = false; |
136 | | |
137 | | // Copy the type of the Check so that Block Header and Block |
138 | | // decoders see it. |
139 | 59.0k | coder->block_options.check = coder->stream_flags.check; |
140 | | |
141 | | // Even if we return LZMA_*_CHECK below, we want |
142 | | // to continue from Block Header decoding. |
143 | 59.0k | coder->sequence = SEQ_BLOCK_HEADER; |
144 | | |
145 | | // Detect if there's no integrity check or if it is |
146 | | // unsupported if those were requested by the application. |
147 | 59.0k | if (coder->tell_no_check && coder->stream_flags.check |
148 | 0 | == LZMA_CHECK_NONE) |
149 | 0 | return LZMA_NO_CHECK; |
150 | | |
151 | 59.0k | if (coder->tell_unsupported_check |
152 | 59.0k | && !lzma_check_is_supported( |
153 | 0 | coder->stream_flags.check)) |
154 | 0 | return LZMA_UNSUPPORTED_CHECK; |
155 | | |
156 | 59.0k | if (coder->tell_any_check) |
157 | 0 | return LZMA_GET_CHECK; |
158 | 59.0k | } |
159 | | |
160 | | // Fall through |
161 | | |
162 | 331k | case SEQ_BLOCK_HEADER: { |
163 | 331k | if (*in_pos >= in_size) |
164 | 263 | return LZMA_OK; |
165 | | |
166 | 331k | if (coder->pos == 0) { /* nothing of this header buffered yet, so in[*in_pos] is its first byte */ |
167 | | // Detect if it's Index. |
168 | 331k | if (in[*in_pos] == INDEX_INDICATOR) { |
169 | 49.5k | coder->sequence = SEQ_INDEX; |
170 | 49.5k | break; /* leaves the switch; the outer loop continues in SEQ_INDEX */ |
171 | 49.5k | } |
172 | | |
173 | | // Calculate the size of the Block Header. Note that |
174 | | // Block Header decoder wants to see this byte too |
175 | | // so don't advance *in_pos. |
176 | 281k | coder->block_options.header_size |
177 | 281k | = lzma_block_header_size_decode( |
178 | 281k | in[*in_pos]); |
179 | 281k | } |
180 | | |
181 | | // Copy the Block Header to the internal buffer. |
182 | 281k | lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, |
183 | 281k | coder->block_options.header_size); |
184 | | |
185 | | // Return if we didn't get the whole Block Header yet. |
186 | 281k | if (coder->pos < coder->block_options.header_size) |
187 | 37 | return LZMA_OK; |
188 | | |
189 | 281k | coder->pos = 0; |
190 | 281k | coder->sequence = SEQ_BLOCK_INIT; |
191 | 281k | } |
192 | | |
193 | | // Fall through |
194 | | |
195 | 281k | case SEQ_BLOCK_INIT: { |
196 | | // Checking memusage and doing the initialization needs |
197 | | // its own sequence point because we need to be able to |
198 | | // retry if we return LZMA_MEMLIMIT_ERROR. |
199 | | |
200 | | // Version 1 is needed to support the .ignore_check option. |
201 | 281k | coder->block_options.version = 1; |
202 | | |
203 | | // Set up a buffer to hold the filter chain. Block Header |
204 | | // decoder will initialize all members of this array so |
205 | | // we don't need to do it here. |
206 | 281k | lzma_filter filters[LZMA_FILTERS_MAX + 1]; |
207 | 281k | coder->block_options.filters = filters; |
208 | | |
209 | | // Decode the Block Header. |
210 | 281k | return_if_error(lzma_block_header_decode(&coder->block_options, |
211 | 281k | allocator, coder->buffer)); |
212 | | |
213 | | // If LZMA_IGNORE_CHECK was used, this flag needs to be set. |
214 | | // It has to be set after lzma_block_header_decode() because |
215 | | // it always resets this to false. |
216 | 280k | coder->block_options.ignore_check = coder->ignore_check; |
217 | | |
218 | | // Check the memory usage limit. |
219 | 280k | const uint64_t memusage = lzma_raw_decoder_memusage(filters); |
220 | 280k | lzma_ret ret; /* assigned on every branch below before it is read */ |
221 | | |
222 | 280k | if (memusage == UINT64_MAX) { |
223 | | // One or more unknown Filter IDs. |
224 | 29 | ret = LZMA_OPTIONS_ERROR; |
225 | 280k | } else { |
226 | | // Now we can set coder->memusage since we know that |
227 | | // the filter chain is valid. We don't want |
228 | | // lzma_memusage() to return UINT64_MAX in case of |
229 | | // invalid filter chain. |
230 | 280k | coder->memusage = memusage; |
231 | | |
232 | 280k | if (memusage > coder->memlimit) { |
233 | | // The chain would need too much memory. |
234 | 15 | ret = LZMA_MEMLIMIT_ERROR; |
235 | 280k | } else { |
236 | | // Memory usage is OK. |
237 | | // Initialize the Block decoder. |
238 | 280k | ret = lzma_block_decoder_init( |
239 | 280k | &coder->block_decoder, |
240 | 280k | allocator, |
241 | 280k | &coder->block_options); |
242 | 280k | } |
243 | 280k | } |
244 | | |
245 | | // Free the allocated filter options since they are needed |
246 | | // only to initialize the Block decoder. |
247 | 280k | lzma_filters_free(filters, allocator); |
248 | 280k | coder->block_options.filters = NULL; /* "filters" is a local array, so the pointer must not outlive this block */ |
249 | | |
250 | | // Check if memory usage calculation and Block decoder |
251 | | // initialization succeeded. |
252 | 280k | if (ret != LZMA_OK) |
253 | 50 | return ret; /* sequence is still SEQ_BLOCK_INIT, so a later call starts this stage again */ |
254 | | |
255 | 280k | coder->sequence = SEQ_BLOCK_RUN; |
256 | 280k | } |
257 | | |
258 | | // Fall through |
259 | | |
260 | 319k | case SEQ_BLOCK_RUN: { |
261 | 319k | const lzma_ret ret = coder->block_decoder.code( |
262 | 319k | coder->block_decoder.coder, allocator, |
263 | 319k | in, in_pos, in_size, out, out_pos, out_size, |
264 | 319k | action); |
265 | | |
266 | 319k | if (ret != LZMA_STREAM_END) |
267 | 47.3k | return ret; /* LZMA_OK and errors from the Block decoder are passed to the caller unchanged */ |
268 | | |
269 | | // Block decoded successfully. Add the new size pair to |
270 | | // the Index hash. |
271 | 272k | return_if_error(lzma_index_hash_append(coder->index_hash, |
272 | 272k | lzma_block_unpadded_size( |
273 | 272k | &coder->block_options), |
274 | 272k | coder->block_options.uncompressed_size)); |
275 | | |
276 | 272k | coder->sequence = SEQ_BLOCK_HEADER; |
277 | 272k | break; /* leaves the switch; the outer loop looks for the next Block Header or the Index */ |
278 | 272k | } |
279 | | |
280 | 49.9k | case SEQ_INDEX: { |
281 | | // If we don't have any input, don't call |
282 | | // lzma_index_hash_decode() since it would return |
283 | | // LZMA_BUF_ERROR, which we must not do here. |
284 | 49.9k | if (*in_pos >= in_size) |
285 | 434 | return LZMA_OK; |
286 | | |
287 | | // Decode the Index and compare it to the hash calculated |
288 | | // from the sizes of the Blocks (if any). |
289 | 49.5k | const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, |
290 | 49.5k | in, in_pos, in_size); |
291 | 49.5k | if (ret != LZMA_STREAM_END) |
292 | 516 | return ret; |
293 | | |
294 | 49.0k | coder->sequence = SEQ_STREAM_FOOTER; |
295 | 49.0k | } |
296 | | |
297 | | // Fall through |
298 | | |
299 | 49.0k | case SEQ_STREAM_FOOTER: { |
300 | | // Copy the Stream Footer to the internal buffer. |
301 | 49.0k | lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, |
302 | 49.0k | LZMA_STREAM_HEADER_SIZE); |
303 | | |
304 | | // Return if we didn't get the whole Stream Footer yet. |
305 | 49.0k | if (coder->pos < LZMA_STREAM_HEADER_SIZE) |
306 | 45 | return LZMA_OK; |
307 | | |
308 | 48.9k | coder->pos = 0; |
309 | | |
310 | | // Decode the Stream Footer. The decoder gives |
311 | | // LZMA_FORMAT_ERROR if the magic bytes don't match, |
312 | | // so convert that return code to LZMA_DATA_ERROR. |
313 | 48.9k | lzma_stream_flags footer_flags; |
314 | 48.9k | const lzma_ret ret = lzma_stream_footer_decode( |
315 | 48.9k | &footer_flags, coder->buffer); |
316 | 48.9k | if (ret != LZMA_OK) |
317 | 54 | return ret == LZMA_FORMAT_ERROR |
318 | 54 | ? LZMA_DATA_ERROR : ret; |
319 | | |
320 | | // Check that Index Size stored in the Stream Footer matches |
321 | | // the real size of the Index field. |
322 | 48.9k | if (lzma_index_hash_size(coder->index_hash) |
323 | 48.9k | != footer_flags.backward_size) |
324 | 34 | return LZMA_DATA_ERROR; |
325 | | |
326 | | // Compare that the Stream Flags fields are identical in |
327 | | // both Stream Header and Stream Footer. |
328 | 48.9k | return_if_error(lzma_stream_flags_compare( |
329 | 48.9k | &coder->stream_flags, &footer_flags)); |
330 | | |
331 | 48.9k | if (!coder->concatenated) |
332 | 0 | return LZMA_STREAM_END; /* single-Stream mode stops here; anything after the footer is left unread */ |
333 | | |
334 | 48.9k | coder->sequence = SEQ_STREAM_PADDING; |
335 | 48.9k | } |
336 | | |
337 | | // Fall through |
338 | | |
339 | 48.9k | case SEQ_STREAM_PADDING: |
340 | 48.9k | assert(coder->concatenated); |
341 | | |
342 | | // Skip over possible Stream Padding. |
343 | 42.7M | while (true) { |
344 | 42.7M | if (*in_pos >= in_size) { |
345 | | // Unless LZMA_FINISH was used, we cannot |
346 | | // know if there's more input coming later. |
347 | 14 | if (action != LZMA_FINISH) |
348 | 0 | return LZMA_OK; |
349 | | |
350 | | // Stream Padding must be a multiple of |
351 | | // four bytes. |
352 | 14 | return coder->pos == 0 |
353 | 14 | ? LZMA_STREAM_END |
354 | 14 | : LZMA_DATA_ERROR; |
355 | 14 | } |
356 | | |
357 | | // If the byte is not zero, it probably indicates |
358 | | // beginning of a new Stream (or the file is corrupt). |
359 | 42.7M | if (in[*in_pos] != 0x00) |
360 | 48.8k | break; /* leaves only this padding loop, not the switch */ |
361 | | |
362 | 42.6M | ++*in_pos; |
363 | 42.6M | coder->pos = (coder->pos + 1) & 3; /* count of padding bytes modulo four */ |
364 | 42.6M | } |
365 | | |
366 | | // Stream Padding must be a multiple of four bytes (empty |
367 | | // Stream Padding is OK). |
368 | 48.8k | if (coder->pos != 0) { |
369 | 9 | ++*in_pos; /* NOTE(review): consumes the non-zero byte before failing; the reason is not visible in this file - confirm */ |
370 | 9 | return LZMA_DATA_ERROR; |
371 | 9 | } |
372 | | |
373 | | // Prepare to decode the next Stream. |
374 | 48.8k | return_if_error(stream_decoder_reset(coder, allocator)); |
375 | 48.8k | break; /* the reset set sequence to SEQ_STREAM_HEADER; the outer loop continues there */ |
376 | | |
377 | 48.8k | default: |
378 | 0 | assert(0); |
379 | 0 | return LZMA_PROG_ERROR; |
380 | 419k | } |
381 | | |
382 | | // Never reached |
383 | 49.5k | } |
384 | | /* Destructor callback (installed as next->end): ends the nested Block decoder and the Index hash, then hands the coder itself to lzma_free(). */ |
385 | | /* The coder is released last because the two calls before it still read its members. */ |
386 | | static void |
387 | | stream_decoder_end(void *coder_ptr, const lzma_allocator *allocator) |
388 | 10.3k | { |
389 | 10.3k | lzma_stream_coder *coder = coder_ptr; |
390 | 10.3k | lzma_next_end(&coder->block_decoder, allocator); |
391 | 10.3k | lzma_index_hash_end(coder->index_hash, allocator); |
392 | 10.3k | lzma_free(coder, allocator); |
393 | 10.3k | return; |
394 | 10.3k | } |
395 | | /* get_check callback (installed as next->get_check): reports the integrity check type stored in coder->stream_flags. */ |
396 | | /* stream_flags is filled by lzma_stream_header_decode() in stream_decode(). NOTE(review): nothing in this file initializes it before the first Stream Header has been decoded - confirm that callers only ask afterwards. */ |
397 | | static lzma_check |
398 | | stream_decoder_get_check(const void *coder_ptr) |
399 | 0 | { |
400 | 0 | const lzma_stream_coder *coder = coder_ptr; |
401 | 0 | return coder->stream_flags.check; |
402 | 0 | } |
403 | | /* memconfig callback (installed as next->memconfig): reports the current memory usage and limit through the out-pointers and optionally installs a new limit. */ |
404 | | /* new_memlimit == 0 only queries. A non-zero limit below coder->memusage is rejected with LZMA_MEMLIMIT_ERROR and the old limit stays in effect; otherwise the limit is stored and LZMA_OK is returned. */ |
405 | | static lzma_ret |
406 | | stream_decoder_memconfig(void *coder_ptr, uint64_t *memusage, |
407 | | uint64_t *old_memlimit, uint64_t new_memlimit) |
408 | 0 | { |
409 | 0 | lzma_stream_coder *coder = coder_ptr; |
410 | 

411 | 0 | *memusage = coder->memusage; /* both out-parameters are written even if the new limit is rejected below */ |
412 | 0 | *old_memlimit = coder->memlimit; |
413 | 

414 | 0 | if (new_memlimit != 0) { |
415 | 0 | if (new_memlimit < coder->memusage) |
416 | 0 | return LZMA_MEMLIMIT_ERROR; |
417 | | |
418 | 0 | coder->memlimit = new_memlimit; |
419 | 0 | } |
420 | | |
421 | 0 | return LZMA_OK; |
422 | 0 | } |
423 | | /* Initializes "next" as a .xz Stream decoder with the given memory usage limit and LZMA_* decoder flags. */ |
424 | | /* Returns LZMA_OPTIONS_ERROR for flag bits outside LZMA_SUPPORTED_FLAGS, LZMA_MEM_ERROR if allocating the coder fails, otherwise the result of stream_decoder_reset(). */ |
425 | | extern lzma_ret |
426 | | lzma_stream_decoder_init( |
427 | | lzma_next_coder *next, const lzma_allocator *allocator, |
428 | | uint64_t memlimit, uint32_t flags) |
429 | 10.3k | { |
430 | 10.3k | lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator); /* NOTE(review): macro defined elsewhere; presumably it tears down a different coder already held by "next" - confirm */ |
431 | | |
432 | 10.3k | if (flags & ~LZMA_SUPPORTED_FLAGS) |
433 | 0 | return LZMA_OPTIONS_ERROR; |
434 | | |
435 | 10.3k | lzma_stream_coder *coder = next->coder; |
436 | 10.3k | if (coder == NULL) { /* first use: allocate the state and install the callbacks */ |
437 | 10.3k | coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); |
438 | 10.3k | if (coder == NULL) |
439 | 0 | return LZMA_MEM_ERROR; |
440 | | |
441 | 10.3k | next->coder = coder; |
442 | 10.3k | next->code = &stream_decode; |
443 | 10.3k | next->end = &stream_decoder_end; |
444 | 10.3k | next->get_check = &stream_decoder_get_check; |
445 | 10.3k | next->memconfig = &stream_decoder_memconfig; |
446 | | |
447 | 10.3k | coder->block_decoder = LZMA_NEXT_CODER_INIT; |
448 | 10.3k | coder->index_hash = NULL; |
449 | 10.3k | } /* when the coder already existed, its block_decoder and index_hash are kept as they are */ |
450 | | |
451 | 10.3k | coder->memlimit = my_max(1, memlimit); /* never stores a limit below 1, assuming my_max yields the larger argument */ |
452 | 10.3k | coder->memusage = LZMA_MEMUSAGE_BASE; |
453 | 10.3k | coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0; |
454 | 10.3k | coder->tell_unsupported_check |
455 | 10.3k | = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; |
456 | 10.3k | coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; |
457 | 10.3k | coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0; |
458 | 10.3k | coder->concatenated = (flags & LZMA_CONCATENATED) != 0; |
459 | 10.3k | coder->first_stream = true; |
460 | | |
461 | 10.3k | return stream_decoder_reset(coder, allocator); |
462 | 10.3k | } |
463 | | /* Public entry point: sets up strm as a .xz Stream decoder with the given memory limit and flags, then enables the LZMA_RUN and LZMA_FINISH actions. */ |
464 | | /* NOTE(review): lzma_next_strm_init is a macro defined elsewhere; presumably it returns early when lzma_stream_decoder_init() fails - confirm. If execution reaches the end, LZMA_OK is returned. */ |
465 | | extern LZMA_API(lzma_ret) |
466 | | lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) |
467 | 10.3k | { |
468 | 10.3k | lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags); |
469 | | |
470 | 10.3k | strm->internal->supported_actions[LZMA_RUN] = true; |
471 | 10.3k | strm->internal->supported_actions[LZMA_FINISH] = true; |
472 | | |
473 | 10.3k | return LZMA_OK; |
474 | 10.3k | } |