/src/xz/src/liblzma/lzma/lzma_decoder.c
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////////////// |
2 | | // |
3 | | /// \file lzma_decoder.c |
4 | | /// \brief LZMA decoder |
5 | | /// |
6 | | // Authors: Igor Pavlov |
7 | | // Lasse Collin |
8 | | // |
9 | | // This file has been put into the public domain. |
10 | | // You can do whatever you want with this file. |
11 | | // |
12 | | /////////////////////////////////////////////////////////////////////////////// |
13 | | |
14 | | #include "lz_decoder.h" |
15 | | #include "lzma_common.h" |
16 | | #include "lzma_decoder.h" |
17 | | #include "range_decoder.h" |
18 | | |
19 | | // The macros unroll loops with switch statements. |
20 | | // Silence warnings about missing fall-through comments. |
21 | | #if TUKLIB_GNUC_REQ(7, 0) |
22 | | # pragma GCC diagnostic ignored "-Wimplicit-fallthrough" |
23 | | #endif |
24 | | |
25 | | |
26 | | #ifdef HAVE_SMALL |
27 | | |
28 | | // Macros for (somewhat) size-optimized code. |
29 | | #define seq_4(seq) seq |
30 | | |
31 | | #define seq_6(seq) seq |
32 | | |
33 | | #define seq_8(seq) seq |
34 | | |
35 | | #define seq_len(seq) \ |
36 | | seq ## _CHOICE, \ |
37 | | seq ## _CHOICE2, \ |
38 | | seq ## _BITTREE |
39 | | |
40 | | #define len_decode(target, ld, pos_state, seq) \ |
41 | | do { \ |
42 | | case seq ## _CHOICE: \ |
43 | | rc_if_0(ld.choice, seq ## _CHOICE) { \ |
44 | | rc_update_0(ld.choice); \ |
45 | | probs = ld.low[pos_state];\ |
46 | | limit = LEN_LOW_SYMBOLS; \ |
47 | | target = MATCH_LEN_MIN; \ |
48 | | } else { \ |
49 | | rc_update_1(ld.choice); \ |
50 | | case seq ## _CHOICE2: \ |
51 | | rc_if_0(ld.choice2, seq ## _CHOICE2) { \ |
52 | | rc_update_0(ld.choice2); \ |
53 | | probs = ld.mid[pos_state]; \ |
54 | | limit = LEN_MID_SYMBOLS; \ |
55 | | target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ |
56 | | } else { \ |
57 | | rc_update_1(ld.choice2); \ |
58 | | probs = ld.high; \ |
59 | | limit = LEN_HIGH_SYMBOLS; \ |
60 | | target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \ |
61 | | + LEN_MID_SYMBOLS; \ |
62 | | } \ |
63 | | } \ |
64 | | symbol = 1; \ |
65 | | case seq ## _BITTREE: \ |
66 | | do { \ |
67 | | rc_bit(probs[symbol], , , seq ## _BITTREE); \ |
68 | | } while (symbol < limit); \ |
69 | | target += symbol - limit; \ |
70 | | } while (0) |
71 | | |
72 | | #else // HAVE_SMALL |
73 | | |
74 | | // Unrolled versions |
75 | | #define seq_4(seq) \ |
76 | | seq ## 0, \ |
77 | | seq ## 1, \ |
78 | | seq ## 2, \ |
79 | | seq ## 3 |
80 | | |
81 | | #define seq_6(seq) \ |
82 | | seq ## 0, \ |
83 | | seq ## 1, \ |
84 | | seq ## 2, \ |
85 | | seq ## 3, \ |
86 | | seq ## 4, \ |
87 | | seq ## 5 |
88 | | |
89 | | #define seq_8(seq) \ |
90 | | seq ## 0, \ |
91 | | seq ## 1, \ |
92 | | seq ## 2, \ |
93 | | seq ## 3, \ |
94 | | seq ## 4, \ |
95 | | seq ## 5, \ |
96 | | seq ## 6, \ |
97 | | seq ## 7 |
98 | | |
99 | | #define seq_len(seq) \ |
100 | | seq ## _CHOICE, \ |
101 | | seq ## _LOW0, \ |
102 | | seq ## _LOW1, \ |
103 | | seq ## _LOW2, \ |
104 | | seq ## _CHOICE2, \ |
105 | | seq ## _MID0, \ |
106 | | seq ## _MID1, \ |
107 | | seq ## _MID2, \ |
108 | | seq ## _HIGH0, \ |
109 | | seq ## _HIGH1, \ |
110 | | seq ## _HIGH2, \ |
111 | | seq ## _HIGH3, \ |
112 | | seq ## _HIGH4, \ |
113 | | seq ## _HIGH5, \ |
114 | | seq ## _HIGH6, \ |
115 | | seq ## _HIGH7 |
116 | | |
117 | 701k | #define len_decode(target, ld, pos_state, seq) \ |
118 | 701k | do { \ |
119 | 702k | symbol = 1; \ |
120 | 702k | case seq ## _CHOICE: \ |
121 | 702k | rc_if_0(ld.choice, seq ## _CHOICE) { \ |
122 | 153k | rc_update_0(ld.choice); \ |
123 | 154k | rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \ |
124 | 154k | rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \ |
125 | 154k | rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \ |
126 | 153k | target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \ |
127 | 547k | } else { \ |
128 | 547k | rc_update_1(ld.choice); \ |
129 | 547k | case seq ## _CHOICE2: \ |
130 | 547k | rc_if_0(ld.choice2, seq ## _CHOICE2) { \ |
131 | 142k | rc_update_0(ld.choice2); \ |
132 | 142k | rc_bit_case(ld.mid[pos_state][symbol], , , \ |
133 | 142k | seq ## _MID0); \ |
134 | 142k | rc_bit_case(ld.mid[pos_state][symbol], , , \ |
135 | 142k | seq ## _MID1); \ |
136 | 142k | rc_bit_case(ld.mid[pos_state][symbol], , , \ |
137 | 142k | seq ## _MID2); \ |
138 | 141k | target = symbol - LEN_MID_SYMBOLS \ |
139 | 141k | + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ |
140 | 405k | } else { \ |
141 | 405k | rc_update_1(ld.choice2); \ |
142 | 405k | rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \ |
143 | 405k | rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \ |
144 | 405k | rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \ |
145 | 405k | rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \ |
146 | 405k | rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \ |
147 | 405k | rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \ |
148 | 404k | rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \ |
149 | 404k | rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \ |
150 | 404k | target = symbol - LEN_HIGH_SYMBOLS \ |
151 | 404k | + MATCH_LEN_MIN \ |
152 | 404k | + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ |
153 | 404k | } \ |
154 | 547k | } \ |
155 | 701k | } while (0) |
156 | | |
157 | | #endif // HAVE_SMALL |
158 | | |
159 | | |
160 | | /// Length decoder probabilities; see comments in lzma_common.h. |
161 | | typedef struct { |
162 | | probability choice; |
163 | | probability choice2; |
164 | | probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; |
165 | | probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; |
166 | | probability high[LEN_HIGH_SYMBOLS]; |
167 | | } lzma_length_decoder; |
168 | | |
169 | | |
170 | | typedef struct { |
171 | | /////////////////// |
172 | | // Probabilities // |
173 | | /////////////////// |
174 | | |
175 | | /// Literals; see comments in lzma_common.h. |
176 | | probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; |
177 | | |
178 | | /// If 1, it's a match. Otherwise it's a single 8-bit literal. |
179 | | probability is_match[STATES][POS_STATES_MAX]; |
180 | | |
181 | | /// If 1, it's a repeated match. The distance is one of rep0 .. rep3. |
182 | | probability is_rep[STATES]; |
183 | | |
184 | | /// If 0, distance of a repeated match is rep0. |
185 | | /// Otherwise check is_rep1. |
186 | | probability is_rep0[STATES]; |
187 | | |
188 | | /// If 0, distance of a repeated match is rep1. |
189 | | /// Otherwise check is_rep2. |
190 | | probability is_rep1[STATES]; |
191 | | |
192 | | /// If 0, distance of a repeated match is rep2. Otherwise it is rep3. |
193 | | probability is_rep2[STATES]; |
194 | | |
195 | | /// If 1, the repeated match has length of one byte. Otherwise |
196 | | /// the length is decoded from rep_len_decoder. |
197 | | probability is_rep0_long[STATES][POS_STATES_MAX]; |
198 | | |
199 | | /// Probability tree for the highest two bits of the match distance. |
200 | | /// There is a separate probability tree for match lengths of |
201 | | /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. |
202 | | probability dist_slot[DIST_STATES][DIST_SLOTS]; |
203 | | |
204 | | /// Probability trees for additional bits for match distance when the |
205 | | /// distance is in the range [4, 127]. |
206 | | probability pos_special[FULL_DISTANCES - DIST_MODEL_END]; |
207 | | |
208 | | /// Probability tree for the lowest four bits of a match distance |
209 | | /// that is equal to or greater than 128. |
210 | | probability pos_align[ALIGN_SIZE]; |
211 | | |
212 | | /// Length of a normal match |
213 | | lzma_length_decoder match_len_decoder; |
214 | | |
215 | | /// Length of a repeated match |
216 | | lzma_length_decoder rep_len_decoder; |
217 | | |
218 | | /////////////////// |
219 | | // Decoder state // |
220 | | /////////////////// |
221 | | |
222 | | // Range coder |
223 | | lzma_range_decoder rc; |
224 | | |
225 | | // Types of the most recently seen LZMA symbols |
226 | | lzma_lzma_state state; |
227 | | |
228 | | uint32_t rep0; ///< Distance of the latest match |
229 | | uint32_t rep1; ///< Distance of second latest match |
230 | | uint32_t rep2; ///< Distance of third latest match |
231 | | uint32_t rep3; ///< Distance of fourth latest match |
232 | | |
233 | | uint32_t pos_mask; // (1U << pb) - 1 |
234 | | uint32_t literal_context_bits; |
235 | | uint32_t literal_pos_mask; |
236 | | |
237 | | /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of |
238 | | /// payload marker is expected. |
239 | | lzma_vli uncompressed_size; |
240 | | |
241 | | /// True if end of payload marker (EOPM) is allowed even when |
242 | | /// uncompressed_size is known; false if EOPM must not be present. |
243 | | /// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN. |
244 | | bool allow_eopm; |
245 | | |
246 | | //////////////////////////////// |
247 | | // State of incomplete symbol // |
248 | | //////////////////////////////// |
249 | | |
250 | | /// Position where to continue the decoder loop |
251 | | enum { |
252 | | SEQ_NORMALIZE, |
253 | | SEQ_IS_MATCH, |
254 | | seq_8(SEQ_LITERAL), |
255 | | seq_8(SEQ_LITERAL_MATCHED), |
256 | | SEQ_LITERAL_WRITE, |
257 | | SEQ_IS_REP, |
258 | | seq_len(SEQ_MATCH_LEN), |
259 | | seq_6(SEQ_DIST_SLOT), |
260 | | SEQ_DIST_MODEL, |
261 | | SEQ_DIRECT, |
262 | | seq_4(SEQ_ALIGN), |
263 | | SEQ_EOPM, |
264 | | SEQ_IS_REP0, |
265 | | SEQ_SHORTREP, |
266 | | SEQ_IS_REP0_LONG, |
267 | | SEQ_IS_REP1, |
268 | | SEQ_IS_REP2, |
269 | | seq_len(SEQ_REP_LEN), |
270 | | SEQ_COPY, |
271 | | } sequence; |
272 | | |
273 | | /// Base of the current probability tree |
274 | | probability *probs; |
275 | | |
276 | | /// Symbol being decoded. This is also used as an index variable in |
277 | | /// bittree decoders: probs[symbol] |
278 | | uint32_t symbol; |
279 | | |
280 | | /// Used as a loop termination condition on bittree decoders and |
281 | | /// direct bits decoder. |
282 | | uint32_t limit; |
283 | | |
284 | | /// Matched literal decoder: 0x100 or 0 to help avoiding branches. |
285 | | /// Bittree reverse decoders: Offset of the next bit: 1 << offset |
286 | | uint32_t offset; |
287 | | |
288 | | /// If decoding a literal: match byte. |
289 | | /// If decoding a match: length of the match. |
290 | | uint32_t len; |
291 | | } lzma_lzma1_decoder; |
292 | | |
293 | | |
294 | | static lzma_ret |
295 | | lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, |
296 | | const uint8_t *restrict in, |
297 | | size_t *restrict in_pos, size_t in_size) |
298 | 96.6k | { |
299 | 96.6k | lzma_lzma1_decoder *restrict coder = coder_ptr; |
300 | | |
301 | | //////////////////// |
302 | | // Initialization // |
303 | | //////////////////// |
304 | | |
305 | 96.6k | { |
306 | 96.6k | const lzma_ret ret = rc_read_init( |
307 | 96.6k | &coder->rc, in, in_pos, in_size); |
308 | 96.6k | if (ret != LZMA_STREAM_END) |
309 | 381 | return ret; |
310 | 96.6k | } |
311 | | |
312 | | /////////////// |
313 | | // Variables // |
314 | | /////////////// |
315 | | |
316 | | // Making local copies of often-used variables improves both |
317 | | // speed and readability. |
318 | | |
319 | 96.3k | lzma_dict dict = *dictptr; |
320 | | |
321 | 96.3k | const size_t dict_start = dict.pos; |
322 | | |
323 | | // Range decoder |
324 | 96.3k | rc_to_local(coder->rc, *in_pos); |
325 | | |
326 | | // State |
327 | 96.3k | uint32_t state = coder->state; |
328 | 96.3k | uint32_t rep0 = coder->rep0; |
329 | 96.3k | uint32_t rep1 = coder->rep1; |
330 | 96.3k | uint32_t rep2 = coder->rep2; |
331 | 96.3k | uint32_t rep3 = coder->rep3; |
332 | | |
333 | 96.3k | const uint32_t pos_mask = coder->pos_mask; |
334 | | |
335 | | // These variables are actually needed only if we last time ran |
336 | | // out of input in the middle of the decoder loop. |
337 | 96.3k | probability *probs = coder->probs; |
338 | 96.3k | uint32_t symbol = coder->symbol; |
339 | 96.3k | uint32_t limit = coder->limit; |
340 | 96.3k | uint32_t offset = coder->offset; |
341 | 96.3k | uint32_t len = coder->len; |
342 | | |
343 | 96.3k | const uint32_t literal_pos_mask = coder->literal_pos_mask; |
344 | 96.3k | const uint32_t literal_context_bits = coder->literal_context_bits; |
345 | | |
346 | | // Temporary variables |
347 | 96.3k | uint32_t pos_state = dict.pos & pos_mask; |
348 | | |
349 | 96.3k | lzma_ret ret = LZMA_OK; |
350 | | |
351 | | // This is true when the next LZMA symbol is allowed to be EOPM. |
352 | | // That is, if this is false, then EOPM is considered |
353 | | // an invalid symbol and we will return LZMA_DATA_ERROR. |
354 | | // |
355 | | // EOPM is always required (not just allowed) when |
356 | | // the uncompressed size isn't known. When uncompressed size |
357 | | // is known, eopm_is_valid may be set to true later. |
358 | 96.3k | bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN; |
359 | | |
360 | | // If uncompressed size is known and there is enough output space |
361 | | // to decode all the data, limit the available buffer space so that |
362 | | // the main loop won't try to decode past the end of the stream. |
363 | 96.3k | bool might_finish_without_eopm = false; |
364 | 96.3k | if (coder->uncompressed_size != LZMA_VLI_UNKNOWN |
365 | 96.3k | && coder->uncompressed_size <= dict.limit - dict.pos) { |
366 | 10.9k | dict.limit = dict.pos + (size_t)(coder->uncompressed_size); |
367 | 10.9k | might_finish_without_eopm = true; |
368 | 10.9k | } |
369 | | |
370 | | // The main decoder loop. The "switch" is used to restart the decoder at |
371 | | // correct location. Once restarted, the "switch" is no longer used. |
372 | 96.3k | switch (coder->sequence) |
373 | 1.27M | while (true) { |
374 | | // Calculate new pos_state. This is skipped on the first loop |
375 | | // since we already calculated it when setting up the local |
376 | | // variables. |
377 | 1.27M | pos_state = dict.pos & pos_mask; |
378 | | |
379 | 1.27M | case SEQ_NORMALIZE: |
380 | 1.29M | case SEQ_IS_MATCH: |
381 | 1.29M | if (unlikely(might_finish_without_eopm |
382 | 1.29M | && dict.pos == dict.limit)) { |
383 | | // In rare cases there is a useless byte that needs |
384 | | // to be read anyway. |
385 | 10.2k | rc_normalize(SEQ_NORMALIZE); |
386 | | |
387 | | // If the range decoder state is such that we can |
388 | | // be at the end of the LZMA stream, then the |
389 | | // decoding is finished. |
390 | 9.64k | if (rc_is_finished(rc)) { |
391 | 9.55k | ret = LZMA_STREAM_END; |
392 | 9.55k | goto out; |
393 | 9.55k | } |
394 | | |
395 | | // If the caller hasn't allowed EOPM to be present |
396 | | // together with known uncompressed size, then the |
397 | | // LZMA stream is corrupt. |
398 | 87 | if (!coder->allow_eopm) { |
399 | 87 | ret = LZMA_DATA_ERROR; |
400 | 87 | goto out; |
401 | 87 | } |
402 | | |
403 | | // Otherwise continue decoding with the expectation |
404 | | // that the next LZMA symbol is EOPM. |
405 | 0 | eopm_is_valid = true; |
406 | 0 | } |
407 | | |
408 | 2.56M | rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) { |
409 | 557k | rc_update_0(coder->is_match[state][pos_state]); |
410 | | |
411 | | // It's a literal i.e. a single 8-bit byte. |
412 | | |
413 | 557k | probs = literal_subcoder(coder->literal, |
414 | 557k | literal_context_bits, literal_pos_mask, |
415 | 557k | dict.pos, dict_get(&dict, 0)); |
416 | 557k | symbol = 1; |
417 | | |
418 | 557k | if (is_literal_state(state)) { |
419 | | // Decode literal without match byte. |
420 | | #ifdef HAVE_SMALL |
421 | | case SEQ_LITERAL: |
422 | | do { |
423 | | rc_bit(probs[symbol], , , SEQ_LITERAL); |
424 | | } while (symbol < (1 << 8)); |
425 | | #else |
426 | 478k | rc_bit_case(probs[symbol], , , SEQ_LITERAL0); |
427 | 478k | rc_bit_case(probs[symbol], , , SEQ_LITERAL1); |
428 | 478k | rc_bit_case(probs[symbol], , , SEQ_LITERAL2); |
429 | 477k | rc_bit_case(probs[symbol], , , SEQ_LITERAL3); |
430 | 477k | rc_bit_case(probs[symbol], , , SEQ_LITERAL4); |
431 | 477k | rc_bit_case(probs[symbol], , , SEQ_LITERAL5); |
432 | 477k | rc_bit_case(probs[symbol], , , SEQ_LITERAL6); |
433 | 477k | rc_bit_case(probs[symbol], , , SEQ_LITERAL7); |
434 | 477k | #endif |
435 | 477k | } else { |
436 | | // Decode literal with match byte. |
437 | | // |
438 | | // We store the byte we compare against |
439 | | // ("match byte") to "len" to minimize the |
440 | | // number of variables we need to store |
441 | | // between decoder calls. |
442 | 79.4k | len = (uint32_t)(dict_get(&dict, rep0)) << 1; |
443 | | |
444 | | // The usage of "offset" allows omitting some |
445 | | // branches, which should give tiny speed |
446 | | // improvement on some CPUs. "offset" gets |
447 | | // set to zero if match_bit didn't match. |
448 | 79.4k | offset = 0x100; |
449 | | |
450 | | #ifdef HAVE_SMALL |
451 | | case SEQ_LITERAL_MATCHED: |
452 | | do { |
453 | | const uint32_t match_bit |
454 | | = len & offset; |
455 | | const uint32_t subcoder_index |
456 | | = offset + match_bit |
457 | | + symbol; |
458 | | |
459 | | rc_bit(probs[subcoder_index], |
460 | | offset &= ~match_bit, |
461 | | offset &= match_bit, |
462 | | SEQ_LITERAL_MATCHED); |
463 | | |
464 | | // It seems to be faster to do this |
465 | | // here instead of putting it to the |
466 | | // beginning of the loop and then |
467 | | // putting the "case" in the middle |
468 | | // of the loop. |
469 | | len <<= 1; |
470 | | |
471 | | } while (symbol < (1 << 8)); |
472 | | #else |
473 | | // Unroll the loop. |
474 | 79.4k | uint32_t match_bit; |
475 | 79.4k | uint32_t subcoder_index; |
476 | | |
477 | 79.4k | # define d(seq) \ |
478 | 635k | case seq: \ |
479 | 635k | match_bit = len & offset; \ |
480 | 635k | subcoder_index = offset + match_bit + symbol; \ |
481 | 635k | rc_bit(probs[subcoder_index], \ |
482 | 635k | offset &= ~match_bit, \ |
483 | 635k | offset &= match_bit, \ |
484 | 635k | seq) |
485 | | |
486 | 80.2k | d(SEQ_LITERAL_MATCHED0); |
487 | 79.2k | len <<= 1; |
488 | 79.9k | d(SEQ_LITERAL_MATCHED1); |
489 | 79.1k | len <<= 1; |
490 | 79.6k | d(SEQ_LITERAL_MATCHED2); |
491 | 79.0k | len <<= 1; |
492 | 79.6k | d(SEQ_LITERAL_MATCHED3); |
493 | 78.8k | len <<= 1; |
494 | 79.3k | d(SEQ_LITERAL_MATCHED4); |
495 | 78.7k | len <<= 1; |
496 | 79.1k | d(SEQ_LITERAL_MATCHED5); |
497 | 78.6k | len <<= 1; |
498 | 78.9k | d(SEQ_LITERAL_MATCHED6); |
499 | 78.5k | len <<= 1; |
500 | 78.9k | d(SEQ_LITERAL_MATCHED7); |
501 | 78.4k | # undef d |
502 | 78.4k | #endif |
503 | 78.4k | } |
504 | | |
505 | | //update_literal(state); |
506 | | // Use a lookup table to update to literal state, |
507 | | // since compared to other state updates, this would |
508 | | // need two branches. |
509 | 555k | static const lzma_lzma_state next_state[] = { |
510 | 555k | STATE_LIT_LIT, |
511 | 555k | STATE_LIT_LIT, |
512 | 555k | STATE_LIT_LIT, |
513 | 555k | STATE_LIT_LIT, |
514 | 555k | STATE_MATCH_LIT_LIT, |
515 | 555k | STATE_REP_LIT_LIT, |
516 | 555k | STATE_SHORTREP_LIT_LIT, |
517 | 555k | STATE_MATCH_LIT, |
518 | 555k | STATE_REP_LIT, |
519 | 555k | STATE_SHORTREP_LIT, |
520 | 555k | STATE_MATCH_LIT, |
521 | 555k | STATE_REP_LIT |
522 | 555k | }; |
523 | 555k | state = next_state[state]; |
524 | | |
525 | 556k | case SEQ_LITERAL_WRITE: |
526 | 556k | if (unlikely(dict_put(&dict, symbol))) { |
527 | 425 | coder->sequence = SEQ_LITERAL_WRITE; |
528 | 425 | goto out; |
529 | 425 | } |
530 | | |
531 | 555k | continue; |
532 | 556k | } |
533 | | |
534 | | // Instead of a new byte we are going to get a byte range |
535 | | // (distance and length) which will be repeated from our |
536 | | // output history. |
537 | | |
538 | 725k | rc_update_1(coder->is_match[state][pos_state]); |
539 | | |
540 | 725k | case SEQ_IS_REP: |
541 | 725k | rc_if_0(coder->is_rep[state], SEQ_IS_REP) { |
542 | | // Not a repeated match |
543 | 221k | rc_update_0(coder->is_rep[state]); |
544 | 221k | update_match(state); |
545 | | |
546 | | // The latest three match distances are kept in |
547 | | // memory in case there are repeated matches. |
548 | 221k | rep3 = rep2; |
549 | 221k | rep2 = rep1; |
550 | 221k | rep1 = rep0; |
551 | | |
552 | | // Decode the length of the match. |
553 | 221k | len_decode(len, coder->match_len_decoder, |
554 | 221k | pos_state, SEQ_MATCH_LEN); |
555 | | |
556 | | // Prepare to decode the highest two bits of the |
557 | | // match distance. |
558 | 220k | probs = coder->dist_slot[get_dist_state(len)]; |
559 | 220k | symbol = 1; |
560 | | |
561 | | #ifdef HAVE_SMALL |
562 | | case SEQ_DIST_SLOT: |
563 | | do { |
564 | | rc_bit(probs[symbol], , , SEQ_DIST_SLOT); |
565 | | } while (symbol < DIST_SLOTS); |
566 | | #else |
567 | 221k | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0); |
568 | 221k | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1); |
569 | 221k | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2); |
570 | 221k | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3); |
571 | 220k | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4); |
572 | 220k | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5); |
573 | 219k | #endif |
574 | | // Get rid of the highest bit that was needed for |
575 | | // indexing of the probability array. |
576 | 219k | symbol -= DIST_SLOTS; |
577 | 219k | assert(symbol <= 63); |
578 | | |
579 | 219k | if (symbol < DIST_MODEL_START) { |
580 | | // Match distances [0, 3] have only two bits. |
581 | 26.5k | rep0 = symbol; |
582 | 193k | } else { |
583 | | // Decode the lowest [1, 29] bits of |
584 | | // the match distance. |
585 | 193k | limit = (symbol >> 1) - 1; |
586 | 193k | assert(limit >= 1 && limit <= 30); |
587 | 193k | rep0 = 2 + (symbol & 1); |
588 | | |
589 | 193k | if (symbol < DIST_MODEL_END) { |
590 | | // Prepare to decode the low bits for |
591 | | // a distance of [4, 127]. |
592 | 112k | assert(limit <= 5); |
593 | 112k | rep0 <<= limit; |
594 | 112k | assert(rep0 <= 96); |
595 | | // -1 is fine, because we start |
596 | | // decoding at probs[1], not probs[0]. |
597 | | // NOTE: This violates the C standard, |
598 | | // since we are doing pointer |
599 | | // arithmetic past the beginning of |
600 | | // the array. |
601 | 112k | assert((int32_t)(rep0 - symbol - 1) |
602 | 112k | >= -1); |
603 | 112k | assert((int32_t)(rep0 - symbol - 1) |
604 | 112k | <= 82); |
605 | 112k | probs = coder->pos_special + rep0 |
606 | 112k | - symbol - 1; |
607 | 112k | symbol = 1; |
608 | 112k | offset = 0; |
609 | 113k | case SEQ_DIST_MODEL: |
610 | | #ifdef HAVE_SMALL |
611 | | do { |
612 | | rc_bit(probs[symbol], , |
613 | | rep0 += 1U << offset, |
614 | | SEQ_DIST_MODEL); |
615 | | } while (++offset < limit); |
616 | | #else |
617 | 113k | switch (limit) { |
618 | 33.4k | case 5: |
619 | 33.4k | assert(offset == 0); |
620 | 33.4k | rc_bit(probs[symbol], , |
621 | 33.3k | rep0 += 1U, |
622 | 33.3k | SEQ_DIST_MODEL); |
623 | 33.3k | ++offset; |
624 | 33.3k | --limit; |
625 | 54.9k | case 4: |
626 | 54.9k | rc_bit(probs[symbol], , |
627 | 54.7k | rep0 += 1U << offset, |
628 | 54.7k | SEQ_DIST_MODEL); |
629 | 54.7k | ++offset; |
630 | 54.7k | --limit; |
631 | 80.1k | case 3: |
632 | 80.1k | rc_bit(probs[symbol], , |
633 | 79.7k | rep0 += 1U << offset, |
634 | 79.7k | SEQ_DIST_MODEL); |
635 | 79.7k | ++offset; |
636 | 79.7k | --limit; |
637 | 95.9k | case 2: |
638 | 95.9k | rc_bit(probs[symbol], , |
639 | 95.4k | rep0 += 1U << offset, |
640 | 95.4k | SEQ_DIST_MODEL); |
641 | 95.4k | ++offset; |
642 | 95.4k | --limit; |
643 | 112k | case 1: |
644 | | // We need "symbol" only for |
645 | | // indexing the probability |
646 | | // array, thus we can use |
647 | | // rc_bit_last() here to omit |
648 | | // the unneeded updating of |
649 | | // "symbol". |
650 | 112k | rc_bit_last(probs[symbol], , |
651 | 113k | rep0 += 1U << offset, |
652 | 113k | SEQ_DIST_MODEL); |
653 | 113k | } |
654 | 113k | #endif |
655 | 113k | } else { |
656 | | // The distance is >= 128. Decode the |
657 | | // lower bits without probabilities |
658 | | // except the lowest four bits. |
659 | 80.9k | assert(symbol >= 14); |
660 | 80.9k | assert(limit >= 6); |
661 | 80.9k | limit -= ALIGN_BITS; |
662 | 80.9k | assert(limit >= 2); |
663 | 81.6k | case SEQ_DIRECT: |
664 | | // Not worth manual unrolling |
665 | 448k | do { |
666 | 448k | rc_direct(rep0, SEQ_DIRECT); |
667 | 448k | } while (--limit > 0); |
668 | | |
669 | | // Decode the lowest four bits using |
670 | | // probabilities. |
671 | 80.7k | rep0 <<= ALIGN_BITS; |
672 | 80.7k | symbol = 1; |
673 | | #ifdef HAVE_SMALL |
674 | | offset = 0; |
675 | | case SEQ_ALIGN: |
676 | | do { |
677 | | rc_bit(coder->pos_align[ |
678 | | symbol], , |
679 | | rep0 += 1U << offset, |
680 | | SEQ_ALIGN); |
681 | | } while (++offset < ALIGN_BITS); |
682 | | #else |
683 | 80.9k | case SEQ_ALIGN0: |
684 | 80.9k | rc_bit(coder->pos_align[symbol], , |
685 | 80.7k | rep0 += 1, SEQ_ALIGN0); |
686 | 80.8k | case SEQ_ALIGN1: |
687 | 80.8k | rc_bit(coder->pos_align[symbol], , |
688 | 80.7k | rep0 += 2, SEQ_ALIGN1); |
689 | 80.8k | case SEQ_ALIGN2: |
690 | 80.8k | rc_bit(coder->pos_align[symbol], , |
691 | 80.6k | rep0 += 4, SEQ_ALIGN2); |
692 | 80.8k | case SEQ_ALIGN3: |
693 | | // Like in SEQ_DIST_MODEL, we don't |
694 | | // need "symbol" for anything else |
695 | | // than indexing the probability array. |
696 | 80.8k | rc_bit_last(coder->pos_align[symbol], , |
697 | 80.8k | rep0 += 8, SEQ_ALIGN3); |
698 | 80.6k | #endif |
699 | | |
700 | 80.6k | if (rep0 == UINT32_MAX) { |
701 | | // End of payload marker was |
702 | | // found. It may only be |
703 | | // present if |
704 | | // - uncompressed size is |
705 | | // unknown or |
706 | | // - after known uncompressed |
707 | | // size amount of bytes has |
708 | | // been decompressed and |
709 | | // caller has indicated |
710 | | // that EOPM might be used |
711 | | // (it's not allowed in |
712 | | // LZMA2). |
713 | 1 | if (!eopm_is_valid) { |
714 | 1 | ret = LZMA_DATA_ERROR; |
715 | 1 | goto out; |
716 | 1 | } |
717 | | |
718 | 0 | case SEQ_EOPM: |
719 | | // LZMA1 stream with |
720 | | // end-of-payload marker. |
721 | 0 | rc_normalize(SEQ_EOPM); |
722 | 0 | ret = rc_is_finished(rc) |
723 | 0 | ? LZMA_STREAM_END |
724 | 0 | : LZMA_DATA_ERROR; |
725 | 0 | goto out; |
726 | 0 | } |
727 | 80.6k | } |
728 | 193k | } |
729 | | |
730 | | // Validate the distance we just decoded. |
731 | 219k | if (unlikely(!dict_is_distance_valid(&dict, rep0))) { |
732 | 580 | ret = LZMA_DATA_ERROR; |
733 | 580 | goto out; |
734 | 580 | } |
735 | | |
736 | 503k | } else { |
737 | 503k | rc_update_1(coder->is_rep[state]); |
738 | | |
739 | | // Repeated match |
740 | | // |
741 | | // The match distance is a value that we have had |
742 | | // earlier. The latest four match distances are |
743 | | // available as rep0, rep1, rep2 and rep3. We will |
744 | | // now decode which of them is the new distance. |
745 | | // |
746 | | // There cannot be a match if we haven't produced |
747 | | // any output, so check that first. |
748 | 503k | if (unlikely(!dict_is_distance_valid(&dict, 0))) { |
749 | 25 | ret = LZMA_DATA_ERROR; |
750 | 25 | goto out; |
751 | 25 | } |
752 | | |
753 | 503k | case SEQ_IS_REP0: |
754 | 503k | rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) { |
755 | 67.0k | rc_update_0(coder->is_rep0[state]); |
756 | | // The distance is rep0. |
757 | | |
758 | 67.3k | case SEQ_IS_REP0_LONG: |
759 | 67.3k | rc_if_0(coder->is_rep0_long[state][pos_state], |
760 | 66.9k | SEQ_IS_REP0_LONG) { |
761 | 23.3k | rc_update_0(coder->is_rep0_long[ |
762 | 23.3k | state][pos_state]); |
763 | | |
764 | 23.3k | update_short_rep(state); |
765 | | |
766 | 23.4k | case SEQ_SHORTREP: |
767 | 23.4k | if (unlikely(dict_put(&dict, dict_get( |
768 | 23.4k | &dict, rep0)))) { |
769 | 108 | coder->sequence = SEQ_SHORTREP; |
770 | 108 | goto out; |
771 | 108 | } |
772 | | |
773 | 23.3k | continue; |
774 | 23.4k | } |
775 | | |
776 | | // Repeating more than one byte at |
777 | | // distance of rep0. |
778 | 43.5k | rc_update_1(coder->is_rep0_long[ |
779 | 43.5k | state][pos_state]); |
780 | | |
781 | 435k | } else { |
782 | 435k | rc_update_1(coder->is_rep0[state]); |
783 | | |
784 | 436k | case SEQ_IS_REP1: |
785 | | // The distance is rep1, rep2 or rep3. Once |
786 | | // we find out which one of these three, it |
787 | | // is stored to rep0 and rep1, rep2 and rep3 |
788 | | // are updated accordingly. |
789 | 436k | rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) { |
790 | 63.6k | rc_update_0(coder->is_rep1[state]); |
791 | | |
792 | 63.6k | const uint32_t distance = rep1; |
793 | 63.6k | rep1 = rep0; |
794 | 63.6k | rep0 = distance; |
795 | | |
796 | 372k | } else { |
797 | 372k | rc_update_1(coder->is_rep1[state]); |
798 | 372k | case SEQ_IS_REP2: |
799 | 372k | rc_if_0(coder->is_rep2[state], |
800 | 372k | SEQ_IS_REP2) { |
801 | 27.9k | rc_update_0(coder->is_rep2[ |
802 | 27.9k | state]); |
803 | | |
804 | 27.9k | const uint32_t distance = rep2; |
805 | 27.9k | rep2 = rep1; |
806 | 27.9k | rep1 = rep0; |
807 | 27.9k | rep0 = distance; |
808 | | |
809 | 344k | } else { |
810 | 344k | rc_update_1(coder->is_rep2[ |
811 | 344k | state]); |
812 | | |
813 | 344k | const uint32_t distance = rep3; |
814 | 344k | rep3 = rep2; |
815 | 344k | rep2 = rep1; |
816 | 344k | rep1 = rep0; |
817 | 344k | rep0 = distance; |
818 | 344k | } |
819 | 372k | } |
820 | 435k | } |
821 | | |
822 | 479k | update_long_rep(state); |
823 | | |
824 | | // Decode the length of the repeated match. |
825 | 479k | len_decode(len, coder->rep_len_decoder, |
826 | 479k | pos_state, SEQ_REP_LEN); |
827 | 479k | } |
828 | | |
829 | | ///////////////////////////////// |
830 | | // Repeat from history buffer. // |
831 | | ///////////////////////////////// |
832 | | |
833 | | // The length is always between these limits. There is no way |
834 | | // to trigger the algorithm to set len outside this range. |
835 | 696k | assert(len >= MATCH_LEN_MIN); |
836 | 696k | assert(len <= MATCH_LEN_MAX); |
837 | | |
838 | 748k | case SEQ_COPY: |
839 | | // Repeat len bytes from distance of rep0. |
840 | 748k | if (unlikely(dict_repeat(&dict, rep0, &len))) { |
841 | 51.9k | coder->sequence = SEQ_COPY; |
842 | 51.9k | goto out; |
843 | 51.9k | } |
844 | 748k | } |
845 | | |
846 | 96.3k | out: |
847 | | // Save state |
848 | | |
849 | | // NOTE: Must not copy dict.limit. |
850 | 96.3k | dictptr->pos = dict.pos; |
851 | 96.3k | dictptr->full = dict.full; |
852 | | |
853 | 96.3k | rc_from_local(coder->rc, *in_pos); |
854 | | |
855 | 96.3k | coder->state = state; |
856 | 96.3k | coder->rep0 = rep0; |
857 | 96.3k | coder->rep1 = rep1; |
858 | 96.3k | coder->rep2 = rep2; |
859 | 96.3k | coder->rep3 = rep3; |
860 | | |
861 | 96.3k | coder->probs = probs; |
862 | 96.3k | coder->symbol = symbol; |
863 | 96.3k | coder->limit = limit; |
864 | 96.3k | coder->offset = offset; |
865 | 96.3k | coder->len = len; |
866 | | |
867 | | // Update the remaining amount of uncompressed data if uncompressed |
868 | | // size was known. |
869 | 96.3k | if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) { |
870 | 96.3k | coder->uncompressed_size -= dict.pos - dict_start; |
871 | | |
872 | | // If we have gotten all the output but the decoder wants |
873 | | // to write more output, the file is corrupt. There are |
874 | | // three SEQ values where output is produced. |
875 | 96.3k | if (coder->uncompressed_size == 0 && ret == LZMA_OK |
876 | 96.3k | && (coder->sequence == SEQ_LITERAL_WRITE |
877 | 588 | || coder->sequence == SEQ_SHORTREP |
878 | 588 | || coder->sequence == SEQ_COPY)) |
879 | 8 | ret = LZMA_DATA_ERROR; |
880 | 96.3k | } |
881 | | |
882 | 96.3k | if (ret == LZMA_STREAM_END) { |
883 | | // Reset the range decoder so that it is ready to reinitialize |
884 | | // for a new LZMA2 chunk. |
885 | 9.55k | rc_reset(coder->rc); |
886 | 9.55k | coder->sequence = SEQ_IS_MATCH; |
887 | 9.55k | } |
888 | | |
889 | 96.3k | return ret; |
890 | 96.3k | } |
891 | | |
892 | | |
893 | | |
894 | | static void |
895 | | lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size, |
896 | | bool allow_eopm) |
897 | 16.9k | { |
898 | 16.9k | lzma_lzma1_decoder *coder = coder_ptr; |
899 | 16.9k | coder->uncompressed_size = uncompressed_size; |
900 | 16.9k | coder->allow_eopm = allow_eopm; |
901 | 16.9k | } |
902 | | |
903 | | |
904 | | static void |
905 | | lzma_decoder_reset(void *coder_ptr, const void *opt) |
906 | 16.6k | { |
907 | 16.6k | lzma_lzma1_decoder *coder = coder_ptr; |
908 | 16.6k | const lzma_options_lzma *options = opt; |
909 | | |
910 | | // NOTE: We assume that lc/lp/pb are valid since they were |
911 | | // successfully decoded with lzma_lzma_decode_properties(). |
912 | | |
913 | | // Calculate pos_mask. We don't need pos_bits as is for anything. |
914 | 16.6k | coder->pos_mask = (1U << options->pb) - 1; |
915 | | |
916 | | // Initialize the literal decoder. |
917 | 16.6k | literal_init(coder->literal, options->lc, options->lp); |
918 | | |
919 | 16.6k | coder->literal_context_bits = options->lc; |
920 | 16.6k | coder->literal_pos_mask = (1U << options->lp) - 1; |
921 | | |
922 | | // State |
923 | 16.6k | coder->state = STATE_LIT_LIT; |
924 | 16.6k | coder->rep0 = 0; |
925 | 16.6k | coder->rep1 = 0; |
926 | 16.6k | coder->rep2 = 0; |
927 | 16.6k | coder->rep3 = 0; |
928 | 16.6k | coder->pos_mask = (1U << options->pb) - 1; |
929 | | |
930 | | // Range decoder |
931 | 16.6k | rc_reset(coder->rc); |
932 | | |
933 | | // Bit and bittree decoders |
934 | 216k | for (uint32_t i = 0; i < STATES; ++i) { |
935 | 676k | for (uint32_t j = 0; j <= coder->pos_mask; ++j) { |
936 | 476k | bit_reset(coder->is_match[i][j]); |
937 | 476k | bit_reset(coder->is_rep0_long[i][j]); |
938 | 476k | } |
939 | | |
940 | 199k | bit_reset(coder->is_rep[i]); |
941 | 199k | bit_reset(coder->is_rep0[i]); |
942 | 199k | bit_reset(coder->is_rep1[i]); |
943 | 199k | bit_reset(coder->is_rep2[i]); |
944 | 199k | } |
945 | | |
946 | 83.3k | for (uint32_t i = 0; i < DIST_STATES; ++i) |
947 | 66.6k | bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS); |
948 | | |
949 | 1.91M | for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i) |
950 | 1.89M | bit_reset(coder->pos_special[i]); |
951 | | |
952 | 16.6k | bittree_reset(coder->pos_align, ALIGN_BITS); |
953 | | |
954 | | // Len decoders (also bit/bittree) |
955 | 16.6k | const uint32_t num_pos_states = 1U << options->pb; |
956 | 16.6k | bit_reset(coder->match_len_decoder.choice); |
957 | 16.6k | bit_reset(coder->match_len_decoder.choice2); |
958 | 16.6k | bit_reset(coder->rep_len_decoder.choice); |
959 | 16.6k | bit_reset(coder->rep_len_decoder.choice2); |
960 | | |
961 | 56.3k | for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { |
962 | 39.7k | bittree_reset(coder->match_len_decoder.low[pos_state], |
963 | 39.7k | LEN_LOW_BITS); |
964 | 39.7k | bittree_reset(coder->match_len_decoder.mid[pos_state], |
965 | 39.7k | LEN_MID_BITS); |
966 | | |
967 | 39.7k | bittree_reset(coder->rep_len_decoder.low[pos_state], |
968 | 39.7k | LEN_LOW_BITS); |
969 | 39.7k | bittree_reset(coder->rep_len_decoder.mid[pos_state], |
970 | 39.7k | LEN_MID_BITS); |
971 | 39.7k | } |
972 | | |
973 | 16.6k | bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS); |
974 | 16.6k | bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS); |
975 | | |
976 | 16.6k | coder->sequence = SEQ_IS_MATCH; |
977 | 16.6k | coder->probs = NULL; |
978 | 16.6k | coder->symbol = 0; |
979 | 16.6k | coder->limit = 0; |
980 | 16.6k | coder->offset = 0; |
981 | 16.6k | coder->len = 0; |
982 | | |
983 | 16.6k | return; |
984 | 16.6k | } |
985 | | |
986 | | |
987 | | extern lzma_ret |
988 | | lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator, |
989 | | const lzma_options_lzma *options, lzma_lz_options *lz_options) |
990 | 280k | { |
991 | 280k | if (lz->coder == NULL) { |
992 | 159k | lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator); |
993 | 159k | if (lz->coder == NULL) |
994 | 0 | return LZMA_MEM_ERROR; |
995 | | |
996 | 159k | lz->code = &lzma_decode; |
997 | 159k | lz->reset = &lzma_decoder_reset; |
998 | 159k | lz->set_uncompressed = &lzma_decoder_uncompressed; |
999 | 159k | } |
1000 | | |
1001 | | // All dictionary sizes are OK here. LZ decoder will take care of |
1002 | | // the special cases. |
1003 | 280k | lz_options->dict_size = options->dict_size; |
1004 | 280k | lz_options->preset_dict = options->preset_dict; |
1005 | 280k | lz_options->preset_dict_size = options->preset_dict_size; |
1006 | | |
1007 | 280k | return LZMA_OK; |
1008 | 280k | } |
1009 | | |
1010 | | |
1011 | | /// Allocate and initialize LZMA decoder. This is used only via LZ |
1012 | | /// initialization (lzma_lzma_decoder_init() passes function pointer to |
1013 | | /// the LZ initialization). |
1014 | | static lzma_ret |
1015 | | lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, |
1016 | | lzma_vli id, const void *options, lzma_lz_options *lz_options) |
1017 | 0 | { |
1018 | 0 | if (!is_lclppb_valid(options)) |
1019 | 0 | return LZMA_PROG_ERROR; |
1020 | | |
1021 | 0 | lzma_vli uncomp_size = LZMA_VLI_UNKNOWN; |
1022 | 0 | bool allow_eopm = true; |
1023 | |
|
1024 | 0 | if (id == LZMA_FILTER_LZMA1EXT) { |
1025 | 0 | const lzma_options_lzma *opt = options; |
1026 | | |
1027 | | // Only one flag is supported. |
1028 | 0 | if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM) |
1029 | 0 | return LZMA_OPTIONS_ERROR; |
1030 | | |
1031 | | // FIXME? Using lzma_vli instead of uint64_t is weird because |
1032 | | // this has nothing to do with .xz headers and variable-length |
1033 | | // integer encoding. On the other hand, using LZMA_VLI_UNKNOWN |
1034 | | // instead of UINT64_MAX is clearer when unknown size is |
1035 | | // meant. A problem with using lzma_vli is that now we |
1036 | | // allow > LZMA_VLI_MAX which is fine in this file but |
1037 | | // it's still confusing. Note that alone_decoder.c also |
1038 | | // allows > LZMA_VLI_MAX when setting uncompressed size. |
1039 | 0 | uncomp_size = opt->ext_size_low |
1040 | 0 | + ((uint64_t)(opt->ext_size_high) << 32); |
1041 | 0 | allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0 |
1042 | 0 | || uncomp_size == LZMA_VLI_UNKNOWN; |
1043 | 0 | } |
1044 | | |
1045 | 0 | return_if_error(lzma_lzma_decoder_create( |
1046 | 0 | lz, allocator, options, lz_options)); |
1047 | | |
1048 | 0 | lzma_decoder_reset(lz->coder, options); |
1049 | 0 | lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm); |
1050 | |
|
1051 | 0 | return LZMA_OK; |
1052 | 0 | } |
1053 | | |
1054 | | |
1055 | | extern lzma_ret |
1056 | | lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, |
1057 | | const lzma_filter_info *filters) |
1058 | 0 | { |
1059 | | // LZMA can only be the last filter in the chain. This is enforced |
1060 | | // by the raw_decoder initialization. |
1061 | 0 | assert(filters[1].init == NULL); |
1062 | |
|
1063 | 0 | return lzma_lz_decoder_init(next, allocator, filters, |
1064 | 0 | &lzma_decoder_init); |
1065 | 0 | } |
1066 | | |
1067 | | |
1068 | | extern bool |
1069 | | lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) |
1070 | 8.34k | { |
1071 | 8.34k | if (byte > (4 * 5 + 4) * 9 + 8) |
1072 | 2 | return true; |
1073 | | |
1074 | | // See the file format specification to understand this. |
1075 | 8.34k | options->pb = byte / (9 * 5); |
1076 | 8.34k | byte -= options->pb * 9 * 5; |
1077 | 8.34k | options->lp = byte / 9; |
1078 | 8.34k | options->lc = byte - options->lp * 9; |
1079 | | |
1080 | 8.34k | return options->lc + options->lp > LZMA_LCLP_MAX; |
1081 | 8.34k | } |
1082 | | |
1083 | | |
1084 | | extern uint64_t |
1085 | | lzma_lzma_decoder_memusage_nocheck(const void *options) |
1086 | 280k | { |
1087 | 280k | const lzma_options_lzma *const opt = options; |
1088 | 280k | return sizeof(lzma_lzma1_decoder) |
1089 | 280k | + lzma_lz_decoder_memusage(opt->dict_size); |
1090 | 280k | } |
1091 | | |
1092 | | |
1093 | | extern uint64_t |
1094 | | lzma_lzma_decoder_memusage(const void *options) |
1095 | 0 | { |
1096 | 0 | if (!is_lclppb_valid(options)) |
1097 | 0 | return UINT64_MAX; |
1098 | | |
1099 | 0 | return lzma_lzma_decoder_memusage_nocheck(options); |
1100 | 0 | } |
1101 | | |
1102 | | |
1103 | | extern lzma_ret |
1104 | | lzma_lzma_props_decode(void **options, const lzma_allocator *allocator, |
1105 | | const uint8_t *props, size_t props_size) |
1106 | 0 | { |
1107 | 0 | if (props_size != 5) |
1108 | 0 | return LZMA_OPTIONS_ERROR; |
1109 | | |
1110 | 0 | lzma_options_lzma *opt |
1111 | 0 | = lzma_alloc(sizeof(lzma_options_lzma), allocator); |
1112 | 0 | if (opt == NULL) |
1113 | 0 | return LZMA_MEM_ERROR; |
1114 | | |
1115 | 0 | if (lzma_lzma_lclppb_decode(opt, props[0])) |
1116 | 0 | goto error; |
1117 | | |
1118 | | // All dictionary sizes are accepted, including zero. LZ decoder |
1119 | | // will automatically use a dictionary at least a few KiB even if |
1120 | | // a smaller dictionary is requested. |
1121 | 0 | opt->dict_size = read32le(props + 1); |
1122 | |
|
1123 | 0 | opt->preset_dict = NULL; |
1124 | 0 | opt->preset_dict_size = 0; |
1125 | |
|
1126 | 0 | *options = opt; |
1127 | |
|
1128 | 0 | return LZMA_OK; |
1129 | | |
1130 | 0 | error: |
1131 | 0 | lzma_free(opt, allocator); |
1132 | 0 | return LZMA_OPTIONS_ERROR; |
1133 | 0 | } |