/src/xz/src/liblzma/lzma/lzma2_decoder.c
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////////////// |
2 | | // |
3 | | /// \file lzma2_decoder.c |
4 | | /// \brief LZMA2 decoder |
5 | | /// |
6 | | // Authors: Igor Pavlov |
7 | | // Lasse Collin |
8 | | // |
9 | | // This file has been put into the public domain. |
10 | | // You can do whatever you want with this file. |
11 | | // |
12 | | /////////////////////////////////////////////////////////////////////////////// |
13 | | |
14 | | #include "lzma2_decoder.h" |
15 | | #include "lz_decoder.h" |
16 | | #include "lzma_decoder.h" |
17 | | |
18 | | |
19 | | typedef struct { |
20 | | enum sequence { |
21 | | SEQ_CONTROL, |
22 | | SEQ_UNCOMPRESSED_1, |
23 | | SEQ_UNCOMPRESSED_2, |
24 | | SEQ_COMPRESSED_0, |
25 | | SEQ_COMPRESSED_1, |
26 | | SEQ_PROPERTIES, |
27 | | SEQ_LZMA, |
28 | | SEQ_COPY, |
29 | | } sequence; |
30 | | |
31 | | /// Sequence after the size fields have been decoded. |
32 | | enum sequence next_sequence; |
33 | | |
34 | | /// LZMA decoder |
35 | | lzma_lz_decoder lzma; |
36 | | |
37 | | /// Uncompressed size of LZMA chunk |
38 | | size_t uncompressed_size; |
39 | | |
40 | | /// Compressed size of the chunk (naturally equals to uncompressed |
41 | | /// size of uncompressed chunk) |
42 | | size_t compressed_size; |
43 | | |
44 | | /// True if properties are needed. This is false before the |
45 | | /// first LZMA chunk. |
46 | | bool need_properties; |
47 | | |
48 | | /// True if dictionary reset is needed. This is false before the |
49 | | /// first chunk (LZMA or uncompressed). |
50 | | bool need_dictionary_reset; |
51 | | |
52 | | lzma_options_lzma options; |
53 | | } lzma_lzma2_coder; |
54 | | |
55 | | |
56 | | static lzma_ret |
57 | | lzma2_decode(void *coder_ptr, lzma_dict *restrict dict, |
58 | | const uint8_t *restrict in, size_t *restrict in_pos, |
59 | | size_t in_size) |
60 | 410k | { |
61 | 410k | lzma_lzma2_coder *restrict coder = coder_ptr; |
62 | | |
63 | | // With SEQ_LZMA it is possible that no new input is needed to do |
64 | | // some progress. The rest of the sequences assume that there is |
65 | | // at least one byte of input. |
66 | 649k | while (*in_pos < in_size || coder->sequence == SEQ_LZMA) |
67 | 645k | switch (coder->sequence) { |
68 | 334k | case SEQ_CONTROL: { |
69 | 334k | const uint32_t control = in[*in_pos]; |
70 | 334k | ++*in_pos; |
71 | | |
72 | | // End marker |
73 | 334k | if (control == 0x00) |
74 | 272k | return LZMA_STREAM_END; |
75 | | |
76 | 62.3k | if (control >= 0xE0 || control == 1) { |
77 | | // Dictionary reset implies that next LZMA chunk has |
78 | | // to set new properties. |
79 | 44.8k | coder->need_properties = true; |
80 | 44.8k | coder->need_dictionary_reset = true; |
81 | 44.8k | } else if (coder->need_dictionary_reset) { |
82 | 31 | return LZMA_DATA_ERROR; |
83 | 31 | } |
84 | | |
85 | 62.3k | if (control >= 0x80) { |
86 | | // LZMA chunk. The highest five bits of the |
87 | | // uncompressed size are taken from the control byte. |
88 | 17.1k | coder->uncompressed_size = (control & 0x1F) << 16; |
89 | 17.1k | coder->sequence = SEQ_UNCOMPRESSED_1; |
90 | | |
91 | | // See if there are new properties or if we need to |
92 | | // reset the state. |
93 | 17.1k | if (control >= 0xC0) { |
94 | | // When there are new properties, state reset |
95 | | // is done at SEQ_PROPERTIES. |
96 | 8.39k | coder->need_properties = false; |
97 | 8.39k | coder->next_sequence = SEQ_PROPERTIES; |
98 | | |
99 | 8.71k | } else if (coder->need_properties) { |
100 | 8 | return LZMA_DATA_ERROR; |
101 | | |
102 | 8.70k | } else { |
103 | 8.70k | coder->next_sequence = SEQ_LZMA; |
104 | | |
105 | | // If only state reset is wanted with old |
106 | | // properties, do the resetting here for |
107 | | // simplicity. |
108 | 8.70k | if (control >= 0xA0) |
109 | 8.32k | coder->lzma.reset(coder->lzma.coder, |
110 | 8.32k | &coder->options); |
111 | 8.70k | } |
112 | 45.1k | } else { |
113 | | // Invalid control values |
114 | 45.1k | if (control > 2) |
115 | 10 | return LZMA_DATA_ERROR; |
116 | | |
117 | | // It's uncompressed chunk |
118 | 45.1k | coder->sequence = SEQ_COMPRESSED_0; |
119 | 45.1k | coder->next_sequence = SEQ_COPY; |
120 | 45.1k | } |
121 | | |
122 | 62.2k | if (coder->need_dictionary_reset) { |
123 | | // Finish the dictionary reset and let the caller |
124 | | // flush the dictionary to the actual output buffer. |
125 | 44.8k | coder->need_dictionary_reset = false; |
126 | 44.8k | dict_reset(dict); |
127 | 44.8k | return LZMA_OK; |
128 | 44.8k | } |
129 | | |
130 | 17.4k | break; |
131 | 62.2k | } |
132 | | |
133 | 17.4k | case SEQ_UNCOMPRESSED_1: |
134 | 17.0k | coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; |
135 | 17.0k | coder->sequence = SEQ_UNCOMPRESSED_2; |
136 | 17.0k | break; |
137 | | |
138 | 16.9k | case SEQ_UNCOMPRESSED_2: |
139 | 16.9k | coder->uncompressed_size += in[(*in_pos)++] + 1U; |
140 | 16.9k | coder->sequence = SEQ_COMPRESSED_0; |
141 | 16.9k | coder->lzma.set_uncompressed(coder->lzma.coder, |
142 | 16.9k | coder->uncompressed_size, false); |
143 | 16.9k | break; |
144 | | |
145 | 62.1k | case SEQ_COMPRESSED_0: |
146 | 62.1k | coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; |
147 | 62.1k | coder->sequence = SEQ_COMPRESSED_1; |
148 | 62.1k | break; |
149 | | |
150 | 62.1k | case SEQ_COMPRESSED_1: |
151 | 62.1k | coder->compressed_size += in[(*in_pos)++] + 1U; |
152 | 62.1k | coder->sequence = coder->next_sequence; |
153 | 62.1k | break; |
154 | | |
155 | 8.34k | case SEQ_PROPERTIES: |
156 | 8.34k | if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) |
157 | 9 | return LZMA_DATA_ERROR; |
158 | | |
159 | 8.33k | coder->lzma.reset(coder->lzma.coder, &coder->options); |
160 | | |
161 | 8.33k | coder->sequence = SEQ_LZMA; |
162 | 8.33k | break; |
163 | | |
164 | 96.6k | case SEQ_LZMA: { |
165 | | // Store the start offset so that we can update |
166 | | // coder->compressed_size later. |
167 | 96.6k | const size_t in_start = *in_pos; |
168 | | |
169 | | // Decode from in[] to *dict. |
170 | 96.6k | const lzma_ret ret = coder->lzma.code(coder->lzma.coder, |
171 | 96.6k | dict, in, in_pos, in_size); |
172 | | |
173 | | // Validate and update coder->compressed_size. |
174 | 96.6k | const size_t in_used = *in_pos - in_start; |
175 | 96.6k | if (in_used > coder->compressed_size) |
176 | 805 | return LZMA_DATA_ERROR; |
177 | | |
178 | 95.8k | coder->compressed_size -= in_used; |
179 | | |
180 | | // Return if we didn't finish the chunk, or an error occurred. |
181 | 95.8k | if (ret != LZMA_STREAM_END) |
182 | 86.3k | return ret; |
183 | | |
184 | | // The LZMA decoder must have consumed the whole chunk now. |
185 | | // We don't need to worry about uncompressed size since it |
186 | | // is checked by the LZMA decoder. |
187 | 9.55k | if (coder->compressed_size != 0) |
188 | 13 | return LZMA_DATA_ERROR; |
189 | | |
190 | 9.54k | coder->sequence = SEQ_CONTROL; |
191 | 9.54k | break; |
192 | 9.55k | } |
193 | | |
194 | 47.3k | case SEQ_COPY: { |
195 | | // Copy from input to the dictionary as is. |
196 | 47.3k | dict_write(dict, in, in_pos, in_size, &coder->compressed_size); |
197 | 47.3k | if (coder->compressed_size != 0) |
198 | 2.64k | return LZMA_OK; |
199 | | |
200 | 44.7k | coder->sequence = SEQ_CONTROL; |
201 | 44.7k | break; |
202 | 47.3k | } |
203 | | |
204 | 0 | default: |
205 | 0 | assert(0); |
206 | 0 | return LZMA_PROG_ERROR; |
207 | 645k | } |
208 | | |
209 | 3.77k | return LZMA_OK; |
210 | 410k | } |
211 | | |
212 | | |
213 | | static void |
214 | | lzma2_decoder_end(void *coder_ptr, const lzma_allocator *allocator) |
215 | 159k | { |
216 | 159k | lzma_lzma2_coder *coder = coder_ptr; |
217 | | |
218 | 159k | assert(coder->lzma.end == NULL); |
219 | 159k | lzma_free(coder->lzma.coder, allocator); |
220 | | |
221 | 159k | lzma_free(coder, allocator); |
222 | | |
223 | 159k | return; |
224 | 159k | } |
225 | | |
226 | | |
227 | | static lzma_ret |
228 | | lzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, |
229 | | lzma_vli id lzma_attribute((__unused__)), const void *opt, |
230 | | lzma_lz_options *lz_options) |
231 | 280k | { |
232 | 280k | lzma_lzma2_coder *coder = lz->coder; |
233 | 280k | if (coder == NULL) { |
234 | 159k | coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator); |
235 | 159k | if (coder == NULL) |
236 | 0 | return LZMA_MEM_ERROR; |
237 | | |
238 | 159k | lz->coder = coder; |
239 | 159k | lz->code = &lzma2_decode; |
240 | 159k | lz->end = &lzma2_decoder_end; |
241 | | |
242 | 159k | coder->lzma = LZMA_LZ_DECODER_INIT; |
243 | 159k | } |
244 | | |
245 | 280k | const lzma_options_lzma *options = opt; |
246 | | |
247 | 280k | coder->sequence = SEQ_CONTROL; |
248 | 280k | coder->need_properties = true; |
249 | 280k | coder->need_dictionary_reset = options->preset_dict == NULL |
250 | 280k | || options->preset_dict_size == 0; |
251 | | |
252 | 280k | return lzma_lzma_decoder_create(&coder->lzma, |
253 | 280k | allocator, options, lz_options); |
254 | 280k | } |
255 | | |
256 | | |
257 | | extern lzma_ret |
258 | | lzma_lzma2_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, |
259 | | const lzma_filter_info *filters) |
260 | 280k | { |
261 | | // LZMA2 can only be the last filter in the chain. This is enforced |
262 | | // by the raw_decoder initialization. |
263 | 280k | assert(filters[1].init == NULL); |
264 | | |
265 | 280k | return lzma_lz_decoder_init(next, allocator, filters, |
266 | 280k | &lzma2_decoder_init); |
267 | 280k | } |
268 | | |
269 | | |
270 | | extern uint64_t |
271 | | lzma_lzma2_decoder_memusage(const void *options) |
272 | 280k | { |
273 | 280k | return sizeof(lzma_lzma2_coder) |
274 | 280k | + lzma_lzma_decoder_memusage_nocheck(options); |
275 | 280k | } |
276 | | |
277 | | |
278 | | extern lzma_ret |
279 | | lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator, |
280 | | const uint8_t *props, size_t props_size) |
281 | 280k | { |
282 | 280k | if (props_size != 1) |
283 | 4 | return LZMA_OPTIONS_ERROR; |
284 | | |
285 | | // Check that reserved bits are unset. |
286 | 280k | if (props[0] & 0xC0) |
287 | 4 | return LZMA_OPTIONS_ERROR; |
288 | | |
289 | | // Decode the dictionary size. |
290 | 280k | if (props[0] > 40) |
291 | 4 | return LZMA_OPTIONS_ERROR; |
292 | | |
293 | 280k | lzma_options_lzma *opt = lzma_alloc( |
294 | 280k | sizeof(lzma_options_lzma), allocator); |
295 | 280k | if (opt == NULL) |
296 | 0 | return LZMA_MEM_ERROR; |
297 | | |
298 | 280k | if (props[0] == 40) { |
299 | 17 | opt->dict_size = UINT32_MAX; |
300 | 280k | } else { |
301 | 280k | opt->dict_size = 2 | (props[0] & 1U); |
302 | 280k | opt->dict_size <<= props[0] / 2U + 11; |
303 | 280k | } |
304 | | |
305 | 280k | opt->preset_dict = NULL; |
306 | 280k | opt->preset_dict_size = 0; |
307 | | |
308 | 280k | *options = opt; |
309 | | |
310 | 280k | return LZMA_OK; |
311 | 280k | } |