/src/xz/src/liblzma/lzma/lzma2_decoder.c
Line | Count | Source |
1 | | // SPDX-License-Identifier: 0BSD |
2 | | |
3 | | /////////////////////////////////////////////////////////////////////////////// |
4 | | // |
5 | | /// \file lzma2_decoder.c |
6 | | /// \brief LZMA2 decoder |
7 | | /// |
8 | | // Authors: Igor Pavlov |
9 | | // Lasse Collin |
10 | | // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #include "lzma2_decoder.h" |
14 | | #include "lz_decoder.h" |
15 | | #include "lzma_decoder.h" |
16 | | |
17 | | |
18 | | typedef struct { |
19 | | enum sequence { |
20 | | SEQ_CONTROL, |
21 | | SEQ_UNCOMPRESSED_1, |
22 | | SEQ_UNCOMPRESSED_2, |
23 | | SEQ_COMPRESSED_0, |
24 | | SEQ_COMPRESSED_1, |
25 | | SEQ_PROPERTIES, |
26 | | SEQ_LZMA, |
27 | | SEQ_COPY, |
28 | | } sequence; |
29 | | |
30 | | /// Sequence after the size fields have been decoded. |
31 | | enum sequence next_sequence; |
32 | | |
33 | | /// LZMA decoder |
34 | | lzma_lz_decoder lzma; |
35 | | |
36 | | /// Uncompressed size of LZMA chunk |
37 | | size_t uncompressed_size; |
38 | | |
39 | | /// Compressed size of the chunk (naturally equals to uncompressed |
40 | | /// size of uncompressed chunk) |
41 | | size_t compressed_size; |
42 | | |
43 | | /// True if properties are needed. This is false before the |
44 | | /// first LZMA chunk. |
45 | | bool need_properties; |
46 | | |
47 | | /// True if dictionary reset is needed. This is false before the |
48 | | /// first chunk (LZMA or uncompressed). |
49 | | bool need_dictionary_reset; |
50 | | |
51 | | lzma_options_lzma options; |
52 | | } lzma_lzma2_coder; |
53 | | |
54 | | |
55 | | static lzma_ret |
56 | | lzma2_decode(void *coder_ptr, lzma_dict *restrict dict, |
57 | | const uint8_t *restrict in, size_t *restrict in_pos, |
58 | | size_t in_size) |
59 | 792k | { |
60 | 792k | lzma_lzma2_coder *restrict coder = coder_ptr; |
61 | | |
62 | | // With SEQ_LZMA it is possible that no new input is needed to do |
63 | | // some progress. The rest of the sequences assume that there is |
64 | | // at least one byte of input. |
65 | 869k | while (*in_pos < in_size || coder->sequence == SEQ_LZMA) |
66 | 859k | switch (coder->sequence) { |
67 | 22.3k | case SEQ_CONTROL: { |
68 | 22.3k | const uint32_t control = in[*in_pos]; |
69 | 22.3k | ++*in_pos; |
70 | | |
71 | | // End marker |
72 | 22.3k | if (control == 0x00) |
73 | 3.22k | return LZMA_STREAM_END; |
74 | | |
75 | 19.1k | if (control >= 0xE0 || control == 1) { |
76 | | // Dictionary reset implies that next LZMA chunk has |
77 | | // to set new properties. |
78 | 17.3k | coder->need_properties = true; |
79 | 17.3k | coder->need_dictionary_reset = true; |
80 | 17.3k | } else if (coder->need_dictionary_reset) { |
81 | 61 | return LZMA_DATA_ERROR; |
82 | 61 | } |
83 | | |
84 | 19.0k | if (control >= 0x80) { |
85 | | // LZMA chunk. The highest five bits of the |
86 | | // uncompressed size are taken from the control byte. |
87 | 11.8k | coder->uncompressed_size = (control & 0x1F) << 16; |
88 | 11.8k | coder->sequence = SEQ_UNCOMPRESSED_1; |
89 | | |
90 | | // See if there are new properties or if we need to |
91 | | // reset the state. |
92 | 11.8k | if (control >= 0xC0) { |
93 | | // When there are new properties, state reset |
94 | | // is done at SEQ_PROPERTIES. |
95 | 11.6k | coder->need_properties = false; |
96 | 11.6k | coder->next_sequence = SEQ_PROPERTIES; |
97 | | |
98 | 11.6k | } else if (coder->need_properties) { |
99 | 148 | return LZMA_DATA_ERROR; |
100 | | |
101 | 148 | } else { |
102 | 13 | coder->next_sequence = SEQ_LZMA; |
103 | | |
104 | | // If only state reset is wanted with old |
105 | | // properties, do the resetting here for |
106 | | // simplicity. |
107 | 13 | if (control >= 0xA0) |
108 | 3 | coder->lzma.reset(coder->lzma.coder, |
109 | 3 | &coder->options); |
110 | 13 | } |
111 | 11.8k | } else { |
112 | | // Invalid control values |
113 | 7.23k | if (control > 2) |
114 | 137 | return LZMA_DATA_ERROR; |
115 | | |
116 | | // It's uncompressed chunk |
117 | 7.10k | coder->sequence = SEQ_COMPRESSED_0; |
118 | 7.10k | coder->next_sequence = SEQ_COPY; |
119 | 7.10k | } |
120 | | |
121 | 18.7k | if (coder->need_dictionary_reset) { |
122 | | // Finish the dictionary reset and let the caller |
123 | | // flush the dictionary to the actual output buffer. |
124 | 17.3k | coder->need_dictionary_reset = false; |
125 | 17.3k | dict_reset(dict); |
126 | 17.3k | return LZMA_OK; |
127 | 17.3k | } |
128 | | |
129 | 1.41k | break; |
130 | 18.7k | } |
131 | | |
132 | 11.5k | case SEQ_UNCOMPRESSED_1: |
133 | 11.5k | coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; |
134 | 11.5k | coder->sequence = SEQ_UNCOMPRESSED_2; |
135 | 11.5k | break; |
136 | | |
137 | 11.5k | case SEQ_UNCOMPRESSED_2: |
138 | 11.5k | coder->uncompressed_size += in[(*in_pos)++] + 1U; |
139 | 11.5k | coder->sequence = SEQ_COMPRESSED_0; |
140 | 11.5k | coder->lzma.set_uncompressed(coder->lzma.coder, |
141 | 11.5k | coder->uncompressed_size, false); |
142 | 11.5k | break; |
143 | | |
144 | 18.5k | case SEQ_COMPRESSED_0: |
145 | 18.5k | coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; |
146 | 18.5k | coder->sequence = SEQ_COMPRESSED_1; |
147 | 18.5k | break; |
148 | | |
149 | 18.5k | case SEQ_COMPRESSED_1: |
150 | 18.5k | coder->compressed_size += in[(*in_pos)++] + 1U; |
151 | 18.5k | coder->sequence = coder->next_sequence; |
152 | 18.5k | break; |
153 | | |
154 | 11.4k | case SEQ_PROPERTIES: |
155 | 11.4k | if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) |
156 | 164 | return LZMA_DATA_ERROR; |
157 | | |
158 | 11.2k | coder->lzma.reset(coder->lzma.coder, &coder->options); |
159 | | |
160 | 11.2k | coder->sequence = SEQ_LZMA; |
161 | 11.2k | break; |
162 | | |
163 | 753k | case SEQ_LZMA: { |
164 | | // Store the start offset so that we can update |
165 | | // coder->compressed_size later. |
166 | 753k | const size_t in_start = *in_pos; |
167 | | |
168 | | // Decode from in[] to *dict. |
169 | 753k | const lzma_ret ret = coder->lzma.code(coder->lzma.coder, |
170 | 753k | dict, in, in_pos, in_size); |
171 | | |
172 | | // Validate and update coder->compressed_size. |
173 | 753k | const size_t in_used = *in_pos - in_start; |
174 | 753k | if (in_used > coder->compressed_size) |
175 | 1.26k | return LZMA_DATA_ERROR; |
176 | | |
177 | 752k | coder->compressed_size -= in_used; |
178 | | |
179 | | // Return if we didn't finish the chunk, or an error occurred. |
180 | 752k | if (ret != LZMA_STREAM_END) |
181 | 752k | return ret; |
182 | | |
183 | | // The LZMA decoder must have consumed the whole chunk now. |
184 | | // We don't need to worry about uncompressed size since it |
185 | | // is checked by the LZMA decoder. |
186 | 159 | if (coder->compressed_size != 0) |
187 | 143 | return LZMA_DATA_ERROR; |
188 | | |
189 | 16 | coder->sequence = SEQ_CONTROL; |
190 | 16 | break; |
191 | 159 | } |
192 | | |
193 | 12.4k | case SEQ_COPY: { |
194 | | // Copy from input to the dictionary as is. |
195 | 12.4k | dict_write(dict, in, in_pos, in_size, &coder->compressed_size); |
196 | 12.4k | if (coder->compressed_size != 0) |
197 | 8.26k | return LZMA_OK; |
198 | | |
199 | 4.18k | coder->sequence = SEQ_CONTROL; |
200 | 4.18k | break; |
201 | 12.4k | } |
202 | | |
203 | 0 | default: |
204 | 0 | assert(0); |
205 | 0 | return LZMA_PROG_ERROR; |
206 | 859k | } |
207 | | |
208 | 9.35k | return LZMA_OK; |
209 | 792k | } |
210 | | |
211 | | |
212 | | static void |
213 | | lzma2_decoder_end(void *coder_ptr, const lzma_allocator *allocator) |
214 | 9.13k | { |
215 | 9.13k | lzma_lzma2_coder *coder = coder_ptr; |
216 | | |
217 | 9.13k | assert(coder->lzma.end == NULL); |
218 | 9.13k | lzma_free(coder->lzma.coder, allocator); |
219 | | |
220 | 9.13k | lzma_free(coder, allocator); |
221 | | |
222 | 9.13k | return; |
223 | 9.13k | } |
224 | | |
225 | | |
226 | | static lzma_ret |
227 | | lzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, |
228 | | lzma_vli id lzma_attribute((__unused__)), const void *opt, |
229 | | lzma_lz_options *lz_options) |
230 | 18.2k | { |
231 | 18.2k | lzma_lzma2_coder *coder = lz->coder; |
232 | 18.2k | if (coder == NULL) { |
233 | 9.13k | coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator); |
234 | 9.13k | if (coder == NULL) |
235 | 0 | return LZMA_MEM_ERROR; |
236 | | |
237 | 9.13k | lz->coder = coder; |
238 | 9.13k | lz->code = &lzma2_decode; |
239 | 9.13k | lz->end = &lzma2_decoder_end; |
240 | | |
241 | 9.13k | coder->lzma = LZMA_LZ_DECODER_INIT; |
242 | 9.13k | } |
243 | | |
244 | 18.2k | const lzma_options_lzma *options = opt; |
245 | | |
246 | 18.2k | coder->sequence = SEQ_CONTROL; |
247 | 18.2k | coder->need_properties = true; |
248 | 18.2k | coder->need_dictionary_reset = options->preset_dict == NULL |
249 | 0 | || options->preset_dict_size == 0; |
250 | | |
251 | 18.2k | return lzma_lzma_decoder_create(&coder->lzma, |
252 | 18.2k | allocator, options, lz_options); |
253 | 18.2k | } |
254 | | |
255 | | |
256 | | extern lzma_ret |
257 | | lzma_lzma2_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, |
258 | | const lzma_filter_info *filters) |
259 | 18.2k | { |
260 | | // LZMA2 can only be the last filter in the chain. This is enforced |
261 | | // by the raw_decoder initialization. |
262 | 18.2k | assert(filters[1].init == NULL); |
263 | | |
264 | 18.2k | return lzma_lz_decoder_init(next, allocator, filters, |
265 | 18.2k | &lzma2_decoder_init); |
266 | 18.2k | } |
267 | | |
268 | | |
269 | | extern uint64_t |
270 | | lzma_lzma2_decoder_memusage(const void *options) |
271 | 18.2k | { |
272 | 18.2k | return sizeof(lzma_lzma2_coder) |
273 | 18.2k | + lzma_lzma_decoder_memusage_nocheck(options); |
274 | 18.2k | } |
275 | | |
276 | | |
277 | | extern lzma_ret |
278 | | lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator, |
279 | | const uint8_t *props, size_t props_size) |
280 | 18.5k | { |
281 | 18.5k | if (props_size != 1) |
282 | 40 | return LZMA_OPTIONS_ERROR; |
283 | | |
284 | | // Check that reserved bits are unset. |
285 | 18.4k | if (props[0] & 0xC0) |
286 | 34 | return LZMA_OPTIONS_ERROR; |
287 | | |
288 | | // Decode the dictionary size. |
289 | 18.4k | if (props[0] > 40) |
290 | 27 | return LZMA_OPTIONS_ERROR; |
291 | | |
292 | 18.4k | lzma_options_lzma *opt = lzma_alloc( |
293 | 18.4k | sizeof(lzma_options_lzma), allocator); |
294 | 18.4k | if (opt == NULL) |
295 | 0 | return LZMA_MEM_ERROR; |
296 | | |
297 | 18.4k | if (props[0] == 40) { |
298 | 31 | opt->dict_size = UINT32_MAX; |
299 | 18.3k | } else { |
300 | 18.3k | opt->dict_size = 2 | (props[0] & 1U); |
301 | 18.3k | opt->dict_size <<= props[0] / 2U + 11; |
302 | 18.3k | } |
303 | | |
304 | 18.4k | opt->preset_dict = NULL; |
305 | 18.4k | opt->preset_dict_size = 0; |
306 | | |
307 | 18.4k | *options = opt; |
308 | | |
309 | 18.4k | return LZMA_OK; |
310 | 18.4k | } |