/src/clamav/libclammspack/mspack/lzxd.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* This file is part of libmspack. |
2 | | * (C) 2003-2023 Stuart Caie. |
3 | | * |
4 | | * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted |
5 | | * by Microsoft Corporation. |
6 | | * |
7 | | * libmspack is free software; you can redistribute it and/or modify it under |
8 | | * the terms of the GNU Lesser General Public License (LGPL) version 2.1 |
9 | | * |
10 | | * For further details, see the file COPYING.LIB distributed with libmspack |
11 | | */ |
12 | | |
13 | | /* LZX decompression implementation */ |
14 | | |
15 | | #include <system.h> |
16 | | #include <lzx.h> |
17 | | |
18 | | /* Microsoft's LZX document (in cab-sdk.exe) and their implementation |
19 | | * of the com.ms.util.cab Java package do not concur. |
20 | | * |
21 | | * In the LZX document, there is a table showing the correlation between |
22 | | * window size and the number of position slots. It states that the 1MB |
23 | | * window = 40 slots and the 2MB window = 42 slots. In the implementation, |
24 | | * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the |
25 | | * first slot whose position base is equal to or more than the required |
26 | | * window size'. This would explain why other tables in the document refer |
27 | | * to 50 slots rather than 42. |
28 | | * |
29 | | * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode |
30 | | * is not defined in the specification. |
31 | | * |
32 | | * The LZX document does not state the uncompressed block has an |
33 | | * uncompressed length field. Where does this length field come from, so |
34 | | * we can know how large the block is? The implementation has it as the 24 |
35 | | * bits following after the 3 blocktype bits, before the alignment |
36 | | * padding. |
37 | | * |
38 | | * The LZX document states that aligned offset blocks have their aligned |
39 | | * offset huffman tree AFTER the main and length trees. The implementation |
40 | | * suggests that the aligned offset tree is BEFORE the main and length |
41 | | * trees. |
42 | | * |
43 | | * The LZX document decoding algorithm states that, in an aligned offset |
44 | | * block, if an extra_bits value is 1, 2 or 3, then that number of bits |
45 | | * should be read and the result added to the match offset. This is |
46 | | * correct for 1 and 2, but not 3, where just a huffman symbol (using the |
47 | | * aligned tree) should be read. |
48 | | * |
49 | | * Regarding the E8 preprocessing, the LZX document states 'No translation |
50 | | * may be performed on the last 6 bytes of the input block'. This is |
51 | | * correct. However, the pseudocode provided checks for the *E8 leader* |
52 | | * up to the last 6 bytes. If the leader appears between -10 and -7 bytes |
53 | | * from the end, this would cause the next four bytes to be modified, at |
54 | | * least one of which would be in the last 6 bytes, which is not allowed |
55 | | * according to the spec. |
56 | | * |
57 | | * The specification states that the huffman trees must always contain at |
58 | | * least one element. However, many CAB files contain blocks where the |
59 | | * length tree is completely empty (because there are no matches), and |
60 | | * this is expected to succeed. |
61 | | * |
62 | | * The errors in LZX documentation appear have been corrected in the |
63 | | * new documentation for the LZX DELTA format. |
64 | | * |
65 | | * http://msdn.microsoft.com/en-us/library/cc483133.aspx |
66 | | * |
67 | | * However, this is a different format, an extension of regular LZX. |
68 | | * I have noticed the following differences, there may be more: |
69 | | * |
70 | | * The maximum window size has increased from 2MB to 32MB. This also |
71 | | * increases the maximum number of position slots, etc. |
72 | | * |
73 | | * If the match length is 257 (the maximum possible), this signals |
74 | | * a further length decoding step, that allows for matches up to |
75 | | * 33024 bytes long. |
76 | | * |
77 | | * The format now allows for "reference data", supplied by the caller. |
78 | | * If match offsets go further back than the number of bytes |
79 | | * decompressed so far, that is them accessing the reference data. |
80 | | */ |
81 | | |
82 | | /* import bit-reading macros and code */ |
83 | | #define BITS_TYPE struct lzxd_stream |
84 | 840k | #define BITS_VAR lzx |
85 | | #define BITS_ORDER_MSB |
86 | 504k | #define READ_BYTES do { \ |
87 | 504k | unsigned char b0, b1; \ |
88 | 504k | READ_IF_NEEDED; b0 = *i_ptr++; \ |
89 | 497k | READ_IF_NEEDED; b1 = *i_ptr++; \ |
90 | 494k | INJECT_BITS((b1 << 8) | b0, 16); \ |
91 | 494k | } while (0) |
92 | | #include <readbits.h> |
93 | | |
94 | | /* import huffman-reading macros and code */ |
95 | 33.8k | #define TABLEBITS(tbl) LZX_##tbl##_TABLEBITS |
96 | 2.28M | #define MAXSYMBOLS(tbl) LZX_##tbl##_MAXSYMBOLS |
97 | 2.31M | #define HUFF_TABLE(tbl,idx) lzx->tbl##_table[idx] |
98 | 2.34M | #define HUFF_LEN(tbl,idx) lzx->tbl##_len[idx] |
99 | 0 | #define HUFF_ERROR return lzx->error = MSPACK_ERR_DECRUNCH |
100 | | #include <readhuff.h> |
101 | | |
102 | | /* BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */ |
103 | | #define BUILD_TABLE(tbl) \ |
104 | 32.6k | if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl), \ |
105 | 32.6k | &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0))) \ |
106 | 32.6k | { \ |
107 | 13.8k | D(("failed to build %s table", #tbl)) \ |
108 | 13.8k | return lzx->error = MSPACK_ERR_DECRUNCH; \ |
109 | 13.8k | } |
110 | | |
111 | 489 | #define BUILD_TABLE_MAYBE_EMPTY(tbl) do { \ |
112 | 489 | lzx->tbl##_empty = 0; \ |
113 | 489 | if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl), \ |
114 | 489 | &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0))) \ |
115 | 489 | { \ |
116 | 978 | for (i = 0; i < MAXSYMBOLS(tbl); i++) { \ |
117 | 978 | if (HUFF_LEN(tbl, i) > 0) { \ |
118 | 489 | D(("failed to build %s table", #tbl)) \ |
119 | 489 | return lzx->error = MSPACK_ERR_DECRUNCH; \ |
120 | 489 | } \ |
121 | 978 | } \ |
122 | 489 | /* empty tree - allow it, but don't decode symbols with it */ \ |
123 | 489 | lzx->tbl##_empty = 1; \ |
124 | 0 | } \ |
125 | 489 | } while (0) |
126 | | |
127 | | /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols |
128 | | * first to last in the given table. The code lengths are stored in their |
129 | | * own special LZX way. |
130 | | */ |
131 | 27.6k | #define READ_LENGTHS(tbl, first, last) do { \ |
132 | 27.6k | STORE_BITS; \ |
133 | 27.6k | if (lzxd_read_lens(lzx, &HUFF_LEN(tbl, 0), (first), \ |
134 | 27.6k | (unsigned int)(last))) return lzx->error; \ |
135 | 27.6k | RESTORE_BITS; \ |
136 | 13.2k | } while (0) |
137 | | |
138 | | static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens, |
139 | | unsigned int first, unsigned int last) |
140 | 27.6k | { |
141 | 27.6k | DECLARE_HUFF_VARS; |
142 | 27.6k | unsigned int x, y; |
143 | 27.6k | int z; |
144 | | |
145 | 27.6k | RESTORE_BITS; |
146 | | |
147 | | /* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */ |
148 | 568k | for (x = 0; x < 20; x++) { |
149 | 541k | READ_BITS(y, 4); |
150 | 540k | lzx->PRETREE_len[x] = y; |
151 | 540k | } |
152 | 26.6k | BUILD_TABLE(PRETREE); |
153 | | |
154 | 2.08M | for (x = first; x < last; ) { |
155 | 2.06M | READ_HUFFSYM(PRETREE, z); |
156 | 2.06M | if (z == 17) { |
157 | | /* code = 17, run of ([read 4 bits]+4) zeros */ |
158 | 80.0k | READ_BITS(y, 4); y += 4; |
159 | 1.00M | while (y--) lens[x++] = 0; |
160 | 80.0k | } |
161 | 1.98M | else if (z == 18) { |
162 | | /* code = 18, run of ([read 5 bits]+20) zeros */ |
163 | 11.0k | READ_BITS(y, 5); y += 20; |
164 | 386k | while (y--) lens[x++] = 0; |
165 | 11.0k | } |
166 | 1.97M | else if (z == 19) { |
167 | | /* code = 19, run of ([read 1 bit]+4) [read huffman symbol] */ |
168 | 216k | READ_BITS(y, 1); y += 4; |
169 | 216k | READ_HUFFSYM(PRETREE, z); |
170 | 215k | z = lens[x] - z; if (z < 0) z += 17; |
171 | 1.15M | while (y--) lens[x++] = z; |
172 | 215k | } |
173 | 1.75M | else { |
174 | | /* code = 0 to 16, delta current length entry */ |
175 | 1.75M | z = lens[x] - z; if (z < 0) z += 17; |
176 | 1.75M | lens[x++] = z; |
177 | 1.75M | } |
178 | 2.06M | } |
179 | | |
180 | 13.2k | STORE_BITS; |
181 | | |
182 | 13.2k | return MSPACK_ERR_OK; |
183 | 15.8k | } |
184 | | |
185 | | /* LZX static data tables: |
186 | | * |
187 | | * LZX uses 'position slots' to represent match offsets. For every match, |
188 | | * a small 'position slot' number and a small offset from that slot are |
189 | | * encoded instead of one large offset. |
190 | | * |
191 | | * The number of slots is decided by how many are needed to encode the |
192 | | * largest offset for a given window size. This is easy when the gap between |
193 | | * slots is less than 128Kb, it's a linear relationship. But when extra_bits |
194 | | * reaches its limit of 17 (because LZX can only ensure reading 17 bits of |
195 | | * data at a time), we can only jump 128Kb at a time and have to start |
196 | | * using more and more position slots as each window size doubles. |
197 | | * |
198 | | * position_base[] is an index to the position slot bases |
199 | | * |
200 | | * extra_bits[] states how many bits of offset-from-base data is needed. |
201 | | * |
202 | | * They are calculated as follows: |
203 | | * extra_bits[i] = 0 where i < 4 |
204 | | * extra_bits[i] = floor(i/2)-1 where i >= 4 && i < 36 |
205 | | * extra_bits[i] = 17 where i >= 36 |
206 | | * position_base[0] = 0 |
207 | | * position_base[i] = position_base[i-1] + (1 << extra_bits[i-1]) |
208 | | */ |
209 | | static const unsigned int position_slots[11] = { |
210 | | 30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290 |
211 | | }; |
212 | | static const unsigned char extra_bits[36] = { |
213 | | 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, |
214 | | 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16 |
215 | | }; |
216 | | static const unsigned int position_base[290] = { |
217 | | 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, |
218 | | 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, |
219 | | 49152, 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, |
220 | | 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936, |
221 | | 1835008, 1966080, 2097152, 2228224, 2359296, 2490368, 2621440, 2752512, |
222 | | 2883584, 3014656, 3145728, 3276800, 3407872, 3538944, 3670016, 3801088, |
223 | | 3932160, 4063232, 4194304, 4325376, 4456448, 4587520, 4718592, 4849664, |
224 | | 4980736, 5111808, 5242880, 5373952, 5505024, 5636096, 5767168, 5898240, |
225 | | 6029312, 6160384, 6291456, 6422528, 6553600, 6684672, 6815744, 6946816, |
226 | | 7077888, 7208960, 7340032, 7471104, 7602176, 7733248, 7864320, 7995392, |
227 | | 8126464, 8257536, 8388608, 8519680, 8650752, 8781824, 8912896, 9043968, |
228 | | 9175040, 9306112, 9437184, 9568256, 9699328, 9830400, 9961472, 10092544, |
229 | | 10223616, 10354688, 10485760, 10616832, 10747904, 10878976, 11010048, |
230 | | 11141120, 11272192, 11403264, 11534336, 11665408, 11796480, 11927552, |
231 | | 12058624, 12189696, 12320768, 12451840, 12582912, 12713984, 12845056, |
232 | | 12976128, 13107200, 13238272, 13369344, 13500416, 13631488, 13762560, |
233 | | 13893632, 14024704, 14155776, 14286848, 14417920, 14548992, 14680064, |
234 | | 14811136, 14942208, 15073280, 15204352, 15335424, 15466496, 15597568, |
235 | | 15728640, 15859712, 15990784, 16121856, 16252928, 16384000, 16515072, |
236 | | 16646144, 16777216, 16908288, 17039360, 17170432, 17301504, 17432576, |
237 | | 17563648, 17694720, 17825792, 17956864, 18087936, 18219008, 18350080, |
238 | | 18481152, 18612224, 18743296, 18874368, 19005440, 19136512, 19267584, |
239 | | 19398656, 19529728, 19660800, 19791872, 19922944, 20054016, 20185088, |
240 | | 20316160, 20447232, 20578304, 20709376, 20840448, 20971520, 21102592, |
241 | | 21233664, 21364736, 21495808, 21626880, 21757952, 21889024, 22020096, |
242 | | 22151168, 22282240, 22413312, 22544384, 22675456, 22806528, 22937600, |
243 | | 23068672, 23199744, 23330816, 23461888, 23592960, 23724032, 23855104, |
244 | | 23986176, 24117248, 24248320, 24379392, 24510464, 24641536, 24772608, |
245 | | 24903680, 25034752, 25165824, 25296896, 25427968, 25559040, 25690112, |
246 | | 25821184, 25952256, 26083328, 26214400, 26345472, 26476544, 26607616, |
247 | | 26738688, 26869760, 27000832, 27131904, 27262976, 27394048, 27525120, |
248 | | 27656192, 27787264, 27918336, 28049408, 28180480, 28311552, 28442624, |
249 | | 28573696, 28704768, 28835840, 28966912, 29097984, 29229056, 29360128, |
250 | | 29491200, 29622272, 29753344, 29884416, 30015488, 30146560, 30277632, |
251 | | 30408704, 30539776, 30670848, 30801920, 30932992, 31064064, 31195136, |
252 | | 31326208, 31457280, 31588352, 31719424, 31850496, 31981568, 32112640, |
253 | | 32243712, 32374784, 32505856, 32636928, 32768000, 32899072, 33030144, |
254 | | 33161216, 33292288, 33423360 |
255 | | }; |
256 | | |
257 | 32.7k | static void lzxd_reset_state(struct lzxd_stream *lzx) { |
258 | 32.7k | int i; |
259 | | |
260 | 32.7k | lzx->R0 = 1; |
261 | 32.7k | lzx->R1 = 1; |
262 | 32.7k | lzx->R2 = 1; |
263 | 32.7k | lzx->header_read = 0; |
264 | 32.7k | lzx->block_remaining = 0; |
265 | 32.7k | lzx->block_type = LZX_BLOCKTYPE_INVALID; |
266 | | |
267 | | /* initialise tables to 0 (because deltas will be applied to them) */ |
268 | 84.4M | for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) lzx->MAINTREE_len[i] = 0; |
269 | 8.22M | for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) lzx->LENGTH_len[i] = 0; |
270 | 32.7k | } |
271 | | |
272 | | /*-------- main LZX code --------*/ |
273 | | |
274 | | struct lzxd_stream *lzxd_init(struct mspack_system *system, |
275 | | struct mspack_file *input, |
276 | | struct mspack_file *output, |
277 | | int window_bits, |
278 | | int reset_interval, |
279 | | int input_buffer_size, |
280 | | off_t output_length, |
281 | | char is_delta) |
282 | 34.2k | { |
283 | 34.2k | unsigned int window_size = 1 << window_bits; |
284 | 34.2k | struct lzxd_stream *lzx; |
285 | | |
286 | 34.2k | if (!system) return NULL; |
287 | | |
288 | | /* LZX DELTA window sizes are between 2^17 (128KiB) and 2^25 (32MiB), |
289 | | * regular LZX windows are between 2^15 (32KiB) and 2^21 (2MiB) |
290 | | */ |
291 | 34.2k | if (is_delta) { |
292 | 0 | if (window_bits < 17 || window_bits > 25) return NULL; |
293 | 0 | } |
294 | 34.2k | else { |
295 | 34.2k | if (window_bits < 15 || window_bits > 21) return NULL; |
296 | 34.2k | } |
297 | | |
298 | 32.7k | if (reset_interval < 0 || output_length < 0) { |
299 | 0 | D(("reset interval or output length < 0")) |
300 | 0 | return NULL; |
301 | 0 | } |
302 | | |
303 | | /* round up input buffer size to multiple of two */ |
304 | 32.7k | input_buffer_size = (input_buffer_size + 1) & -2; |
305 | 32.7k | if (input_buffer_size < 2) return NULL; |
306 | | |
307 | | /* allocate decompression state */ |
308 | 32.7k | if (!(lzx = (struct lzxd_stream *) system->alloc(system, sizeof(struct lzxd_stream)))) { |
309 | 0 | return NULL; |
310 | 0 | } |
311 | | |
312 | | /* allocate decompression window and input buffer */ |
313 | 32.7k | lzx->window = (unsigned char *) system->alloc(system, window_size); |
314 | 32.7k | lzx->inbuf = (unsigned char *) system->alloc(system, input_buffer_size); |
315 | 32.7k | if (!lzx->window || !lzx->inbuf) { |
316 | 0 | system->free(lzx->window); |
317 | 0 | system->free(lzx->inbuf); |
318 | 0 | system->free(lzx); |
319 | 0 | return NULL; |
320 | 0 | } |
321 | | |
322 | | /* initialise decompression state */ |
323 | 32.7k | lzx->sys = system; |
324 | 32.7k | lzx->input = input; |
325 | 32.7k | lzx->output = output; |
326 | 32.7k | lzx->offset = 0; |
327 | 32.7k | lzx->length = output_length; |
328 | | |
329 | 32.7k | lzx->inbuf_size = input_buffer_size; |
330 | 32.7k | lzx->window_size = 1 << window_bits; |
331 | 32.7k | lzx->ref_data_size = 0; |
332 | 32.7k | lzx->window_posn = 0; |
333 | 32.7k | lzx->frame_posn = 0; |
334 | 32.7k | lzx->frame = 0; |
335 | 32.7k | lzx->reset_interval = reset_interval; |
336 | 32.7k | lzx->intel_filesize = 0; |
337 | 32.7k | lzx->intel_started = 0; |
338 | 32.7k | lzx->error = MSPACK_ERR_OK; |
339 | 32.7k | lzx->num_offsets = position_slots[window_bits - 15] << 3; |
340 | 32.7k | lzx->is_delta = is_delta; |
341 | | |
342 | 32.7k | lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0]; |
343 | 32.7k | lzxd_reset_state(lzx); |
344 | 32.7k | INIT_BITS; |
345 | 32.7k | return lzx; |
346 | 32.7k | } |
347 | | |
348 | | int lzxd_set_reference_data(struct lzxd_stream *lzx, |
349 | | struct mspack_system *system, |
350 | | struct mspack_file *input, |
351 | | unsigned int length) |
352 | 0 | { |
353 | 0 | if (!lzx) return MSPACK_ERR_ARGS; |
354 | | |
355 | 0 | if (!lzx->is_delta) { |
356 | 0 | D(("only LZX DELTA streams support reference data")) |
357 | 0 | return MSPACK_ERR_ARGS; |
358 | 0 | } |
359 | 0 | if (lzx->offset) { |
360 | 0 | D(("too late to set reference data after decoding starts")) |
361 | 0 | return MSPACK_ERR_ARGS; |
362 | 0 | } |
363 | 0 | if (length > lzx->window_size) { |
364 | 0 | D(("reference length (%u) is longer than the window", length)) |
365 | 0 | return MSPACK_ERR_ARGS; |
366 | 0 | } |
367 | 0 | if (length > 0 && (!system || !input)) { |
368 | 0 | D(("length > 0 but no system or input")) |
369 | 0 | return MSPACK_ERR_ARGS; |
370 | 0 | } |
371 | | |
372 | 0 | lzx->ref_data_size = length; |
373 | 0 | if (length > 0) { |
374 | | /* copy reference data */ |
375 | 0 | unsigned char *pos = &lzx->window[lzx->window_size - length]; |
376 | 0 | int bytes = system->read(input, pos, length); |
377 | | /* length can't be more than 2^25, so no signedness problem */ |
378 | 0 | if (bytes < (int)length) return MSPACK_ERR_READ; |
379 | 0 | } |
380 | 0 | lzx->ref_data_size = length; |
381 | 0 | return MSPACK_ERR_OK; |
382 | 0 | } |
383 | | |
384 | 28.7k | void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) { |
385 | 28.7k | if (lzx && out_bytes > 0) lzx->length = out_bytes; |
386 | 28.7k | } |
387 | | |
388 | 42.5k | int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { |
389 | 42.5k | DECLARE_HUFF_VARS; |
390 | 42.5k | unsigned char *window, *runsrc, *rundest, buf[12], warned = 0; |
391 | 42.5k | unsigned int frame_size, end_frame, window_posn, R0, R1, R2; |
392 | 42.5k | int bytes_todo, this_run, i, j; |
393 | | |
394 | | /* easy answers */ |
395 | 42.5k | if (!lzx || (out_bytes < 0)) return MSPACK_ERR_ARGS; |
396 | 42.5k | if (lzx->error) return lzx->error; |
397 | | |
398 | | /* flush out any stored-up bytes before we begin */ |
399 | 33.9k | i = lzx->o_end - lzx->o_ptr; |
400 | 33.9k | if ((off_t) i > out_bytes) i = (int) out_bytes; |
401 | 33.9k | if (i) { |
402 | 1.59k | if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) { |
403 | 0 | return lzx->error = MSPACK_ERR_WRITE; |
404 | 0 | } |
405 | 1.59k | lzx->o_ptr += i; |
406 | 1.59k | lzx->offset += i; |
407 | 1.59k | out_bytes -= i; |
408 | 1.59k | } |
409 | 33.9k | if (out_bytes == 0) return MSPACK_ERR_OK; |
410 | | |
411 | | /* restore local state */ |
412 | 33.2k | RESTORE_BITS; |
413 | 33.2k | window = lzx->window; |
414 | 33.2k | window_posn = lzx->window_posn; |
415 | 33.2k | R0 = lzx->R0; |
416 | 33.2k | R1 = lzx->R1; |
417 | 33.2k | R2 = lzx->R2; |
418 | | |
419 | 33.2k | end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1; |
420 | | |
421 | 23.9M | while (lzx->frame < end_frame) { |
422 | | /* have we reached the reset interval? (if there is one?) */ |
423 | 23.9M | if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) { |
424 | 0 | if (lzx->block_remaining) { |
425 | | /* this is a file format error, we can make a best effort to extract what we can */ |
426 | 0 | D(("%d bytes remaining at reset interval", lzx->block_remaining)) |
427 | 0 | if (!warned) { |
428 | 0 | lzx->sys->message(NULL, "WARNING; invalid reset interval detected during LZX decompression"); |
429 | 0 | warned++; |
430 | 0 | } |
431 | 0 | } |
432 | | |
433 | | /* re-read the intel header and reset the huffman lengths */ |
434 | 0 | lzxd_reset_state(lzx); |
435 | 0 | R0 = lzx->R0; |
436 | 0 | R1 = lzx->R1; |
437 | 0 | R2 = lzx->R2; |
438 | 0 | } |
439 | | |
440 | | /* LZX DELTA format has chunk_size, not present in LZX format */ |
441 | 23.9M | if (lzx->is_delta) { |
442 | 0 | ENSURE_BITS(16); |
443 | 0 | REMOVE_BITS(16); |
444 | 0 | } |
445 | | |
446 | | /* read header if necessary */ |
447 | 23.9M | if (!lzx->header_read) { |
448 | | /* read 1 bit. if bit=0, intel filesize = 0. |
449 | | * if bit=1, read intel filesize (32 bits) */ |
450 | 32.3k | j = 0; READ_BITS(i, 1); if (i) { READ_BITS(i, 16); READ_BITS(j, 16); } |
451 | 29.2k | lzx->intel_filesize = (i << 16) | j; |
452 | 29.2k | lzx->header_read = 1; |
453 | 29.2k | } |
454 | | |
455 | | /* calculate size of frame: all frames are 32k except the final frame |
456 | | * which is 32kb or less. this can only be calculated when lzx->length |
457 | | * has been filled in. */ |
458 | 23.9M | frame_size = LZX_FRAME_SIZE; |
459 | 23.9M | if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) { |
460 | 23.9M | frame_size = lzx->length - lzx->offset; |
461 | 23.9M | } |
462 | | |
463 | | /* decode until one more frame is available */ |
464 | 23.9M | bytes_todo = lzx->frame_posn + frame_size - window_posn; |
465 | 23.9M | while (bytes_todo > 0) { |
466 | | /* initialise new block, if one is needed */ |
467 | 42.1k | if (lzx->block_remaining == 0) { |
468 | | /* realign if previous block was an odd-sized UNCOMPRESSED block */ |
469 | 41.9k | if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) && |
470 | 41.9k | (lzx->block_length & 1)) |
471 | 12.5k | { |
472 | 12.5k | READ_IF_NEEDED; |
473 | 12.5k | i_ptr++; |
474 | 12.5k | } |
475 | | |
476 | | /* read block type (3 bits) and block length (24 bits) */ |
477 | 41.9k | READ_BITS(lzx->block_type, 3); |
478 | 41.3k | READ_BITS(i, 16); READ_BITS(j, 8); |
479 | 38.8k | lzx->block_remaining = lzx->block_length = (i << 8) | j; |
480 | | /*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/ |
481 | | |
482 | | /* read individual block headers */ |
483 | 38.8k | switch (lzx->block_type) { |
484 | 3.99k | case LZX_BLOCKTYPE_ALIGNED: |
485 | | /* read lengths of and build aligned huffman decoding tree */ |
486 | 35.3k | for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; } |
487 | 3.90k | BUILD_TABLE(ALIGNED); |
488 | | /* rest of aligned header is same as verbatim */ /*@fallthrough@*/ |
489 | 15.7k | case LZX_BLOCKTYPE_VERBATIM: |
490 | | /* read lengths of and build main huffman decoding tree */ |
491 | 15.7k | READ_LENGTHS(MAINTREE, 0, 256); |
492 | 10.7k | READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + lzx->num_offsets); |
493 | 2.08k | BUILD_TABLE(MAINTREE); |
494 | | /* if the literal 0xE8 is anywhere in the block... */ |
495 | 1.11k | if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1; |
496 | | /* read lengths of and build lengths huffman decoding tree */ |
497 | 1.11k | READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS); |
498 | 489 | BUILD_TABLE_MAYBE_EMPTY(LENGTH); |
499 | 0 | break; |
500 | | |
501 | 19.8k | case LZX_BLOCKTYPE_UNCOMPRESSED: |
502 | | /* because we can't assume otherwise */ |
503 | 19.8k | lzx->intel_started = 1; |
504 | | |
505 | | /* read 1-16 (not 0-15) bits to align to bytes */ |
506 | 19.8k | if (bits_left == 0) ENSURE_BITS(16); |
507 | 19.8k | bits_left = 0; bit_buffer = 0; |
508 | | |
509 | | /* read 12 bytes of stored R0 / R1 / R2 values */ |
510 | 253k | for (rundest = &buf[0], i = 0; i < 12; i++) { |
511 | 234k | READ_IF_NEEDED; |
512 | 233k | *rundest++ = *i_ptr++; |
513 | 233k | } |
514 | 19.0k | R0 = EndGetI32(&buf[0]); |
515 | 19.0k | R1 = EndGetI32(&buf[4]); |
516 | 19.0k | R2 = EndGetI32(&buf[8]); |
517 | 19.0k | break; |
518 | | |
519 | 977 | default: |
520 | 977 | D(("bad block type")) |
521 | 977 | return lzx->error = MSPACK_ERR_DECRUNCH; |
522 | 38.8k | } |
523 | 38.8k | } |
524 | | |
525 | | /* decode more of the block: |
526 | | * run = min(what's available, what's needed) */ |
527 | 19.2k | this_run = lzx->block_remaining; |
528 | 19.2k | if (this_run > bytes_todo) this_run = bytes_todo; |
529 | | |
530 | | /* assume we decode exactly this_run bytes, for now */ |
531 | 19.2k | bytes_todo -= this_run; |
532 | 19.2k | lzx->block_remaining -= this_run; |
533 | | |
534 | | /* decode at least this_run bytes */ |
535 | 19.2k | switch (lzx->block_type) { |
536 | 0 | case LZX_BLOCKTYPE_ALIGNED: |
537 | 0 | case LZX_BLOCKTYPE_VERBATIM: |
538 | 0 | while (this_run > 0) { |
539 | 0 | int main_element, length_footer, verbatim_bits, aligned_bits, extra; |
540 | 0 | int match_length; |
541 | 0 | unsigned int match_offset; |
542 | 0 | READ_HUFFSYM(MAINTREE, main_element); |
543 | 0 | if (main_element < LZX_NUM_CHARS) { |
544 | | /* literal: 0 to LZX_NUM_CHARS-1 */ |
545 | 0 | window[window_posn++] = main_element; |
546 | 0 | this_run--; |
547 | 0 | } |
548 | 0 | else { |
549 | | /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ |
550 | 0 | main_element -= LZX_NUM_CHARS; |
551 | | |
552 | | /* get match length */ |
553 | 0 | match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; |
554 | 0 | if (match_length == LZX_NUM_PRIMARY_LENGTHS) { |
555 | 0 | if (lzx->LENGTH_empty) { |
556 | 0 | D(("LENGTH symbol needed but tree is empty")) |
557 | 0 | return lzx->error = MSPACK_ERR_DECRUNCH; |
558 | 0 | } |
559 | 0 | READ_HUFFSYM(LENGTH, length_footer); |
560 | 0 | match_length += length_footer; |
561 | 0 | } |
562 | 0 | match_length += LZX_MIN_MATCH; |
563 | | |
564 | | /* get match offset */ |
565 | 0 | switch ((match_offset = (main_element >> 3))) { |
566 | 0 | case 0: match_offset = R0; break; |
567 | 0 | case 1: match_offset = R1; R1=R0; R0 = match_offset; break; |
568 | 0 | case 2: match_offset = R2; R2=R0; R0 = match_offset; break; |
569 | 0 | default: |
570 | 0 | extra = (match_offset >= 36) ? 17 : extra_bits[match_offset]; |
571 | 0 | match_offset = position_base[match_offset] - 2; |
572 | 0 | if (extra >= 3 && lzx->block_type == LZX_BLOCKTYPE_ALIGNED) { |
573 | 0 | if (extra > 3) { |
574 | 0 | READ_BITS(verbatim_bits, extra - 3); /* 1-14 bits */ |
575 | 0 | match_offset += verbatim_bits << 3; |
576 | 0 | } |
577 | 0 | READ_HUFFSYM(ALIGNED, aligned_bits); |
578 | 0 | match_offset += aligned_bits; |
579 | 0 | } |
580 | 0 | else if (extra) { |
581 | 0 | READ_BITS(verbatim_bits, extra); /* 1-17 bits */ |
582 | 0 | match_offset += verbatim_bits; |
583 | 0 | } |
584 | | /* update repeated offset LRU queue */ |
585 | 0 | R2 = R1; R1 = R0; R0 = match_offset; |
586 | 0 | } |
587 | | |
588 | | /* LZX DELTA uses max match length to signal even longer match */ |
589 | 0 | if (match_length == LZX_MAX_MATCH && lzx->is_delta) { |
590 | 0 | int extra_len = 0; |
591 | 0 | ENSURE_BITS(3); /* 4 entry huffman tree */ |
592 | 0 | if (PEEK_BITS(1) == 0) { |
593 | 0 | REMOVE_BITS(1); /* '0' -> 8 extra length bits */ |
594 | 0 | READ_BITS(extra_len, 8); |
595 | 0 | } |
596 | 0 | else if (PEEK_BITS(2) == 2) { |
597 | 0 | REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */ |
598 | 0 | READ_BITS(extra_len, 10); |
599 | 0 | extra_len += 0x100; |
600 | 0 | } |
601 | 0 | else if (PEEK_BITS(3) == 6) { |
602 | 0 | REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */ |
603 | 0 | READ_BITS(extra_len, 12); |
604 | 0 | extra_len += 0x500; |
605 | 0 | } |
606 | 0 | else { |
607 | 0 | REMOVE_BITS(3); /* '111' -> 15 extra length bits */ |
608 | 0 | READ_BITS(extra_len, 15); |
609 | 0 | } |
610 | 0 | match_length += extra_len; |
611 | 0 | } |
612 | | |
613 | 0 | if ((window_posn + match_length) > lzx->window_size) { |
614 | 0 | D(("match ran over window wrap")) |
615 | 0 | return lzx->error = MSPACK_ERR_DECRUNCH; |
616 | 0 | } |
617 | | |
618 | | /* copy match */ |
619 | 0 | rundest = &window[window_posn]; |
620 | 0 | i = match_length; |
621 | | /* does match offset wrap the window? */ |
622 | 0 | if (match_offset > window_posn) { |
623 | 0 | if ((off_t)match_offset > lzx->offset && |
624 | 0 | (match_offset - window_posn) > lzx->ref_data_size) |
625 | 0 | { |
626 | 0 | D(("match offset beyond LZX stream")) |
627 | 0 | return lzx->error = MSPACK_ERR_DECRUNCH; |
628 | 0 | } |
629 | | /* j = length from match offset to end of window */ |
630 | 0 | j = match_offset - window_posn; |
631 | 0 | if (j > (int) lzx->window_size) { |
632 | 0 | D(("match offset beyond window boundaries")) |
633 | 0 | return lzx->error = MSPACK_ERR_DECRUNCH; |
634 | 0 | } |
635 | 0 | runsrc = &window[lzx->window_size - j]; |
636 | 0 | if (j < i) { |
637 | | /* if match goes over the window edge, do two copy runs */ |
638 | 0 | i -= j; while (j-- > 0) *rundest++ = *runsrc++; |
639 | 0 | runsrc = window; |
640 | 0 | } |
641 | 0 | while (i-- > 0) *rundest++ = *runsrc++; |
642 | 0 | } |
643 | 0 | else { |
644 | 0 | runsrc = rundest - match_offset; |
645 | 0 | while (i-- > 0) *rundest++ = *runsrc++; |
646 | 0 | } |
647 | | |
648 | 0 | this_run -= match_length; |
649 | 0 | window_posn += match_length; |
650 | 0 | } |
651 | 0 | } /* while (this_run > 0) */ |
652 | 0 | break; |
653 | | |
654 | 19.2k | case LZX_BLOCKTYPE_UNCOMPRESSED: |
655 | | /* as this_run is limited not to wrap a frame, this also means it |
656 | | * won't wrap the window (as the window is a multiple of 32k) */ |
657 | 19.2k | rundest = &window[window_posn]; |
658 | 19.2k | window_posn += this_run; |
659 | 45.3k | while (this_run > 0) { |
660 | 27.0k | if ((i = i_end - i_ptr) == 0) { |
661 | 4.60k | READ_IF_NEEDED; |
662 | 4.60k | } |
663 | 22.4k | else { |
664 | 22.4k | if (i > this_run) i = this_run; |
665 | 22.4k | lzx->sys->copy(i_ptr, rundest, i); |
666 | 22.4k | rundest += i; |
667 | 22.4k | i_ptr += i; |
668 | 22.4k | this_run -= i; |
669 | 22.4k | } |
670 | 27.0k | } |
671 | 18.2k | break; |
672 | | |
673 | 18.2k | default: |
674 | 0 | return lzx->error = MSPACK_ERR_DECRUNCH; /* might as well */ |
675 | 19.2k | } |
676 | | |
677 | | /* did the final match overrun our desired this_run length? */ |
678 | 18.2k | if (this_run < 0) { |
679 | 0 | if ((unsigned int)(-this_run) > lzx->block_remaining) { |
680 | 0 | D(("overrun went past end of block by %d (%d remaining)", |
681 | 0 | -this_run, lzx->block_remaining )) |
682 | 0 | return lzx->error = MSPACK_ERR_DECRUNCH; |
683 | 0 | } |
684 | 0 | lzx->block_remaining -= -this_run; |
685 | 0 | } |
686 | 18.2k | } /* while (bytes_todo > 0) */ |
687 | | |
688 | | /* streams don't extend over frame boundaries */ |
689 | 23.9M | if ((window_posn - lzx->frame_posn) != frame_size) { |
690 | 0 | D(("decode beyond output frame limits! %d != %d", |
691 | 0 | window_posn - lzx->frame_posn, frame_size)) |
692 | 0 | return lzx->error = MSPACK_ERR_DECRUNCH; |
693 | 0 | } |
694 | | |
695 | | /* re-align input bitstream */ |
696 | 23.9M | if (bits_left > 0) ENSURE_BITS(16); |
697 | 23.9M | if (bits_left & 15) REMOVE_BITS(bits_left & 15); |
698 | | |
699 | | /* check that we've used all of the previous frame first */ |
700 | 23.9M | if (lzx->o_ptr != lzx->o_end) { |
701 | 0 | D(("%ld avail bytes, new %d frame", |
702 | 0 | (long)(lzx->o_end - lzx->o_ptr), frame_size)) |
703 | 0 | return lzx->error = MSPACK_ERR_DECRUNCH; |
704 | 0 | } |
705 | | |
706 | | /* does this intel block _really_ need decoding? */ |
707 | 23.9M | if (lzx->intel_started && lzx->intel_filesize && |
708 | 23.9M | (lzx->frame < 32768) && (frame_size > 10)) |
709 | 5.18k | { |
710 | 5.18k | unsigned char *data = &lzx->e8_buf[0]; |
711 | 5.18k | unsigned char *dataend = &lzx->e8_buf[frame_size - 10]; |
712 | 5.18k | signed int curpos = (int) lzx->offset; |
713 | 5.18k | signed int filesize = lzx->intel_filesize; |
714 | 5.18k | signed int abs_off, rel_off; |
715 | | |
716 | | /* copy e8 block to the e8 buffer and tweak if needed */ |
717 | 5.18k | lzx->o_ptr = data; |
718 | 5.18k | lzx->sys->copy(&lzx->window[lzx->frame_posn], data, frame_size); |
719 | | |
720 | 5.98M | while (data < dataend) { |
721 | 5.98M | if (*data++ != 0xE8) { curpos++; continue; } |
722 | 6.70k | abs_off = EndGetI32(data); |
723 | 6.70k | if ((abs_off >= -curpos) && (abs_off < filesize)) { |
724 | 5.46k | rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize; |
725 | 5.46k | data[0] = (unsigned char) rel_off; |
726 | 5.46k | data[1] = (unsigned char) (rel_off >> 8); |
727 | 5.46k | data[2] = (unsigned char) (rel_off >> 16); |
728 | 5.46k | data[3] = (unsigned char) (rel_off >> 24); |
729 | 5.46k | } |
730 | 6.70k | data += 4; |
731 | 6.70k | curpos += 5; |
732 | 6.70k | } |
733 | 5.18k | } |
734 | 23.9M | else { |
735 | 23.9M | lzx->o_ptr = &lzx->window[lzx->frame_posn]; |
736 | 23.9M | } |
737 | 23.9M | lzx->o_end = &lzx->o_ptr[frame_size]; |
738 | | |
739 | | /* write a frame */ |
740 | 23.9M | i = (out_bytes < (off_t)frame_size) ? (unsigned int)out_bytes : frame_size; |
741 | 23.9M | if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) { |
742 | 0 | return lzx->error = MSPACK_ERR_WRITE; |
743 | 0 | } |
744 | 23.9M | lzx->o_ptr += i; |
745 | 23.9M | lzx->offset += i; |
746 | 23.9M | out_bytes -= i; |
747 | | |
748 | | /* advance frame start position */ |
749 | 23.9M | lzx->frame_posn += frame_size; |
750 | 23.9M | lzx->frame++; |
751 | | |
752 | | /* wrap window / frame position pointers */ |
753 | 23.9M | if (window_posn == lzx->window_size) window_posn = 0; |
754 | 23.9M | if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0; |
755 | | |
756 | 23.9M | } /* while (lzx->frame < end_frame) */ |
757 | | |
758 | 6.18k | if (out_bytes) { |
759 | 4.55k | D(("bytes left to output")) |
760 | 4.55k | return lzx->error = MSPACK_ERR_DECRUNCH; |
761 | 4.55k | } |
762 | | |
763 | | /* store local state */ |
764 | 1.62k | STORE_BITS; |
765 | 1.62k | lzx->window_posn = window_posn; |
766 | 1.62k | lzx->R0 = R0; |
767 | 1.62k | lzx->R1 = R1; |
768 | 1.62k | lzx->R2 = R2; |
769 | | |
770 | 1.62k | return MSPACK_ERR_OK; |
771 | 6.18k | } |
772 | | |
773 | 32.7k | void lzxd_free(struct lzxd_stream *lzx) { |
774 | 32.7k | struct mspack_system *sys; |
775 | 32.7k | if (lzx) { |
776 | 32.7k | sys = lzx->sys; |
777 | 32.7k | sys->free(lzx->inbuf); |
778 | 32.7k | sys->free(lzx->window); |
779 | 32.7k | sys->free(lzx); |
780 | 32.7k | } |
781 | 32.7k | } |