/src/wireshark/epan/tvbuff_lz77huff.c
Line | Count | Source |
1 | | /* |
2 | | * Decompression code for LZ77+Huffman. This encoding is used by |
3 | | * Microsoft in various file formats and protocols including SMB3. |
4 | | * |
5 | | * See MS-XCA. |
6 | | * |
7 | | * Initial code from Samba re-licensed with Samuel's permission. |
8 | | * Copyright (C) Samuel Cabrero 2017 |
9 | | * |
10 | | * Glib-ification, extra error-checking and WS integration |
11 | | * Copyright (C) Aurélien Aptel 2019 |
12 | | * |
13 | | * Wireshark - Network traffic analyzer |
14 | | * By Gerald Combs <gerald@wireshark.org> |
15 | | * Copyright 1998 Gerald Combs |
16 | | * |
17 | | * SPDX-License-Identifier: GPL-2.0-or-later |
18 | | */ |
19 | | |
20 | | #include <glib.h> |
21 | | #include <stdlib.h> /* qsort */ |
22 | | #include <epan/exceptions.h> |
23 | | #include <epan/tvbuff.h> |
24 | | #include <epan/wmem_scopes.h> |
25 | | |
26 | 0 | #define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */ |
27 | | |
28 | 0 | #define TREE_SIZE 1024 |
29 | 0 | #define ENCODED_TREE_SIZE 256 |
30 | 0 | #define SYMBOL_INFO_SIZE (2*ENCODED_TREE_SIZE) |
31 | | |
32 | | struct input { |
33 | | tvbuff_t *tvb; |
34 | | int offset; |
35 | | size_t size; |
36 | | }; |
37 | | |
38 | | /** |
39 | | * Represents a node in a Huffman prefix code tree |
40 | | */ |
41 | | struct prefix_code_node { |
42 | | /* Stores the symbol encoded by this node in the prefix code tree */ |
43 | | uint16_t symbol; |
44 | | |
45 | | /* Indicates whether this node is a leaf in the tree */ |
46 | | uint8_t leaf; |
47 | | |
48 | | /* |
49 | | * Points to the node's two children. Values are indexes in |
50 | | * the tree node array. The value -1 is used to indicate that |
51 | | * a particular child does not exist |
52 | | */ |
53 | | int16_t child[2]; |
54 | | }; |
55 | | |
56 | | /** |
57 | | * Represent information about a Huffman-encoded symbol |
58 | | */ |
59 | | struct prefix_code_symbol { |
60 | | /* Stores the symbol */ |
61 | | uint16_t symbol; |
62 | | |
63 | | /* Stores the symbol’s Huffman prefix code length */ |
64 | | uint16_t length; |
65 | | }; |
66 | | |
67 | | /** |
68 | | * Represent a byte array as a bit string from which individual bits can |
69 | | * be read |
70 | | */ |
71 | | struct bitstring { |
72 | | /* The byte array */ |
73 | | const struct input *input; |
74 | | |
75 | | /* The index in source from which the next set of bits will be pulled |
76 | | * when the bits in mask have been consumed */ |
77 | | uint32_t bitstring_index; |
78 | | |
79 | | /* Stores the next bits to be consumed in the bit string */ |
80 | | uint32_t mask; |
81 | | |
82 | | /* Stores the number of bits in mask that remain to be consumed */ |
83 | | int32_t bits; |
84 | | }; |
85 | | |
86 | | struct hf_tree { |
87 | | struct prefix_code_node *root; |
88 | | struct prefix_code_node nodes[TREE_SIZE]; |
89 | | }; |
90 | | |
91 | | static bool is_node_valid(struct hf_tree *tree, struct prefix_code_node *node) |
92 | 0 | { |
93 | 0 | return (node && node >= tree->nodes && node < tree->nodes + TREE_SIZE); |
94 | 0 | } |
95 | | |
96 | | /** |
97 | | * Links a symbol's prefix_code_node into its correct position in a Huffman |
98 | | * prefix code tree |
99 | | */ |
100 | | static int prefix_code_tree_add_leaf(struct hf_tree *tree, |
101 | | uint32_t leaf_index, |
102 | | uint32_t mask, |
103 | | uint32_t bits, |
104 | | uint32_t *out_index) |
105 | 0 | { |
106 | 0 | struct prefix_code_node *node = &tree->nodes[0]; |
107 | 0 | uint32_t i = leaf_index + 1; |
108 | 0 | uint32_t child_index; |
109 | |
|
110 | 0 | if (leaf_index >= TREE_SIZE) |
111 | 0 | return -1; |
112 | | |
113 | 0 | while (bits > 1) { |
114 | 0 | bits = bits - 1; |
115 | 0 | child_index = (mask >> bits) & 1; |
116 | 0 | if (node->child[child_index] < 0) { |
117 | 0 | if (i >= TREE_SIZE) |
118 | 0 | return -1; |
119 | 0 | node->child[child_index] = i; |
120 | 0 | tree->nodes[i].leaf = false; |
121 | 0 | i = i + 1; |
122 | 0 | } |
123 | 0 | node = tree->nodes + node->child[child_index]; |
124 | 0 | if (!is_node_valid(tree, node)) |
125 | 0 | return -1; |
126 | 0 | } |
127 | | |
128 | 0 | node->child[mask & 1] = leaf_index; |
129 | |
|
130 | 0 | *out_index = i; |
131 | 0 | return 0; |
132 | 0 | } |
133 | | |
134 | | /** |
135 | | * Determines the sort order of one prefix_code_symbol relative to another |
136 | | */ |
137 | | static int compare_symbols(const void *ve1, const void *ve2) |
138 | 0 | { |
139 | 0 | const struct prefix_code_symbol *e1 = (const struct prefix_code_symbol *)ve1; |
140 | 0 | const struct prefix_code_symbol *e2 = (const struct prefix_code_symbol *)ve2; |
141 | |
|
142 | 0 | if (e1->length < e2->length) |
143 | 0 | return -1; |
144 | 0 | else if (e1->length > e2->length) |
145 | 0 | return 1; |
146 | 0 | else if (e1->symbol < e2->symbol) |
147 | 0 | return -1; |
148 | 0 | else if (e1->symbol > e2->symbol) |
149 | 0 | return 1; |
150 | 0 | else |
151 | 0 | return 0; |
152 | 0 | } |
153 | | |
154 | | /** |
155 | | * Rebuilds the Huffman prefix code tree that will be used to decode symbols |
156 | | * during decompression |
157 | | */ |
158 | | static int PrefixCodeTreeRebuild( struct hf_tree *tree, |
159 | | const struct input *input) |
160 | 0 | { |
161 | 0 | struct prefix_code_symbol symbolInfo[SYMBOL_INFO_SIZE]; |
162 | 0 | uint32_t i, j, mask, bits; |
163 | 0 | int rc; |
164 | |
|
165 | 0 | for (i = 0; i < TREE_SIZE; i++) { |
166 | 0 | tree->nodes[i].symbol = 0; |
167 | 0 | tree->nodes[i].leaf = false; |
168 | 0 | tree->nodes[i].child[0] = -1; |
169 | 0 | tree->nodes[i].child[1] = -1; |
170 | 0 | } |
171 | |
|
172 | 0 | if (input->size < ENCODED_TREE_SIZE) |
173 | 0 | return -1; |
174 | | |
175 | 0 | for (i = 0; i < ENCODED_TREE_SIZE; i++) { |
176 | 0 | symbolInfo[2*i].symbol = 2*i; |
177 | 0 | symbolInfo[2*i].length = tvb_get_uint8(input->tvb, input->offset+i) & 15; |
178 | 0 | symbolInfo[2*i+1].symbol = 2*i+1; |
179 | 0 | symbolInfo[2*i+1].length = tvb_get_uint8(input->tvb, input->offset+i) >> 4; |
180 | 0 | } |
181 | |
|
182 | 0 | qsort(symbolInfo, SYMBOL_INFO_SIZE, sizeof(symbolInfo[0]), compare_symbols); |
183 | |
|
184 | 0 | i = 0; |
185 | 0 | while (i < SYMBOL_INFO_SIZE && symbolInfo[i].length == 0) { |
186 | 0 | i = i + 1; |
187 | 0 | } |
188 | |
|
189 | 0 | mask = 0; |
190 | 0 | bits = 1; |
191 | |
|
192 | 0 | tree->root = &tree->nodes[0]; |
193 | 0 | tree->root->leaf = false; |
194 | |
|
195 | 0 | j = 1; |
196 | 0 | for (; i < 512; i++) { |
197 | | //ws_assert(j < TREE_SIZE); |
198 | 0 | if (j >= TREE_SIZE) { |
199 | 0 | return -1; |
200 | 0 | } |
201 | 0 | tree->nodes[j].symbol = symbolInfo[i].symbol; |
202 | 0 | tree->nodes[j].leaf = true; |
203 | 0 | mask <<= symbolInfo[i].length - bits; |
204 | 0 | bits = symbolInfo[i].length; |
205 | 0 | rc = prefix_code_tree_add_leaf(tree, j, mask, bits, &j); |
206 | 0 | if (rc) |
207 | 0 | return rc; |
208 | 0 | mask += 1; |
209 | 0 | } |
210 | | |
211 | 0 | return 0; |
212 | 0 | } |
213 | | |
214 | | /** |
215 | | * Initializes a bitstream data structure |
216 | | */ |
217 | | static void bitstring_init(struct bitstring *bstr, |
218 | | const struct input *input, |
219 | | uint32_t bitstring_index) |
220 | 0 | { |
221 | 0 | bstr->mask = tvb_get_letohs(input->tvb, input->offset+bitstring_index); |
222 | 0 | bstr->mask <<= sizeof(bstr->mask) * 8 - 16; |
223 | 0 | bitstring_index += 2; |
224 | |
|
225 | 0 | bstr->mask += tvb_get_letohs(input->tvb, input->offset+bitstring_index); |
226 | 0 | bitstring_index += 2; |
227 | |
|
228 | 0 | bstr->bits = 32; |
229 | 0 | bstr->input = input; |
230 | 0 | bstr->bitstring_index = bitstring_index; |
231 | 0 | } |
232 | | |
233 | | /** |
234 | | * Returns the next n bits from the front of a bit string. |
235 | | */ |
236 | | static uint32_t bitstring_lookup(struct bitstring *bstr, uint32_t n) |
237 | 0 | { |
238 | 0 | if (n == 0 || bstr->bits < 0 || n > (uint32_t)bstr->bits) { |
239 | 0 | return 0; |
240 | 0 | } |
241 | 0 | return bstr->mask >> (sizeof(bstr->mask) * 8 - n); |
242 | 0 | } |
243 | | |
244 | | /** |
245 | | * Advances the bit string's cursor by n bits. |
246 | | */ |
247 | | static void bitstring_skip(struct bitstring *bstr, uint32_t n) |
248 | 0 | { |
249 | 0 | bstr->mask = bstr->mask << n; |
250 | 0 | bstr->bits = bstr->bits - n; |
251 | |
|
252 | 0 | if (bstr->bits < 16) { |
253 | 0 | bstr->mask += tvb_get_letohs(bstr->input->tvb, |
254 | 0 | bstr->input->offset + bstr->bitstring_index) |
255 | 0 | << (16 - bstr->bits); |
256 | 0 | bstr->bitstring_index = bstr->bitstring_index + 2; |
257 | 0 | bstr->bits = bstr->bits + 16; |
258 | 0 | } |
259 | 0 | } |
260 | | |
261 | | /** |
262 | | * Returns the symbol encoded by the next prefix code in a bit string. |
263 | | */ |
264 | | static int prefix_code_tree_decode_symbol(struct hf_tree *tree, |
265 | | struct bitstring *bstr, |
266 | | uint32_t *out_symbol) |
267 | 0 | { |
268 | 0 | uint32_t bit; |
269 | 0 | struct prefix_code_node *node = tree->root; |
270 | |
|
271 | 0 | do { |
272 | 0 | bit = bitstring_lookup(bstr, 1); |
273 | 0 | bitstring_skip(bstr, 1); |
274 | 0 | node = tree->nodes + node->child[bit]; |
275 | 0 | if (!is_node_valid(tree, node)) |
276 | 0 | return -1; |
277 | 0 | } while (node->leaf == false); |
278 | | |
279 | 0 | *out_symbol = node->symbol; |
280 | 0 | return 0; |
281 | 0 | } |
282 | | |
283 | | static bool do_uncompress(struct input *input, |
284 | | wmem_array_t *obuf) |
285 | 0 | { |
286 | 0 | uint32_t symbol; |
287 | 0 | uint32_t length; |
288 | 0 | int32_t match_offset; |
289 | 0 | int rc; |
290 | 0 | struct hf_tree tree = {0}; |
291 | 0 | struct bitstring bstr = {0}; |
292 | |
|
293 | 0 | if (!input->tvb) |
294 | 0 | return false; |
295 | | |
296 | 0 | if (!input->size || input->size > MAX_INPUT_SIZE) |
297 | 0 | return false; |
298 | | |
299 | 0 | rc = PrefixCodeTreeRebuild(&tree, input); |
300 | 0 | if (rc) |
301 | 0 | return false; |
302 | | |
303 | 0 | bitstring_init(&bstr, input, ENCODED_TREE_SIZE); |
304 | |
|
305 | 0 | while (1) { |
306 | 0 | rc = prefix_code_tree_decode_symbol(&tree, &bstr, &symbol); |
307 | 0 | if (rc < 0) |
308 | 0 | return false; |
309 | | |
310 | 0 | if (symbol < 256) { |
311 | 0 | uint8_t v = symbol & 0xFF; |
312 | 0 | wmem_array_append_one(obuf, v); |
313 | 0 | } else { |
314 | 0 | if (symbol == 256) { |
315 | | /* EOF symbol and everything consumed */ |
316 | 0 | if (bstr.bitstring_index == bstr.input->size) { |
317 | 0 | return true; |
318 | 0 | } |
319 | 0 | } |
320 | 0 | symbol = symbol - 256; |
321 | 0 | length = symbol & 0xF; |
322 | 0 | symbol = symbol >> 4; |
323 | |
|
324 | 0 | match_offset = (1U << symbol) + bitstring_lookup(&bstr, symbol); |
325 | 0 | match_offset *= -1; |
326 | |
|
327 | 0 | if (length == 15) { |
328 | 0 | if (bstr.bitstring_index >= bstr.input->size) |
329 | 0 | return false; |
330 | 0 | length = tvb_get_uint8(bstr.input->tvb, |
331 | 0 | bstr.input->offset+bstr.bitstring_index) + 15; |
332 | 0 | bstr.bitstring_index += 1; |
333 | |
|
334 | 0 | if (length == 270) { |
335 | 0 | if (bstr.bitstring_index+1 >= bstr.input->size) |
336 | 0 | return false; |
337 | 0 | length = tvb_get_letohs(bstr.input->tvb, bstr.input->offset+bstr.bitstring_index); |
338 | 0 | bstr.bitstring_index += 2; |
339 | 0 | } |
340 | 0 | } |
341 | | |
342 | 0 | bitstring_skip(&bstr, symbol); |
343 | |
|
344 | 0 | length += 3; |
345 | 0 | do { |
346 | 0 | uint8_t byte; |
347 | 0 | unsigned elem_count = wmem_array_get_count(obuf)+match_offset; |
348 | |
|
349 | 0 | if (wmem_array_try_index(obuf, elem_count, &byte)) |
350 | 0 | return false; |
351 | 0 | wmem_array_append_one(obuf, byte); |
352 | 0 | length--; |
353 | 0 | } while (length != 0); |
354 | 0 | } |
355 | 0 | } |
356 | 0 | return true; |
357 | 0 | } |
358 | | |
359 | | tvbuff_t * |
360 | | tvb_uncompress_lz77huff(tvbuff_t *tvb, |
361 | | const unsigned offset, |
362 | | unsigned input_size) |
363 | 0 | { |
364 | 0 | volatile bool ok; |
365 | 0 | wmem_allocator_t *pool; |
366 | 0 | wmem_array_t *obuf; |
367 | 0 | tvbuff_t *out; |
368 | 0 | struct input input = { |
369 | 0 | .tvb = tvb, |
370 | 0 | .offset = offset, |
371 | 0 | .size = input_size |
372 | 0 | }; |
373 | |
|
374 | 0 | pool = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE); |
375 | 0 | obuf = wmem_array_sized_new(pool, 1, input_size*2); |
376 | |
|
377 | 0 | TRY { |
378 | 0 | ok = do_uncompress(&input, obuf); |
379 | 0 | } CATCH_ALL { |
380 | 0 | ok = false; |
381 | 0 | } |
382 | 0 | ENDTRY; |
383 | |
|
384 | 0 | if (ok) { |
385 | | /* |
386 | | * Cannot pass a tvb free callback that frees the wmem |
387 | | * pool, so we make an extra copy that uses bare |
388 | | * pointers. This could be optimized if tvb API had a |
389 | | * free pool callback of some sort. |
390 | | */ |
391 | 0 | unsigned size = wmem_array_get_count(obuf); |
392 | 0 | uint8_t *p = (uint8_t *)g_malloc(size); |
393 | 0 | memcpy(p, wmem_array_get_raw(obuf), size); |
394 | 0 | out = tvb_new_real_data(p, size, size); |
395 | 0 | tvb_set_free_cb(out, g_free); |
396 | 0 | } else { |
397 | 0 | out = NULL; |
398 | 0 | } |
399 | |
|
400 | 0 | wmem_destroy_allocator(pool); |
401 | |
|
402 | 0 | return out; |
403 | 0 | } |
404 | | |
405 | | tvbuff_t * |
406 | | tvb_child_uncompress_lz77huff(tvbuff_t *parent, tvbuff_t *tvb, const unsigned offset, unsigned in_size) |
407 | 0 | { |
408 | 0 | tvbuff_t *new_tvb = tvb_uncompress_lz77huff(tvb, offset, in_size); |
409 | 0 | if (new_tvb) |
410 | 0 | tvb_set_child_real_data_tvbuff(parent, new_tvb); |
411 | 0 | return new_tvb; |
412 | 0 | } |
413 | | |
414 | | /* |
415 | | * Editor modelines - https://www.wireshark.org/tools/modelines.html |
416 | | * |
417 | | * Local variables: |
418 | | * c-basic-offset: 8 |
419 | | * tab-width: 8 |
420 | | * indent-tabs-mode: t |
421 | | * End: |
422 | | * |
423 | | * vi: set shiftwidth=8 tabstop=8 noexpandtab: |
424 | | * :indentSize=8:tabSize=8:noTabs=false: |
425 | | */ |