Coverage Report

Created: 2025-12-27 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wireshark/epan/tvbuff_lz77huff.c
Line
Count
Source
1
/*
2
 * Decompression code for LZ77+Huffman. This encoding is used by
3
 * Microsoft in various file formats and protocols including SMB3.
4
 *
5
 * See MS-XCA.
6
 *
7
 * Initial code from Samba re-licensed with Samuel's permission.
8
 * Copyright (C) Samuel Cabrero 2017
9
 *
10
 * Glib-ification, extra error-checking and WS integration
11
 * Copyright (C) Aurélien Aptel 2019
12
 *
13
 * Wireshark - Network traffic analyzer
14
 * By Gerald Combs <gerald@wireshark.org>
15
 * Copyright 1998 Gerald Combs
16
 *
17
 * SPDX-License-Identifier: GPL-2.0-or-later
18
 */
19
20
#include <glib.h>
21
#include <stdlib.h> /* qsort */
22
#include <epan/exceptions.h>
23
#include <epan/tvbuff.h>
24
#include <epan/wmem_scopes.h>
25
26
0
#define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */
27
28
0
#define TREE_SIZE 1024
29
0
#define ENCODED_TREE_SIZE 256
30
0
#define SYMBOL_INFO_SIZE (2*ENCODED_TREE_SIZE)
31
32
struct input {
33
  tvbuff_t *tvb;
34
  int offset;
35
  size_t size;
36
};
37
38
/**
39
 * Represents a node in a Huffman prefix code tree
40
 */
41
struct prefix_code_node {
42
  /* Stores the symbol encoded by this node in the prefix code tree */
43
  uint16_t symbol;
44
45
  /* Indicates whether this node is a leaf in the tree */
46
  uint8_t leaf;
47
48
  /*
49
   * Points to the node's two children. Values are indexes in
50
   * the tree node array. The value -1 is used to indicate that
51
   * a particular child does not exist
52
   */
53
  int16_t child[2];
54
};
55
56
/**
57
 * Represent information about a Huffman-encoded symbol
58
 */
59
struct prefix_code_symbol {
60
  /* Stores the symbol */
61
  uint16_t symbol;
62
63
  /* Stores the symbol’s Huffman prefix code length */
64
  uint16_t length;
65
};
66
67
/**
68
 * Represent a byte array as a bit string from which individual bits can
69
 * be read
70
 */
71
struct bitstring {
72
  /* The byte array */
73
  const struct input *input;
74
75
  /* The index in source from which the next set of bits will be pulled
76
         * when the bits in mask have been consumed */
77
  uint32_t bitstring_index;
78
79
  /* Stores the next bits to be consumed in the bit string */
80
  uint32_t mask;
81
82
  /* Stores the number of bits in mask that remain to be consumed */
83
  int32_t bits;
84
};
85
86
struct hf_tree {
87
  struct prefix_code_node *root;
88
  struct prefix_code_node nodes[TREE_SIZE];
89
};
90
91
static bool is_node_valid(struct hf_tree *tree, struct prefix_code_node *node)
92
0
{
93
0
        return (node && node >= tree->nodes && node < tree->nodes + TREE_SIZE);
94
0
}
95
96
/**
97
 * Links a symbol's prefix_code_node into its correct position in a Huffman
98
 * prefix code tree
99
 */
100
static int prefix_code_tree_add_leaf(struct hf_tree *tree,
101
             uint32_t leaf_index,
102
             uint32_t mask,
103
             uint32_t bits,
104
             uint32_t *out_index)
105
0
{
106
0
  struct prefix_code_node *node = &tree->nodes[0];
107
0
  uint32_t i = leaf_index + 1;
108
0
  uint32_t child_index;
109
110
0
  if (leaf_index >= TREE_SIZE)
111
0
    return -1;
112
113
0
  while (bits > 1) {
114
0
    bits = bits - 1;
115
0
    child_index = (mask >> bits) & 1;
116
0
    if (node->child[child_index] < 0) {
117
0
      if (i >= TREE_SIZE)
118
0
        return -1;
119
0
      node->child[child_index] = i;
120
0
      tree->nodes[i].leaf = false;
121
0
      i = i + 1;
122
0
    }
123
0
    node = tree->nodes + node->child[child_index];
124
0
    if (!is_node_valid(tree, node))
125
0
      return -1;
126
0
  }
127
128
0
  node->child[mask & 1] = leaf_index;
129
130
0
  *out_index = i;
131
0
  return 0;
132
0
}
133
134
/**
135
 * Determines the sort order of one prefix_code_symbol relative to another
136
 */
137
static int compare_symbols(const void *ve1, const void *ve2)
138
0
{
139
0
  const struct prefix_code_symbol *e1 = (const struct prefix_code_symbol *)ve1;
140
0
  const struct prefix_code_symbol *e2 = (const struct prefix_code_symbol *)ve2;
141
142
0
  if (e1->length < e2->length)
143
0
    return -1;
144
0
  else if (e1->length > e2->length)
145
0
    return 1;
146
0
  else if (e1->symbol < e2->symbol)
147
0
    return -1;
148
0
  else if (e1->symbol > e2->symbol)
149
0
    return 1;
150
0
  else
151
0
    return 0;
152
0
}
153
154
/**
155
 * Rebuilds the Huffman prefix code tree that will be used to decode symbols
156
 * during decompression
157
 */
158
static int PrefixCodeTreeRebuild( struct hf_tree *tree,
159
         const struct input *input)
160
0
{
161
0
  struct prefix_code_symbol symbolInfo[SYMBOL_INFO_SIZE];
162
0
  uint32_t i, j, mask, bits;
163
0
  int rc;
164
165
0
  for (i = 0; i < TREE_SIZE; i++) {
166
0
    tree->nodes[i].symbol = 0;
167
0
    tree->nodes[i].leaf = false;
168
0
    tree->nodes[i].child[0] = -1;
169
0
    tree->nodes[i].child[1] = -1;
170
0
  }
171
172
0
  if (input->size < ENCODED_TREE_SIZE)
173
0
    return -1;
174
175
0
  for (i = 0; i < ENCODED_TREE_SIZE; i++) {
176
0
    symbolInfo[2*i].symbol = 2*i;
177
0
    symbolInfo[2*i].length = tvb_get_uint8(input->tvb, input->offset+i) & 15;
178
0
    symbolInfo[2*i+1].symbol = 2*i+1;
179
0
    symbolInfo[2*i+1].length = tvb_get_uint8(input->tvb, input->offset+i) >> 4;
180
0
  }
181
182
0
  qsort(symbolInfo, SYMBOL_INFO_SIZE, sizeof(symbolInfo[0]), compare_symbols);
183
184
0
  i = 0;
185
0
  while (i < SYMBOL_INFO_SIZE && symbolInfo[i].length == 0) {
186
0
    i = i + 1;
187
0
  }
188
189
0
  mask = 0;
190
0
  bits = 1;
191
192
0
  tree->root = &tree->nodes[0];
193
0
  tree->root->leaf = false;
194
195
0
  j = 1;
196
0
  for (; i < 512; i++) {
197
    //ws_assert(j < TREE_SIZE);
198
0
    if (j >= TREE_SIZE) {
199
0
      return -1;
200
0
    }
201
0
    tree->nodes[j].symbol = symbolInfo[i].symbol;
202
0
    tree->nodes[j].leaf = true;
203
0
    mask <<= symbolInfo[i].length - bits;
204
0
    bits = symbolInfo[i].length;
205
0
    rc = prefix_code_tree_add_leaf(tree, j, mask, bits, &j);
206
0
    if (rc)
207
0
      return rc;
208
0
    mask += 1;
209
0
  }
210
211
0
  return 0;
212
0
}
213
214
/**
215
 * Initializes a bitstream data structure
216
 */
217
static void bitstring_init(struct bitstring *bstr,
218
         const struct input *input,
219
         uint32_t bitstring_index)
220
0
{
221
0
  bstr->mask = tvb_get_letohs(input->tvb, input->offset+bitstring_index);
222
0
  bstr->mask <<= sizeof(bstr->mask) * 8 - 16;
223
0
  bitstring_index += 2;
224
225
0
  bstr->mask += tvb_get_letohs(input->tvb, input->offset+bitstring_index);
226
0
  bitstring_index += 2;
227
228
0
  bstr->bits = 32;
229
0
  bstr->input = input;
230
0
  bstr->bitstring_index = bitstring_index;
231
0
}
232
233
/**
234
 * Returns the next n bits from the front of a bit string.
235
 */
236
static uint32_t bitstring_lookup(struct bitstring *bstr, uint32_t n)
237
0
{
238
0
  if (n == 0 || bstr->bits < 0 || n > (uint32_t)bstr->bits) {
239
0
    return 0;
240
0
  }
241
0
  return bstr->mask >> (sizeof(bstr->mask) * 8 - n);
242
0
}
243
244
/**
245
 * Advances the bit string's cursor by n bits.
246
 */
247
static void bitstring_skip(struct bitstring *bstr, uint32_t n)
248
0
{
249
0
  bstr->mask = bstr->mask << n;
250
0
  bstr->bits = bstr->bits - n;
251
252
0
  if (bstr->bits < 16) {
253
0
    bstr->mask += tvb_get_letohs(bstr->input->tvb,
254
0
               bstr->input->offset + bstr->bitstring_index)
255
0
      << (16 - bstr->bits);
256
0
    bstr->bitstring_index = bstr->bitstring_index + 2;
257
0
    bstr->bits = bstr->bits + 16;
258
0
  }
259
0
}
260
261
/**
262
 * Returns the symbol encoded by the next prefix code in a bit string.
263
 */
264
static int prefix_code_tree_decode_symbol(struct hf_tree *tree,
265
            struct bitstring *bstr,
266
            uint32_t *out_symbol)
267
0
{
268
0
  uint32_t bit;
269
0
  struct prefix_code_node *node = tree->root;
270
271
0
  do {
272
0
    bit = bitstring_lookup(bstr, 1);
273
0
    bitstring_skip(bstr, 1);
274
0
    node = tree->nodes + node->child[bit];
275
0
    if (!is_node_valid(tree, node))
276
0
      return -1;
277
0
  } while (node->leaf == false);
278
279
0
  *out_symbol = node->symbol;
280
0
  return 0;
281
0
}
282
283
static bool do_uncompress(struct input *input,
284
            wmem_array_t *obuf)
285
0
{
286
0
  uint32_t symbol;
287
0
  uint32_t length;
288
0
  int32_t match_offset;
289
0
  int rc;
290
0
  struct hf_tree tree = {0};
291
0
  struct bitstring bstr = {0};
292
293
0
  if (!input->tvb)
294
0
    return false;
295
296
0
  if (!input->size || input->size > MAX_INPUT_SIZE)
297
0
    return false;
298
299
0
  rc = PrefixCodeTreeRebuild(&tree, input);
300
0
  if (rc)
301
0
    return false;
302
303
0
  bitstring_init(&bstr, input, ENCODED_TREE_SIZE);
304
305
0
  while (1) {
306
0
    rc = prefix_code_tree_decode_symbol(&tree, &bstr, &symbol);
307
0
    if (rc < 0)
308
0
      return false;
309
310
0
    if (symbol < 256) {
311
0
      uint8_t v = symbol & 0xFF;
312
0
      wmem_array_append_one(obuf, v);
313
0
    } else {
314
0
      if (symbol == 256) {
315
        /* EOF symbol and everything consumed */
316
0
        if (bstr.bitstring_index == bstr.input->size) {
317
0
          return true;
318
0
        }
319
0
      }
320
0
      symbol = symbol - 256;
321
0
      length = symbol & 0xF;
322
0
      symbol = symbol >> 4;
323
324
0
      match_offset = (1U << symbol) + bitstring_lookup(&bstr, symbol);
325
0
      match_offset *= -1;
326
327
0
      if (length == 15) {
328
0
        if (bstr.bitstring_index >= bstr.input->size)
329
0
          return false;
330
0
        length = tvb_get_uint8(bstr.input->tvb,
331
0
              bstr.input->offset+bstr.bitstring_index) + 15;
332
0
        bstr.bitstring_index += 1;
333
334
0
        if (length == 270) {
335
0
          if (bstr.bitstring_index+1 >= bstr.input->size)
336
0
            return false;
337
0
          length = tvb_get_letohs(bstr.input->tvb, bstr.input->offset+bstr.bitstring_index);
338
0
          bstr.bitstring_index += 2;
339
0
        }
340
0
      }
341
342
0
      bitstring_skip(&bstr, symbol);
343
344
0
      length += 3;
345
0
      do {
346
0
        uint8_t byte;
347
0
        unsigned elem_count = wmem_array_get_count(obuf)+match_offset;
348
349
0
        if (wmem_array_try_index(obuf, elem_count, &byte))
350
0
          return false;
351
0
        wmem_array_append_one(obuf, byte);
352
0
        length--;
353
0
      } while (length != 0);
354
0
    }
355
0
  }
356
0
  return true;
357
0
}
358
359
tvbuff_t *
360
tvb_uncompress_lz77huff(tvbuff_t *tvb,
361
      const unsigned offset,
362
      unsigned input_size)
363
0
{
364
0
  volatile bool ok;
365
0
  wmem_allocator_t *pool;
366
0
  wmem_array_t *obuf;
367
0
  tvbuff_t *out;
368
0
  struct input input = {
369
0
            .tvb = tvb,
370
0
            .offset = offset,
371
0
            .size = input_size
372
0
  };
373
374
0
  pool = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE);
375
0
  obuf = wmem_array_sized_new(pool, 1, input_size*2);
376
377
0
  TRY {
378
0
    ok = do_uncompress(&input, obuf);
379
0
  } CATCH_ALL {
380
0
    ok = false;
381
0
  }
382
0
  ENDTRY;
383
384
0
  if (ok) {
385
    /*
386
     * Cannot pass a tvb free callback that frees the wmem
387
     * pool, so we make an extra copy that uses bare
388
     * pointers. This could be optimized if tvb API had a
389
     * free pool callback of some sort.
390
     */
391
0
    unsigned size = wmem_array_get_count(obuf);
392
0
    uint8_t *p = (uint8_t *)g_malloc(size);
393
0
    memcpy(p, wmem_array_get_raw(obuf), size);
394
0
    out = tvb_new_real_data(p, size, size);
395
0
    tvb_set_free_cb(out, g_free);
396
0
  } else {
397
0
    out = NULL;
398
0
  }
399
400
0
  wmem_destroy_allocator(pool);
401
402
0
  return out;
403
0
}
404
405
tvbuff_t *
406
tvb_child_uncompress_lz77huff(tvbuff_t *parent, tvbuff_t *tvb, const unsigned offset, unsigned in_size)
407
0
{
408
0
  tvbuff_t *new_tvb = tvb_uncompress_lz77huff(tvb, offset, in_size);
409
0
  if (new_tvb)
410
0
    tvb_set_child_real_data_tvbuff(parent, new_tvb);
411
0
  return new_tvb;
412
0
}
413
414
/*
415
 * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
416
 *
417
 * Local variables:
418
 * c-basic-offset: 8
419
 * tab-width: 8
420
 * indent-tabs-mode: t
421
 * End:
422
 *
423
 * vi: set shiftwidth=8 tabstop=8 noexpandtab:
424
 * :indentSize=8:tabSize=8:noTabs=false:
425
 */