Coverage Report

Created: 2026-05-30 08:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vlc/contrib/contrib-build/libtheora/lib/huffdec.c
Line
Count
Source
1
/********************************************************************
2
 *                                                                  *
3
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7
 *                                                                  *
8
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009,2025           *
9
 * by the Xiph.Org Foundation and contributors                      *
10
 * https://www.xiph.org/                                            *
11
 *                                                                  *
12
 ********************************************************************
13
14
  function:
15
16
 ********************************************************************/
17
18
#include <stdlib.h>
19
#include <string.h>
20
#include <ogg/ogg.h>
21
#include "huffdec.h"
22
#include "decint.h"
23
24
25
26
/*Instead of storing every branching in the tree, subtrees can be collapsed
27
   into one node, with a table of size 1<<nbits pointing directly to its
28
   descedents nbits levels down.
29
  This allows more than one bit to be read at a time, and avoids following all
30
   the intermediate branches with next to no increased code complexity once
31
   the collapsed tree has been built.
32
  We do _not_ require that a subtree be complete to be collapsed, but instead
33
   store duplicate pointers in the table, and record the actual depth of the
34
   node below its parent.
35
  This tells us the number of bits to advance the stream after reaching it.
36
37
  This turns out to be equivalent to the method described in \cite{Hash95},
38
   without the requirement that codewords be sorted by length.
39
  If the codewords were sorted by length (so-called ``canonical-codes''), they
40
   could be decoded much faster via either Lindell and Moffat's approach or
41
   Hashemian's Condensed Huffman Code approach, the latter of which has an
42
   extremely small memory footprint.
43
  We can't use Choueka et al.'s finite state machine approach, which is
44
   extremely fast, because we can't allow multiple symbols to be output at a
45
   time; the codebook can and does change between symbols.
46
  It also has very large memory requirements, which impairs cache coherency.
47
48
  We store the tree packed in an array of 16-bit integers (words).
49
  Each node consists of a single word, followed consecutively by two or more
50
   indices of its children.
51
  Let n be the value of this first word.
52
  This is the number of bits that need to be read to traverse the node, and
53
   must be positive.
54
  1<<n entries follow in the array, each an index to a child node.
55
  If the child is positive, then it is the index of another internal node in
56
   the table.
57
  If the child is negative or zero, then it is a leaf node.
58
  These are stored directly in the child pointer to save space, since they only
59
   require a single word.
60
  If a leaf node would have been encountered before reading n bits, then it is
61
   duplicated the necessary number of times in this table.
62
  Leaf nodes pack both a token value and their actual depth in the tree.
63
  The token in the leaf node is (-leaf&255).
64
  The number of bits that need to be consumed to reach the leaf, starting from
65
   the current node, is (-leaf>>8).
66
67
  @ARTICLE{Hash95,
68
    author="Reza Hashemian",
69
    title="Memory Efficient and High-Speed Search {Huffman} Coding",
70
    journal="{IEEE} Transactions on Communications",
71
    volume=43,
72
    number=10,
73
    pages="2576--2581",
74
    month=Oct,
75
    year=1995
76
  }*/
77
78
79
80
/*The map from external spec-defined tokens to internal tokens.
81
  This is constructed so that any extra bits read with the original token value
82
   can be masked off the least significant bits of its internal token index.
83
  In addition, all of the tokens which require additional extra bits are placed
84
   at the start of the list, and grouped by type.
85
  OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
86
   giving it index 0 may simplify comparisons on some architectures.
87
  These requirements require some substantial reordering.*/
88
static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
89
  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
90
  15,
91
  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
92
  16,
93
  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
94
  17,
95
  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
96
  88,
97
  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
98
  80,
99
  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
100
   1,
101
  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
102
   0,
103
  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
104
  48,
105
  /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
106
  14,
107
  /*OC_ONE_TOKEN (0 extra bits)*/
108
  56,
109
  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
110
  57,
111
  /*OC_TWO_TOKEN (0 extra bits)*/
112
  58,
113
  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
114
  59,
115
  /*OC_DCT_VAL_CAT2 (1 extra bit)*/
116
  60,
117
  62,
118
  64,
119
  66,
120
  /*OC_DCT_VAL_CAT3 (2 extra bits)*/
121
  68,
122
  /*OC_DCT_VAL_CAT4 (3 extra bits)*/
123
  72,
124
  /*OC_DCT_VAL_CAT5 (4 extra bits)*/
125
   2,
126
  /*OC_DCT_VAL_CAT6 (5 extra bits)*/
127
   4,
128
  /*OC_DCT_VAL_CAT7 (6 extra bits)*/
129
   6,
130
  /*OC_DCT_VAL_CAT8 (10 extra bits)*/
131
   8,
132
  /*OC_DCT_RUN_CAT1A (1 extra bit)*/
133
  18,
134
  20,
135
  22,
136
  24,
137
  26,
138
  /*OC_DCT_RUN_CAT1B (3 extra bits)*/
139
  32,
140
  /*OC_DCT_RUN_CAT1C (4 extra bits)*/
141
  12,
142
  /*OC_DCT_RUN_CAT2A (2 extra bits)*/
143
  28,
144
  /*OC_DCT_RUN_CAT2B (3 extra bits)*/
145
  40
146
};
147
148
/*The log base 2 of number of internal tokens associated with each of the spec
149
   tokens (i.e., how many of the extra bits are folded into the token value).
150
  Increasing the maximum value beyond 3 will enlarge the amount of stack
151
   required for tree construction.*/
152
static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={
153
  0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3
154
};
155
156
157
/*The size a lookup table is allowed to grow to relative to the number of
158
   unique nodes it contains.
159
  E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is
160
   wasted (1/4 of the space must be used).
161
  Larger numbers can decode tokens with fewer read operations, while smaller
162
   numbers may save more space.
163
  With a sample file:
164
  32233473 read calls are required when no tree collapsing is done (100.0%).
165
  19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%).
166
  11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%).
167
  10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%).
168
  10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%).
169
  Since a value of 2 gets us the vast majority of the speed-up with only a
170
   small amount of wasted memory, this is what we use.
171
  This value must be less than 128, or you could create a tree with more than
172
   32767 entries, which would overflow the 16-bit words used to index it.*/
173
449k
#define OC_HUFF_SLUSH (2)
174
/*The root of the tree is on the fast path, and a larger value here is more
175
   beneficial than elsewhere in the tree.
176
  7 appears to give the best performance, trading off between increased use of
177
   the single-read fast path and cache footprint for the tables, though
178
   obviously this will depend on your cache size.
179
  Using 7 here, the VP3 tables are about twice as large compared to using 2.*/
180
531k
#define OC_ROOT_HUFF_SLUSH (7)
181
182
183
184
/*Unpacks a Huffman codebook.
185
  _opb:    The buffer to unpack from.
186
  _tokens: Stores a list of internal tokens, in the order they were found in
187
            the codebook, and the lengths of their corresponding codewords.
188
           This is enough to completely define the codebook, while minimizing
189
            stack usage and avoiding temporary allocations (for platforms
190
            where free() is a no-op).
191
  Return: The number of internal tokens in the codebook, or a negative value
192
   on error.*/
193
20.4k
int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){
194
20.4k
  ogg_uint32_t code;
195
20.4k
  int          len;
196
20.4k
  int          ntokens;
197
20.4k
  int          nleaves;
198
20.4k
  code=0;
199
20.4k
  len=ntokens=nleaves=0;
200
1.26M
  for(;;){
201
1.26M
    long bits;
202
1.26M
    bits=oc_pack_read1(_opb);
203
    /*Only process nodes so long as there's more bits in the buffer.*/
204
1.26M
    if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
205
    /*Read an internal node:*/
206
1.26M
    if(!bits){
207
623k
      len++;
208
      /*Don't allow codewords longer than 32 bits.*/
209
623k
      if(len>32)return TH_EBADHEADER;
210
623k
    }
211
    /*Read a leaf node:*/
212
643k
    else{
213
643k
      ogg_uint32_t code_bit;
214
643k
      int          neb;
215
643k
      int          nentries;
216
643k
      int          token;
217
      /*Don't allow more than 32 spec-tokens per codebook.*/
218
643k
      if(++nleaves>32)return TH_EBADHEADER;
219
643k
      bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
220
643k
      neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits];
221
643k
      token=OC_DCT_TOKEN_MAP[bits];
222
643k
      nentries=1<<neb;
223
2.49M
      while(nentries-->0){
224
1.85M
        _tokens[ntokens][0]=(unsigned char)token++;
225
1.85M
        _tokens[ntokens][1]=(unsigned char)(len+neb);
226
1.85M
        ntokens++;
227
1.85M
      }
228
643k
      if(len<=0)break;
229
643k
      code_bit=0x80000000U>>len-1;
230
1.26M
      while(len>0&&(code&code_bit)){
231
623k
        code^=code_bit;
232
623k
        code_bit<<=1;
233
623k
        len--;
234
623k
      }
235
643k
      if(len<=0)break;
236
623k
      code|=code_bit;
237
623k
    }
238
1.26M
  }
239
20.4k
  return ntokens;
240
20.4k
}
241
242
/*Count how many tokens would be required to fill a subtree at depth _depth.
243
  _tokens: A list of internal tokens, in the order they are found in the
244
            codebook, and the lengths of their corresponding codewords.
245
  _depth:  The depth of the desired node in the corresponding tree structure.
246
  Return: The number of tokens that belong to that subtree.*/
247
4.01M
static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){
248
4.01M
  ogg_uint32_t code;
249
4.01M
  int          ti;
250
4.01M
  code=0;
251
4.01M
  ti=0;
252
29.2M
  do{
253
29.2M
    if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth;
254
0
    else{
255
      /*Because of the expanded internal tokens, we can have codewords as long
256
         as 35 bits.
257
        A single recursion here is enough to advance past them.*/
258
0
      code++;
259
0
      ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31);
260
0
    }
261
29.2M
  }
262
29.2M
  while(code<0x80000000U);
263
4.01M
  return ti;
264
4.01M
}
265
266
/*Compute the number of bits to use for a collapsed tree node at the given
267
   depth.
268
  _tokens:  A list of internal tokens, in the order they are found in the
269
             codebook, and the lengths of their corresponding codewords.
270
  _ntokens: The number of tokens corresponding to this tree node.
271
  _depth:   The depth of this tree node.
272
  Return: The number of bits to use for a collapsed tree node rooted here.
273
          This is always at least one, even if this was a leaf node.*/
274
static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2],
275
490k
 int _ntokens,int _depth){
276
490k
  int got_leaves;
277
490k
  int loccupancy;
278
490k
  int occupancy;
279
490k
  int slush;
280
490k
  int nbits;
281
490k
  int best_nbits;
282
490k
  slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
283
  /*It's legal to have a tree with just a single node, which requires no bits
284
     to decode and always returns the same token.
285
    However, no encoder actually does this (yet).
286
    To avoid a special case in oc_huff_token_decode(), we force the number of
287
     lookahead bits to be at least one.
288
    This will produce a tree that looks ahead one bit and then advances the
289
     stream zero bits.*/
290
490k
  nbits=1;
291
490k
  occupancy=2;
292
490k
  got_leaves=1;
293
1.15M
  do{
294
1.15M
    int ti;
295
1.15M
    if(got_leaves)best_nbits=nbits;
296
1.15M
    nbits++;
297
1.15M
    got_leaves=0;
298
1.15M
    loccupancy=occupancy;
299
15.8M
    for(occupancy=ti=0;ti<_ntokens;occupancy++){
300
14.6M
      if(_tokens[ti][1]<_depth+nbits)ti++;
301
7.31M
      else if(_tokens[ti][1]==_depth+nbits){
302
3.75M
        got_leaves=1;
303
3.75M
        ti++;
304
3.75M
      }
305
3.56M
      else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits);
306
14.6M
    }
307
1.15M
  }
308
1.15M
  while(occupancy>loccupancy&&occupancy*slush>=1<<nbits);
309
490k
  return best_nbits;
310
490k
}
311
312
/*Determines the size in words of a Huffman tree node that represents a
313
   subtree of depth _nbits.
314
  _nbits: The depth of the subtree.
315
          This must be greater than zero.
316
  Return: The number of words required to store the node.*/
317
605k
static size_t oc_huff_node_size(int _nbits){
318
605k
  return 1+(1<<_nbits);
319
605k
}
320
321
/*Produces a collapsed-tree representation of the given token list.
322
  _tree: The storage for the collapsed Huffman tree.
323
         This may be NULL to compute the required storage size instead of
324
          constructing the tree.
325
  _tokens:  A list of internal tokens, in the order they are found in the
326
             codebook, and the lengths of their corresponding codewords.
327
  _ntokens: The number of tokens corresponding to this tree node.
328
  Return: The number of words required to store the tree.*/
329
static size_t oc_huff_tree_collapse(ogg_int16_t *_tree,
330
40.8k
 unsigned char _tokens[][2],int _ntokens){
331
40.8k
  ogg_int16_t   node[34];
332
40.8k
  unsigned char depth[34];
333
40.8k
  unsigned char last[34];
334
40.8k
  size_t        ntree;
335
40.8k
  int           ti;
336
40.8k
  int           l;
337
40.8k
  depth[0]=0;
338
40.8k
  last[0]=(unsigned char)(_ntokens-1);
339
40.8k
  ntree=0;
340
40.8k
  ti=0;
341
40.8k
  l=0;
342
490k
  do{
343
490k
    int nbits;
344
490k
    nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]);
345
490k
    node[l]=(ogg_int16_t)ntree;
346
490k
    ntree+=oc_huff_node_size(nbits);
347
490k
    if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits;
348
939k
    do{
349
4.64M
      while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){
350
3.70M
        if(_tree!=NULL){
351
1.85M
          ogg_int16_t leaf;
352
1.85M
          int         nentries;
353
1.85M
          nentries=1<<depth[l]+nbits-_tokens[ti][1];
354
1.85M
          leaf=(ogg_int16_t)-(_tokens[ti][1]-depth[l]<<8|_tokens[ti][0]);
355
8.74M
          while(nentries-->0)_tree[node[l]++]=leaf;
356
1.85M
        }
357
3.70M
        ti++;
358
3.70M
      }
359
939k
      if(ti<=last[l]){
360
        /*We need to recurse*/
361
449k
        depth[l+1]=(unsigned char)(depth[l]+nbits);
362
449k
        if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree;
363
449k
        l++;
364
449k
        last[l]=
365
449k
         (unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1);
366
449k
        break;
367
449k
      }
368
      /*Pop back up a level of recursion.*/
369
490k
      else if(l-->0)nbits=depth[l+1]-depth[l];
370
939k
    }
371
490k
    while(l>=0);
372
490k
  }
373
490k
  while(l>=0);
374
40.8k
  return ntree;
375
40.8k
}
376
377
/*Unpacks a set of Huffman trees, and reduces them to a collapsed
378
   representation.
379
  _opb:   The buffer to unpack the trees from.
380
  _nodes: The table to fill with the Huffman trees.
381
  Return: 0 on success, or a negative value on error.
382
          The caller is responsible for cleaning up any partially initialized
383
           _nodes on failure.*/
384
int oc_huff_trees_unpack(oc_pack_buf *_opb,
385
259
 ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
386
259
  int i;
387
20.6k
  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
388
20.4k
    unsigned char  tokens[256][2];
389
20.4k
    int            ntokens;
390
20.4k
    ogg_int16_t   *tree;
391
20.4k
    size_t         size;
392
    /*Unpack the full tree into a temporary buffer.*/
393
20.4k
    ntokens=oc_huff_tree_unpack(_opb,tokens);
394
20.4k
    if(ntokens<0)return ntokens;
395
    /*Figure out how big the collapsed tree will be and allocate space for it.*/
396
20.4k
    size=oc_huff_tree_collapse(NULL,tokens,ntokens);
397
    /*This should never happen; if it does it means you set OC_HUFF_SLUSH or
398
       OC_ROOT_HUFF_SLUSH too large.*/
399
20.4k
    if(size>32767)return TH_EIMPL;
400
20.4k
    tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
401
20.4k
    if(tree==NULL)return TH_EFAULT;
402
    /*Construct the collapsed the tree.*/
403
20.4k
    oc_huff_tree_collapse(tree,tokens,ntokens);
404
20.4k
    _nodes[i]=tree;
405
20.4k
  }
406
250
  return 0;
407
259
}
408
409
/*Determines the size in words of a Huffman subtree.
410
  _tree: The complete Huffman tree.
411
  _node: The index of the root of the desired subtree.
412
  Return: The number of words required to store the tree.*/
413
115k
static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
414
115k
  size_t size;
415
115k
  int    nchildren;
416
115k
  int    n;
417
115k
  int    i;
418
115k
  n=_tree[_node];
419
115k
  size=oc_huff_node_size(n);
420
115k
  nchildren=1<<n;
421
115k
  i=0;
422
977k
  do{
423
977k
    int child;
424
977k
    child=_tree[_node+i+1];
425
977k
    if(child<=0)i+=1<<n-(-child>>8);
426
105k
    else{
427
105k
      size+=oc_huff_tree_size(_tree,child);
428
105k
      i++;
429
105k
    }
430
977k
  }
431
977k
  while(i<nchildren);
432
115k
  return size;
433
115k
}
434
435
/*Makes a copy of the given set of Huffman trees.
436
  _dst: The array to store the copy in.
437
  _src: The array of trees to copy.*/
438
int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
439
120
 const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
440
120
  int i;
441
9.72k
  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
442
9.60k
    size_t size;
443
9.60k
    size=oc_huff_tree_size(_src[i],0);
444
9.60k
    _dst[i]=(ogg_int16_t *)_ogg_malloc(size*sizeof(*_dst[i]));
445
9.60k
    if(_dst[i]==NULL){
446
0
      while(i-->0)_ogg_free(_dst[i]);
447
0
      return TH_EFAULT;
448
0
    }
449
9.60k
    memcpy(_dst[i],_src[i],size*sizeof(*_dst[i]));
450
9.60k
  }
451
120
  return 0;
452
120
}
453
454
/*Frees the memory used by a set of Huffman trees.
455
  _nodes: The array of trees to free.*/
456
380
void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
457
380
  int i;
458
30.7k
  for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
459
380
}
460
461
462
/*Unpacks a single token using the given Huffman tree.
463
  _opb:  The buffer to unpack the token from.
464
  _node: The tree to unpack the token with.
465
  Return: The token value.*/
466
395M
int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree){
467
395M
  const unsigned char *ptr;
468
395M
  const unsigned char *stop;
469
395M
  oc_pb_window         window;
470
395M
  int                  available;
471
395M
  long                 bits;
472
395M
  int                  node;
473
395M
  int                  n;
474
395M
  ptr=_opb->ptr;
475
395M
  window=_opb->window;
476
395M
  stop=_opb->stop;
477
395M
  available=_opb->bits;
478
395M
  node=0;
479
396M
  for(;;){
480
396M
    n=_tree[node];
481
396M
    if(n>available){
482
596k
      unsigned shift;
483
596k
      shift=OC_PB_WINDOW_SIZE-available;
484
4.19M
      do{
485
        /*We don't bother setting eof because we won't check for it after we've
486
           started decoding DCT tokens.*/
487
4.19M
        if(ptr>=stop){
488
824
          shift=(unsigned)-OC_LOTS_OF_BITS;
489
824
          break;
490
824
        }
491
4.19M
        shift-=8;
492
4.19M
        window|=(oc_pb_window)*ptr++<<shift;
493
4.19M
      }
494
4.19M
      while(shift>=8);
495
      /*Note: We never request more than 24 bits, so there's no need to fill in
496
         the last partial byte here.*/
497
596k
      available=OC_PB_WINDOW_SIZE-shift;
498
596k
    }
499
396M
    bits=window>>OC_PB_WINDOW_SIZE-n;
500
396M
    node=_tree[node+1+bits];
501
396M
    if(node<=0)break;
502
845k
    window<<=n;
503
845k
    available-=n;
504
845k
  }
505
395M
  node=-node;
506
395M
  n=node>>8;
507
395M
  window<<=n;
508
395M
  available-=n;
509
395M
  _opb->ptr=ptr;
510
395M
  _opb->window=window;
511
395M
  _opb->bits=available;
512
395M
  return node&255;
513
395M
}