Coverage Report

Created: 2026-01-25 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ots/subprojects/brotli-1.1.0/c/common/shared_dictionary.c
Line
Count
Source
1
/* Copyright 2017 Google Inc. All Rights Reserved.
2
3
   Distributed under MIT license.
4
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
*/
6
7
/* Shared Dictionary definition and utilities. */
8
9
#include <brotli/shared_dictionary.h>
10
11
#include <memory.h>
12
#include <stdlib.h>  /* malloc, free */
13
#include <stdio.h>
14
15
#include "dictionary.h"
16
#include "platform.h"
17
#include "shared_dictionary_internal.h"
18
19
#if defined(__cplusplus) || defined(c_plusplus)
20
extern "C" {
21
#endif
22
23
#if defined(BROTLI_EXPERIMENTAL)
24
25
#define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \
26
    - SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)
27
28
/* Max allowed by spec */
29
#define BROTLI_MAX_SIZE_BITS 15u
30
31
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
32
static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,
33
    BROTLI_BOOL* result) {
34
  uint8_t value;
35
  size_t position = *pos;
36
  if (position >= size) return BROTLI_FALSE;  /* past file end */
37
  value = encoded[position++];
38
  if (value > 1) return BROTLI_FALSE;  /* invalid bool */
39
  *result = TO_BROTLI_BOOL(value);
40
  *pos = position;
41
  return BROTLI_TRUE;  /* success */
42
}
43
44
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
45
static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,
46
    uint8_t* result) {
47
  size_t position = *pos;
48
  if (position + sizeof(uint8_t) > size) return BROTLI_FALSE;
49
  *result = encoded[position++];
50
  *pos = position;
51
  return BROTLI_TRUE;
52
}
53
54
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
55
static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,
56
    uint16_t* result) {
57
  size_t position = *pos;
58
  if (position + sizeof(uint16_t) > size) return BROTLI_FALSE;
59
  *result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]);
60
  position += 2;
61
  *pos = position;
62
  return BROTLI_TRUE;
63
}
64
65
/* Reads a varint into a uint32_t, and returns error if it's too large */
66
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
67
static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,
68
    size_t* pos, uint32_t* result) {
69
  int num = 0;
70
  uint8_t byte;
71
  *result = 0;
72
  for (;;) {
73
    if (*pos >= size) return BROTLI_FALSE;
74
    byte = encoded[(*pos)++];
75
    if (num == 4 && byte > 15) return BROTLI_FALSE;
76
    *result |= (uint32_t)(byte & 127) << (num * 7);
77
    if (byte < 128) return BROTLI_TRUE;
78
    num++;
79
  }
80
}
81
82
/* Returns the total length of word list. */
83
static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,
84
    uint32_t* offsets_by_length) {
85
  uint32_t pos = 0;
86
  uint32_t i;
87
  for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
88
    offsets_by_length[i] = pos;
89
    if (size_bits_by_length[i] != 0) {
90
      pos += i << size_bits_by_length[i];
91
    }
92
  }
93
  return pos;
94
}
95
96
static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,
97
    size_t* pos, BrotliDictionary* out) {
98
  size_t offset;
99
  size_t i;
100
  size_t position = *pos;
101
  if (position + BROTLI_NUM_ENCODED_LENGTHS > size) {
102
    return BROTLI_FALSE;
103
  }
104
105
  memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);
106
  memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH,
107
      &encoded[position], BROTLI_NUM_ENCODED_LENGTHS);
108
  for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
109
      i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
110
    if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) {
111
      return BROTLI_FALSE;
112
    }
113
  }
114
  position += BROTLI_NUM_ENCODED_LENGTHS;
115
  offset = BrotliSizeBitsToOffsets(
116
      out->size_bits_by_length, out->offsets_by_length);
117
118
  out->data = &encoded[position];
119
  out->data_size = offset;
120
  position += offset;
121
  if (position > size) return BROTLI_FALSE;
122
  *pos = position;
123
  return BROTLI_TRUE;
124
}
125
126
/* Computes the cutOffTransforms of a BrotliTransforms which already has the
127
   transforms data correctly filled in. */
128
static void ComputeCutoffTransforms(BrotliTransforms* transforms) {
129
  uint32_t i;
130
  for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
131
    transforms->cutOffTransforms[i] = -1;
132
  }
133
  for (i = 0; i < transforms->num_transforms; i++) {
134
    const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i);
135
    uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i);
136
    const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i);
137
    if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 &&
138
        transforms->cutOffTransforms[type] == -1) {
139
      transforms->cutOffTransforms[type] = (int16_t)i;
140
    }
141
  }
142
}
143
144
static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,
145
    size_t* pos, BrotliTransforms* out, uint16_t* out_table,
146
    size_t* out_table_size) {
147
  size_t position = *pos;
148
  size_t offset = 0;
149
  size_t stringlet_count = 0;  /* NUM_PREFIX_SUFFIX */
150
  size_t data_length = 0;
151
152
  /* PREFIX_SUFFIX_LENGTH */
153
  if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {
154
    return BROTLI_FALSE;
155
  }
156
  data_length = out->prefix_suffix_size;
157
158
  /* Must at least have space for null terminator. */
159
  if (data_length < 1) return BROTLI_FALSE;
160
  out->prefix_suffix = &encoded[position];
161
  if (position + data_length >= size) return BROTLI_FALSE;
162
  while (BROTLI_TRUE) {
163
    /* STRING_LENGTH */
164
    size_t stringlet_len = encoded[position + offset];
165
    out_table[stringlet_count] = (uint16_t)offset;
166
    stringlet_count++;
167
    offset++;
168
    if (stringlet_len == 0) {
169
      if (offset == data_length) {
170
        break;
171
      } else {
172
        return BROTLI_FALSE;
173
      }
174
    }
175
    if (stringlet_count > 255) return BROTLI_FALSE;
176
    offset += stringlet_len;
177
    if (offset >= data_length) return BROTLI_FALSE;
178
  }
179
180
  position += data_length;
181
  *pos = position;
182
  *out_table_size = (uint16_t)stringlet_count;
183
  return BROTLI_TRUE;
184
}
185
186
static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,
187
    size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,
188
    size_t* prefix_suffix_count) {
189
  uint32_t i;
190
  BROTLI_BOOL has_params = BROTLI_FALSE;
191
  BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;
192
  size_t position = *pos;
193
  size_t stringlet_cnt = 0;
194
  if (position >= size) return BROTLI_FALSE;
195
196
  prefix_suffix_ok = ParsePrefixSuffixTable(
197
      size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);
198
  if (!prefix_suffix_ok) return BROTLI_FALSE;
199
  out->prefix_suffix_map = prefix_suffix_table;
200
  *prefix_suffix_count = stringlet_cnt;
201
202
  out->num_transforms = encoded[position++];
203
  out->transforms = &encoded[position];
204
  position += (size_t)out->num_transforms * 3;
205
  if (position > size) return BROTLI_FALSE;
206
  /* Check for errors and read extra parameters. */
207
  for (i = 0; i < out->num_transforms; i++) {
208
    uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);
209
    uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
210
    uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);
211
    if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;
212
    if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;
213
    if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;
214
    if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||
215
        type == BROTLI_TRANSFORM_SHIFT_ALL) {
216
      has_params = BROTLI_TRUE;
217
    }
218
  }
219
  if (has_params) {
220
    out->params = &encoded[position];
221
    position += (size_t)out->num_transforms * 2;
222
    if (position > size) return BROTLI_FALSE;
223
    for (i = 0; i < out->num_transforms; i++) {
224
      uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
225
      if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&
226
          type != BROTLI_TRANSFORM_SHIFT_ALL) {
227
        if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {
228
          return BROTLI_FALSE;
229
        }
230
      }
231
    }
232
  } else {
233
    out->params = NULL;
234
  }
235
  ComputeCutoffTransforms(out);
236
  *pos = position;
237
  return BROTLI_TRUE;
238
}
239
240
static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,
241
    size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {
242
  size_t pos = 0;
243
  uint32_t chunk_size = 0;
244
  uint8_t num_word_lists;
245
  uint8_t num_transform_lists;
246
  *is_custom_static_dict = BROTLI_FALSE;
247
  *num_prefix = 0;
248
249
  /* Skip magic header bytes. */
250
  pos += 2;
251
252
  /* LZ77_DICTIONARY_LENGTH */
253
  if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
254
  if (chunk_size != 0) {
255
    /* This limitation is not specified but the 32-bit Brotli decoder for now */
256
    if (chunk_size > 1073741823) return BROTLI_FALSE;
257
    *num_prefix = 1;
258
    if (pos + chunk_size > size) return BROTLI_FALSE;
259
    pos += chunk_size;
260
  }
261
262
  if (!ReadUint8(encoded, size, &pos, &num_word_lists)) {
263
    return BROTLI_FALSE;
264
  }
265
  if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) {
266
    return BROTLI_FALSE;
267
  }
268
269
  if (num_word_lists > 0 || num_transform_lists > 0) {
270
    *is_custom_static_dict = BROTLI_TRUE;
271
  }
272
273
  return BROTLI_TRUE;
274
}
275
276
static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,
277
    BrotliSharedDictionary* dict) {
278
  uint32_t i;
279
  size_t pos = 0;
280
  uint32_t chunk_size = 0;
281
  size_t total_prefix_suffix_count = 0;
282
  size_t trasform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
283
  uint16_t temporary_prefix_suffix_table[256];
284
285
  /* Skip magic header bytes. */
286
  pos += 2;
287
288
  /* LZ77_DICTIONARY_LENGTH */
289
  if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
290
  if (chunk_size != 0) {
291
    if (pos + chunk_size > size) return BROTLI_FALSE;
292
    dict->prefix_size[dict->num_prefix] = chunk_size;
293
    dict->prefix[dict->num_prefix] = &encoded[pos];
294
    dict->num_prefix++;
295
    /* LZ77_DICTIONARY_LENGTH bytes. */
296
    pos += chunk_size;
297
  }
298
299
  /* NUM_WORD_LISTS */
300
  if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {
301
    return BROTLI_FALSE;
302
  }
303
  if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
304
    return BROTLI_FALSE;
305
  }
306
307
  if (dict->num_word_lists != 0) {
308
    dict->words_instances = (BrotliDictionary*)dict->alloc_func(
309
        dict->memory_manager_opaque,
310
        dict->num_word_lists * sizeof(*dict->words_instances));
311
    if (!dict->words_instances) return BROTLI_FALSE;  /* OOM */
312
  }
313
  for (i = 0; i < dict->num_word_lists; i++) {
314
    if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {
315
      return BROTLI_FALSE;
316
    }
317
  }
318
319
  /* NUM_TRANSFORM_LISTS */
320
  if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {
321
    return BROTLI_FALSE;
322
  }
323
  if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
324
    return BROTLI_FALSE;
325
  }
326
327
  if (dict->num_transform_lists != 0) {
328
    dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(
329
        dict->memory_manager_opaque,
330
        dict->num_transform_lists * sizeof(*dict->transforms_instances));
331
    if (!dict->transforms_instances) return BROTLI_FALSE;  /* OOM */
332
  }
333
  for (i = 0; i < dict->num_transform_lists; i++) {
334
    BROTLI_BOOL ok = BROTLI_FALSE;
335
    size_t prefix_suffix_count = 0;
336
    trasform_list_start[i] = pos;
337
    dict->transforms_instances[i].prefix_suffix_map =
338
        temporary_prefix_suffix_table;
339
    ok = ParseTransformsList(
340
        size, encoded, &pos, &dict->transforms_instances[i],
341
        temporary_prefix_suffix_table, &prefix_suffix_count);
342
    if (!ok) return BROTLI_FALSE;
343
    total_prefix_suffix_count += prefix_suffix_count;
344
  }
345
  if (total_prefix_suffix_count != 0) {
346
    dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(
347
        dict->memory_manager_opaque,
348
        total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));
349
    if (!dict->prefix_suffix_maps) return BROTLI_FALSE;  /* OOM */
350
  }
351
  total_prefix_suffix_count = 0;
352
  for (i = 0; i < dict->num_transform_lists; i++) {
353
    size_t prefix_suffix_count = 0;
354
    size_t position = trasform_list_start[i];
355
    uint16_t* prefix_suffix_map =
356
      &dict->prefix_suffix_maps[total_prefix_suffix_count];
357
    BROTLI_BOOL ok = ParsePrefixSuffixTable(
358
        size, encoded, &position, &dict->transforms_instances[i],
359
        prefix_suffix_map, &prefix_suffix_count);
360
    if (!ok) return BROTLI_FALSE;
361
    dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;
362
    total_prefix_suffix_count += prefix_suffix_count;
363
  }
364
365
  if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {
366
    if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {
367
      return BROTLI_FALSE;
368
    }
369
    if (dict->num_dictionaries == 0 ||
370
        dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
371
      return BROTLI_FALSE;
372
    }
373
    for (i = 0; i < dict->num_dictionaries; i++) {
374
      uint8_t words_index;
375
      uint8_t transforms_index;
376
      if (!ReadUint8(encoded, size, &pos, &words_index)) {
377
        return BROTLI_FALSE;
378
      }
379
      if (words_index > dict->num_word_lists) return BROTLI_FALSE;
380
      if (!ReadUint8(encoded, size, &pos, &transforms_index)) {
381
        return BROTLI_FALSE;
382
      }
383
      if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;
384
      dict->words[i] = words_index == dict->num_word_lists ?
385
          BrotliGetDictionary() : &dict->words_instances[words_index];
386
      dict->transforms[i] = transforms_index == dict->num_transform_lists ?
387
          BrotliGetTransforms(): &dict->transforms_instances[transforms_index];
388
    }
389
    /* CONTEXT_ENABLED */
390
    if (!ReadBool(encoded, size, &pos, &dict->context_based)) {
391
      return BROTLI_FALSE;
392
    }
393
394
    /* CONTEXT_MAP */
395
    if (dict->context_based) {
396
      for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {
397
        if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {
398
          return BROTLI_FALSE;
399
        }
400
        if (dict->context_map[i] >= dict->num_dictionaries) {
401
          return BROTLI_FALSE;
402
        }
403
      }
404
    }
405
  } else {
406
    dict->context_based = BROTLI_FALSE;
407
    dict->num_dictionaries = 1;
408
    dict->words[0] = BrotliGetDictionary();
409
    dict->transforms[0] = BrotliGetTransforms();
410
  }
411
412
  return BROTLI_TRUE;
413
}
414
415
/* Decodes shared dictionary and verifies correctness.
416
   Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise.
417
   The BrotliSharedDictionary must already have been initialized. If the
418
   BrotliSharedDictionary already contains data, compound dictionaries
419
   will be appended, but an error will be returned if it already has
420
   custom words or transforms.
421
   TODO(lode): link to RFC for shared brotli once published. */
422
static BROTLI_BOOL DecodeSharedDictionary(
423
    const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {
424
  uint32_t num_prefix = 0;
425
  BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE;
426
  BROTLI_BOOL has_custom_static_dict =
427
      dict->num_word_lists > 0 || dict->num_transform_lists > 0;
428
429
  /* Check magic header bytes. */
430
  if (size < 2) return BROTLI_FALSE;
431
  if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;
432
433
  if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) {
434
    return BROTLI_FALSE;
435
  }
436
437
  if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {
438
    return BROTLI_FALSE;
439
  }
440
441
  /* Cannot combine different static dictionaries, only prefix dictionaries */
442
  if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE;
443
444
  return ParseDictionary(encoded, size, dict);
445
}
446
447
#endif  /* BROTLI_EXPERIMENTAL */
448
449
void BrotliSharedDictionaryDestroyInstance(
450
3.26k
    BrotliSharedDictionary* dict) {
451
3.26k
  if (!dict) {
452
0
    return;
453
3.26k
  } else {
454
3.26k
    brotli_free_func free_func = dict->free_func;
455
3.26k
    void* opaque = dict->memory_manager_opaque;
456
    /* Cleanup. */
457
3.26k
    free_func(opaque, dict->words_instances);
458
3.26k
    free_func(opaque, dict->transforms_instances);
459
3.26k
    free_func(opaque, dict->prefix_suffix_maps);
460
    /* Self-destruction. */
461
3.26k
    free_func(opaque, dict);
462
3.26k
  }
463
3.26k
}
464
465
BROTLI_BOOL BrotliSharedDictionaryAttach(
466
    BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
467
0
    size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) {
468
0
  if (!dict) {
469
0
    return BROTLI_FALSE;
470
0
  }
471
#if defined(BROTLI_EXPERIMENTAL)
472
  if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
473
    return DecodeSharedDictionary(data, data_size, dict);
474
  }
475
#endif  /* BROTLI_EXPERIMENTAL */
476
0
  if (type == BROTLI_SHARED_DICTIONARY_RAW) {
477
0
    if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) {
478
0
      return BROTLI_FALSE;
479
0
    }
480
0
    dict->prefix_size[dict->num_prefix] = data_size;
481
0
    dict->prefix[dict->num_prefix] = data;
482
0
    dict->num_prefix++;
483
0
    return BROTLI_TRUE;
484
0
  }
485
0
  return BROTLI_FALSE;
486
0
}
487
488
BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
489
3.26k
    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
490
3.26k
  BrotliSharedDictionary* dict = 0;
491
3.26k
  if (!alloc_func && !free_func) {
492
3.26k
    dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary));
493
3.26k
  } else if (alloc_func && free_func) {
494
0
    dict = (BrotliSharedDictionary*)alloc_func(
495
0
        opaque, sizeof(BrotliSharedDictionary));
496
0
  }
497
3.26k
  if (dict == 0) {
498
0
    return 0;
499
0
  }
500
501
  /* TODO(eustas): explicitly initialize all the fields? */
502
3.26k
  memset(dict, 0, sizeof(BrotliSharedDictionary));
503
504
3.26k
  dict->context_based = BROTLI_FALSE;
505
3.26k
  dict->num_dictionaries = 1;
506
3.26k
  dict->num_word_lists = 0;
507
3.26k
  dict->num_transform_lists = 0;
508
509
3.26k
  dict->words[0] = BrotliGetDictionary();
510
3.26k
  dict->transforms[0] = BrotliGetTransforms();
511
512
3.26k
  dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;
513
3.26k
  dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc;
514
3.26k
  dict->memory_manager_opaque = opaque;
515
516
3.26k
  return dict;
517
3.26k
}
518
519
#if defined(__cplusplus) || defined(c_plusplus)
520
}  /* extern "C" */
521
#endif