Coverage Report

Created: 2025-10-10 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/brotli/c/common/shared_dictionary.c
Line
Count
Source
1
/* Copyright 2017 Google Inc. All Rights Reserved.
2
3
   Distributed under MIT license.
4
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
*/
6
7
/* Shared Dictionary definition and utilities. */
8
9
#include <brotli/shared_dictionary.h>
10
11
#include "dictionary.h"
12
#include "platform.h"
13
#include "shared_dictionary_internal.h"
14
15
#if defined(__cplusplus) || defined(c_plusplus)
16
extern "C" {
17
#endif
18
19
#if defined(BROTLI_EXPERIMENTAL)
20
21
#define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \
22
    - SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)
23
24
/* Max allowed by spec */
25
#define BROTLI_MAX_SIZE_BITS 15u
26
27
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
28
static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,
29
    BROTLI_BOOL* result) {
30
  uint8_t value;
31
  size_t position = *pos;
32
  if (position >= size) return BROTLI_FALSE;  /* past file end */
33
  value = encoded[position++];
34
  if (value > 1) return BROTLI_FALSE;  /* invalid bool */
35
  *result = TO_BROTLI_BOOL(value);
36
  *pos = position;
37
  return BROTLI_TRUE;  /* success */
38
}
39
40
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
41
static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,
42
    uint8_t* result) {
43
  size_t position = *pos;
44
  if (position + sizeof(uint8_t) > size) return BROTLI_FALSE;
45
  *result = encoded[position++];
46
  *pos = position;
47
  return BROTLI_TRUE;
48
}
49
50
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
51
static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,
52
    uint16_t* result) {
53
  size_t position = *pos;
54
  if (position + sizeof(uint16_t) > size) return BROTLI_FALSE;
55
  *result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]);
56
  position += 2;
57
  *pos = position;
58
  return BROTLI_TRUE;
59
}
60
61
/* Reads a varint into a uint32_t, and returns error if it's too large */
62
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
63
static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,
64
    size_t* pos, uint32_t* result) {
65
  int num = 0;
66
  uint8_t byte;
67
  *result = 0;
68
  for (;;) {
69
    if (*pos >= size) return BROTLI_FALSE;
70
    byte = encoded[(*pos)++];
71
    if (num == 4 && byte > 15) return BROTLI_FALSE;
72
    *result |= (uint32_t)(byte & 127) << (num * 7);
73
    if (byte < 128) return BROTLI_TRUE;
74
    num++;
75
  }
76
}
77
78
/* Returns the total length of word list. */
79
static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,
80
    uint32_t* offsets_by_length) {
81
  uint32_t pos = 0;
82
  uint32_t i;
83
  for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
84
    offsets_by_length[i] = pos;
85
    if (size_bits_by_length[i] != 0) {
86
      pos += i << size_bits_by_length[i];
87
    }
88
  }
89
  return pos;
90
}
91
92
static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,
93
    size_t* pos, BrotliDictionary* out) {
94
  size_t offset;
95
  size_t i;
96
  size_t position = *pos;
97
  if (position + BROTLI_NUM_ENCODED_LENGTHS > size) {
98
    return BROTLI_FALSE;
99
  }
100
101
  memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);
102
  memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH,
103
      &encoded[position], BROTLI_NUM_ENCODED_LENGTHS);
104
  for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
105
      i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
106
    if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) {
107
      return BROTLI_FALSE;
108
    }
109
  }
110
  position += BROTLI_NUM_ENCODED_LENGTHS;
111
  offset = BrotliSizeBitsToOffsets(
112
      out->size_bits_by_length, out->offsets_by_length);
113
114
  out->data = &encoded[position];
115
  out->data_size = offset;
116
  position += offset;
117
  if (position > size) return BROTLI_FALSE;
118
  *pos = position;
119
  return BROTLI_TRUE;
120
}
121
122
/* Computes the cutOffTransforms of a BrotliTransforms which already has the
123
   transforms data correctly filled in. */
124
static void ComputeCutoffTransforms(BrotliTransforms* transforms) {
125
  uint32_t i;
126
  for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
127
    transforms->cutOffTransforms[i] = -1;
128
  }
129
  for (i = 0; i < transforms->num_transforms; i++) {
130
    const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i);
131
    uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i);
132
    const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i);
133
    if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 &&
134
        transforms->cutOffTransforms[type] == -1) {
135
      transforms->cutOffTransforms[type] = (int16_t)i;
136
    }
137
  }
138
}
139
140
static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,
141
    size_t* pos, BrotliTransforms* out, uint16_t* out_table,
142
    size_t* out_table_size) {
143
  size_t position = *pos;
144
  size_t offset = 0;
145
  size_t stringlet_count = 0;  /* NUM_PREFIX_SUFFIX */
146
  size_t data_length = 0;
147
148
  /* PREFIX_SUFFIX_LENGTH */
149
  if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {
150
    return BROTLI_FALSE;
151
  }
152
  data_length = out->prefix_suffix_size;
153
154
  /* Must at least have space for null terminator. */
155
  if (data_length < 1) return BROTLI_FALSE;
156
  out->prefix_suffix = &encoded[position];
157
  if (position + data_length >= size) return BROTLI_FALSE;
158
  while (BROTLI_TRUE) {
159
    /* STRING_LENGTH */
160
    size_t stringlet_len = encoded[position + offset];
161
    out_table[stringlet_count] = (uint16_t)offset;
162
    stringlet_count++;
163
    offset++;
164
    if (stringlet_len == 0) {
165
      if (offset == data_length) {
166
        break;
167
      } else {
168
        return BROTLI_FALSE;
169
      }
170
    }
171
    if (stringlet_count > 255) return BROTLI_FALSE;
172
    offset += stringlet_len;
173
    if (offset >= data_length) return BROTLI_FALSE;
174
  }
175
176
  position += data_length;
177
  *pos = position;
178
  *out_table_size = (uint16_t)stringlet_count;
179
  return BROTLI_TRUE;
180
}
181
182
static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,
183
    size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,
184
    size_t* prefix_suffix_count) {
185
  uint32_t i;
186
  BROTLI_BOOL has_params = BROTLI_FALSE;
187
  BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;
188
  size_t position = *pos;
189
  size_t stringlet_cnt = 0;
190
  if (position >= size) return BROTLI_FALSE;
191
192
  prefix_suffix_ok = ParsePrefixSuffixTable(
193
      size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);
194
  if (!prefix_suffix_ok) return BROTLI_FALSE;
195
  out->prefix_suffix_map = prefix_suffix_table;
196
  *prefix_suffix_count = stringlet_cnt;
197
198
  out->num_transforms = encoded[position++];
199
  out->transforms = &encoded[position];
200
  position += (size_t)out->num_transforms * 3;
201
  if (position > size) return BROTLI_FALSE;
202
  /* Check for errors and read extra parameters. */
203
  for (i = 0; i < out->num_transforms; i++) {
204
    uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);
205
    uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
206
    uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);
207
    if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;
208
    if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;
209
    if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;
210
    if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||
211
        type == BROTLI_TRANSFORM_SHIFT_ALL) {
212
      has_params = BROTLI_TRUE;
213
    }
214
  }
215
  if (has_params) {
216
    out->params = &encoded[position];
217
    position += (size_t)out->num_transforms * 2;
218
    if (position > size) return BROTLI_FALSE;
219
    for (i = 0; i < out->num_transforms; i++) {
220
      uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
221
      if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&
222
          type != BROTLI_TRANSFORM_SHIFT_ALL) {
223
        if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {
224
          return BROTLI_FALSE;
225
        }
226
      }
227
    }
228
  } else {
229
    out->params = NULL;
230
  }
231
  ComputeCutoffTransforms(out);
232
  *pos = position;
233
  return BROTLI_TRUE;
234
}
235
236
static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,
237
    size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {
238
  size_t pos = 0;
239
  uint32_t chunk_size = 0;
240
  uint8_t num_word_lists;
241
  uint8_t num_transform_lists;
242
  *is_custom_static_dict = BROTLI_FALSE;
243
  *num_prefix = 0;
244
245
  /* Skip magic header bytes. */
246
  pos += 2;
247
248
  /* LZ77_DICTIONARY_LENGTH */
249
  if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
250
  if (chunk_size != 0) {
251
    /* This limitation is not specified but the 32-bit Brotli decoder for now */
252
    if (chunk_size > 1073741823) return BROTLI_FALSE;
253
    *num_prefix = 1;
254
    if (pos + chunk_size > size) return BROTLI_FALSE;
255
    pos += chunk_size;
256
  }
257
258
  if (!ReadUint8(encoded, size, &pos, &num_word_lists)) {
259
    return BROTLI_FALSE;
260
  }
261
  if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) {
262
    return BROTLI_FALSE;
263
  }
264
265
  if (num_word_lists > 0 || num_transform_lists > 0) {
266
    *is_custom_static_dict = BROTLI_TRUE;
267
  }
268
269
  return BROTLI_TRUE;
270
}
271
272
static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,
273
    BrotliSharedDictionary* dict) {
274
  uint32_t i;
275
  size_t pos = 0;
276
  uint32_t chunk_size = 0;
277
  size_t total_prefix_suffix_count = 0;
278
  size_t transform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
279
  uint16_t temporary_prefix_suffix_table[256];
280
281
  /* Skip magic header bytes. */
282
  pos += 2;
283
284
  /* LZ77_DICTIONARY_LENGTH */
285
  if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
286
  if (chunk_size != 0) {
287
    if (pos + chunk_size > size) return BROTLI_FALSE;
288
    dict->prefix_size[dict->num_prefix] = chunk_size;
289
    dict->prefix[dict->num_prefix] = &encoded[pos];
290
    dict->num_prefix++;
291
    /* LZ77_DICTIONARY_LENGTH bytes. */
292
    pos += chunk_size;
293
  }
294
295
  /* NUM_WORD_LISTS */
296
  if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {
297
    return BROTLI_FALSE;
298
  }
299
  if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
300
    return BROTLI_FALSE;
301
  }
302
303
  if (dict->num_word_lists != 0) {
304
    dict->words_instances = (BrotliDictionary*)dict->alloc_func(
305
        dict->memory_manager_opaque,
306
        dict->num_word_lists * sizeof(*dict->words_instances));
307
    if (!dict->words_instances) return BROTLI_FALSE;  /* OOM */
308
  }
309
  for (i = 0; i < dict->num_word_lists; i++) {
310
    if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {
311
      return BROTLI_FALSE;
312
    }
313
  }
314
315
  /* NUM_TRANSFORM_LISTS */
316
  if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {
317
    return BROTLI_FALSE;
318
  }
319
  if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
320
    return BROTLI_FALSE;
321
  }
322
323
  if (dict->num_transform_lists != 0) {
324
    dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(
325
        dict->memory_manager_opaque,
326
        dict->num_transform_lists * sizeof(*dict->transforms_instances));
327
    if (!dict->transforms_instances) return BROTLI_FALSE;  /* OOM */
328
  }
329
  for (i = 0; i < dict->num_transform_lists; i++) {
330
    BROTLI_BOOL ok = BROTLI_FALSE;
331
    size_t prefix_suffix_count = 0;
332
    transform_list_start[i] = pos;
333
    dict->transforms_instances[i].prefix_suffix_map =
334
        temporary_prefix_suffix_table;
335
    ok = ParseTransformsList(
336
        size, encoded, &pos, &dict->transforms_instances[i],
337
        temporary_prefix_suffix_table, &prefix_suffix_count);
338
    if (!ok) return BROTLI_FALSE;
339
    total_prefix_suffix_count += prefix_suffix_count;
340
  }
341
  if (total_prefix_suffix_count != 0) {
342
    dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(
343
        dict->memory_manager_opaque,
344
        total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));
345
    if (!dict->prefix_suffix_maps) return BROTLI_FALSE;  /* OOM */
346
  }
347
  total_prefix_suffix_count = 0;
348
  for (i = 0; i < dict->num_transform_lists; i++) {
349
    size_t prefix_suffix_count = 0;
350
    size_t position = transform_list_start[i];
351
    uint16_t* prefix_suffix_map =
352
      &dict->prefix_suffix_maps[total_prefix_suffix_count];
353
    BROTLI_BOOL ok = ParsePrefixSuffixTable(
354
        size, encoded, &position, &dict->transforms_instances[i],
355
        prefix_suffix_map, &prefix_suffix_count);
356
    if (!ok) return BROTLI_FALSE;
357
    dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;
358
    total_prefix_suffix_count += prefix_suffix_count;
359
  }
360
361
  if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {
362
    if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {
363
      return BROTLI_FALSE;
364
    }
365
    if (dict->num_dictionaries == 0 ||
366
        dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
367
      return BROTLI_FALSE;
368
    }
369
    for (i = 0; i < dict->num_dictionaries; i++) {
370
      uint8_t words_index;
371
      uint8_t transforms_index;
372
      if (!ReadUint8(encoded, size, &pos, &words_index)) {
373
        return BROTLI_FALSE;
374
      }
375
      if (words_index > dict->num_word_lists) return BROTLI_FALSE;
376
      if (!ReadUint8(encoded, size, &pos, &transforms_index)) {
377
        return BROTLI_FALSE;
378
      }
379
      if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;
380
      dict->words[i] = words_index == dict->num_word_lists ?
381
          BrotliGetDictionary() : &dict->words_instances[words_index];
382
      dict->transforms[i] = transforms_index == dict->num_transform_lists ?
383
          BrotliGetTransforms(): &dict->transforms_instances[transforms_index];
384
    }
385
    /* CONTEXT_ENABLED */
386
    if (!ReadBool(encoded, size, &pos, &dict->context_based)) {
387
      return BROTLI_FALSE;
388
    }
389
390
    /* CONTEXT_MAP */
391
    if (dict->context_based) {
392
      for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {
393
        if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {
394
          return BROTLI_FALSE;
395
        }
396
        if (dict->context_map[i] >= dict->num_dictionaries) {
397
          return BROTLI_FALSE;
398
        }
399
      }
400
    }
401
  } else {
402
    dict->context_based = BROTLI_FALSE;
403
    dict->num_dictionaries = 1;
404
    dict->words[0] = BrotliGetDictionary();
405
    dict->transforms[0] = BrotliGetTransforms();
406
  }
407
408
  return BROTLI_TRUE;
409
}
410
411
/* Decodes shared dictionary and verifies correctness.
412
   Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise.
413
   The BrotliSharedDictionary must already have been initialized. If the
414
   BrotliSharedDictionary already contains data, compound dictionaries
415
   will be appended, but an error will be returned if it already has
416
   custom words or transforms.
417
   TODO(lode): link to RFC for shared brotli once published. */
418
static BROTLI_BOOL DecodeSharedDictionary(
419
    const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {
420
  uint32_t num_prefix = 0;
421
  BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE;
422
  BROTLI_BOOL has_custom_static_dict =
423
      dict->num_word_lists > 0 || dict->num_transform_lists > 0;
424
425
  /* Check magic header bytes. */
426
  if (size < 2) return BROTLI_FALSE;
427
  if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;
428
429
  if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) {
430
    return BROTLI_FALSE;
431
  }
432
433
  if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {
434
    return BROTLI_FALSE;
435
  }
436
437
  /* Cannot combine different static dictionaries, only prefix dictionaries */
438
  if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE;
439
440
  return ParseDictionary(encoded, size, dict);
441
}
442
443
#endif  /* BROTLI_EXPERIMENTAL */
444
445
void BrotliSharedDictionaryDestroyInstance(
446
4.24k
    BrotliSharedDictionary* dict) {
447
4.24k
  if (!dict) {
448
0
    return;
449
4.24k
  } else {
450
4.24k
    brotli_free_func free_func = dict->free_func;
451
4.24k
    void* opaque = dict->memory_manager_opaque;
452
    /* Cleanup. */
453
4.24k
    free_func(opaque, dict->words_instances);
454
4.24k
    free_func(opaque, dict->transforms_instances);
455
4.24k
    free_func(opaque, dict->prefix_suffix_maps);
456
    /* Self-destruction. */
457
4.24k
    free_func(opaque, dict);
458
4.24k
  }
459
4.24k
}
460
461
BROTLI_BOOL BrotliSharedDictionaryAttach(
462
    BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
463
0
    size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) {
464
0
  if (!dict) {
465
0
    return BROTLI_FALSE;
466
0
  }
467
#if defined(BROTLI_EXPERIMENTAL)
468
  if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
469
    return DecodeSharedDictionary(data, data_size, dict);
470
  }
471
#endif  /* BROTLI_EXPERIMENTAL */
472
0
  if (type == BROTLI_SHARED_DICTIONARY_RAW) {
473
0
    if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) {
474
0
      return BROTLI_FALSE;
475
0
    }
476
0
    dict->prefix_size[dict->num_prefix] = data_size;
477
0
    dict->prefix[dict->num_prefix] = data;
478
0
    dict->num_prefix++;
479
0
    return BROTLI_TRUE;
480
0
  }
481
0
  return BROTLI_FALSE;
482
0
}
483
484
BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
485
4.24k
    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
486
4.24k
  BrotliSharedDictionary* dict = 0;
487
4.24k
  if (!alloc_func && !free_func) {
488
4.24k
    dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary));
489
4.24k
  } else if (alloc_func && free_func) {
490
0
    dict = (BrotliSharedDictionary*)alloc_func(
491
0
        opaque, sizeof(BrotliSharedDictionary));
492
0
  }
493
4.24k
  if (dict == 0) {
494
0
    return 0;
495
0
  }
496
497
  /* TODO(eustas): explicitly initialize all the fields? */
498
4.24k
  memset(dict, 0, sizeof(BrotliSharedDictionary));
499
500
4.24k
  dict->context_based = BROTLI_FALSE;
501
4.24k
  dict->num_dictionaries = 1;
502
4.24k
  dict->num_word_lists = 0;
503
4.24k
  dict->num_transform_lists = 0;
504
505
4.24k
  dict->words[0] = BrotliGetDictionary();
506
4.24k
  dict->transforms[0] = BrotliGetTransforms();
507
508
4.24k
  dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;
509
4.24k
  dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc;
510
4.24k
  dict->memory_manager_opaque = opaque;
511
512
4.24k
  return dict;
513
4.24k
}
514
515
#if defined(__cplusplus) || defined(c_plusplus)
516
}  /* extern "C" */
517
#endif