Coverage Report

Created: 2026-05-14 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/duckdb/third_party/brotli/enc/compound_dictionary.cpp
Line
Count
Source
1
/* Copyright 2017 Google Inc. All Rights Reserved.
2
3
   Distributed under MIT license.
4
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
*/
6
7
#include "compound_dictionary.h"
8
9
#include <brotli/types.h>
10
11
#include "../common/brotli_platform.h"
12
#include "memory.h"
13
#include "quality.h"
14
15
using namespace duckdb_brotli;
16
17
/* Builds a "lean" prepared dictionary that indexes |source|.

   Every position that has at least 8 readable bytes is hashed (the low
   |hash_bits| bits of the 64-bit little-endian load) into one of
   2^|bucket_bits| buckets. Buckets are grouped into 2^|slot_bits| slots
   (bucket index modulo the slot count), and each bucket keeps at most
   |bucket_limit| of its most recently inserted positions.

   The result is a single allocation laid out as:
     PreparedDictionary header | slot_offsets | heads | items | source pointer
   The source bytes are NOT copied; only the |source| pointer is stored after
   the items.

   Returns NULL when the parameters are unsupported or an allocation fails.
   Besides the slot/bucket relations, the following are rejected:
     - bucket_bits == 0, hash_bits == 0 or hash_bits > 64: these would make
       one of the 64-bit shifts below undefined;
     - source_size >= 2^31: positions are stored in 32-bit items whose top bit
       is reserved as the "last item of bucket" flag, and the 32-bit position
       counter would otherwise wrap. */
static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
    const uint8_t* source, size_t source_size, uint32_t bucket_bits,
    uint32_t slot_bits, uint32_t hash_bits, uint16_t bucket_limit) {
  /* Step 1: create "bloated" hasher. */
  uint32_t num_slots = 0;
  uint32_t num_buckets = 0;
  uint32_t hash_shift = 0;
  uint64_t hash_mask = 0;
  uint32_t slot_mask = 0;
  size_t alloc_size = 0;
  uint8_t* flat = NULL;
  PreparedDictionary* result = NULL;
  uint16_t* num = NULL;
  uint32_t* bucket_heads = NULL;
  uint32_t* next_bucket = NULL;
  uint32_t* slot_offsets = NULL;
  uint16_t* heads = NULL;
  uint32_t* items = NULL;
  uint8_t** source_ref = NULL;
  uint32_t i;
  uint32_t* slot_size = NULL;
  uint32_t* slot_limit = NULL;
  uint32_t total_items = 0;

  /* Validate everything before any shift or allocation depends on it. */
  if (slot_bits > 16) return NULL;
  if (slot_bits > bucket_bits) return NULL;
  if (bucket_bits - slot_bits >= 16) return NULL;
  if (bucket_bits == 0) return NULL;
  if (hash_bits == 0 || hash_bits > 64) return NULL;
  if (source_size >= 0x80000000u) return NULL;

  /* The checks above imply bucket_bits <= 31, so the shifts are defined. */
  num_slots = 1u << slot_bits;
  num_buckets = 1u << bucket_bits;
  hash_shift = 64u - bucket_bits;
  hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
  slot_mask = num_slots - 1;
  /* Scratch layout: slot_size | slot_limit | num | bucket_heads | next_bucket */
  alloc_size = (sizeof(uint32_t) << slot_bits) +
      (sizeof(uint32_t) << slot_bits) +
      (sizeof(uint16_t) << bucket_bits) +
      (sizeof(uint32_t) << bucket_bits) +
      (sizeof(uint32_t) * source_size);

  flat = BROTLI_ALLOC(m, uint8_t, alloc_size);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(flat)) return NULL;

  slot_size = (uint32_t*)flat;
  slot_limit = (uint32_t*)(&slot_size[num_slots]);
  num = (uint16_t*)(&slot_limit[num_slots]);
  bucket_heads = (uint32_t*)(&num[num_buckets]);
  next_bucket = (uint32_t*)(&bucket_heads[num_buckets]);
  memset(num, 0, num_buckets * sizeof(num[0]));

  /* TODO(eustas): apply custom "store" order. */
  /* Chain every hashable position into its bucket; the newest position becomes
     the head. num[key] counts entries, saturating at bucket_limit. */
  for (i = 0; i + 7 < source_size; ++i) {
    const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(&source[i]) & hash_mask) *
        kPreparedDictionaryHashMul64Long;
    const uint32_t key = (uint32_t)(h >> hash_shift);
    uint16_t count = num[key];
    next_bucket[i] = (count == 0) ? ((uint32_t)(-1)) : bucket_heads[key];
    bucket_heads[key] = i;
    count++;
    if (count > bucket_limit) count = bucket_limit;
    num[key] = count;
  }

  /* Step 2: find slot limits. */
  /* For each slot, lower the per-bucket limit until every bucket of the slot
     starts at an item offset below 0xFFFF (heads are 16-bit and 0xFFFF is
     used for empty buckets). With a limit of 0 the count stays 0, so the
     search always terminates. */
  for (i = 0; i < num_slots; ++i) {
    BROTLI_BOOL overflow = BROTLI_FALSE;
    slot_limit[i] = bucket_limit;
    while (BROTLI_TRUE) {
      uint32_t limit = slot_limit[i];
      size_t j;
      uint32_t count = 0;
      overflow = BROTLI_FALSE;
      for (j = i; j < num_buckets; j += num_slots) {
        uint32_t size = num[j];
        /* Last chain may span behind 64K limit; overflow happens only if
           we are about to use 0xFFFF+ as item offset. */
        if (count >= 0xFFFF) {
          overflow = BROTLI_TRUE;
          break;
        }
        if (size > limit) size = limit;
        count += size;
      }
      if (!overflow) {
        slot_size[i] = count;
        total_items += count;
        break;
      }
      slot_limit[i]--;
    }
  }

  /* Step 3: transfer data to "slim" hasher. */
  alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
      (sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
      sizeof(uint8_t*);

  result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
    BROTLI_FREE(m, flat);
    return NULL;
  }
  slot_offsets = (uint32_t*)(&result[1]);
  heads = (uint16_t*)(&slot_offsets[num_slots]);
  items = (uint32_t*)(&heads[num_buckets]);
  source_ref = (uint8_t**)(&items[total_items]);

  result->magic = kLeanPreparedDictionaryMagic;
  result->num_items = total_items;
  result->source_size = (uint32_t)source_size;
  result->hash_bits = hash_bits;
  result->bucket_bits = bucket_bits;
  result->slot_bits = slot_bits;
  /* The pointer slot follows the items and may be misaligned for a pointer,
     hence the unaligned store. */
  BROTLI_UNALIGNED_STORE_PTR(source_ref, source);

  /* Turn per-slot sizes into starting offsets; slot_size is then reused as
     the per-slot fill cursor. */
  total_items = 0;
  for (i = 0; i < num_slots; ++i) {
    slot_offsets[i] = total_items;
    total_items += slot_size[i];
    slot_size[i] = 0;
  }
  for (i = 0; i < num_buckets; ++i) {
    uint32_t slot = i & slot_mask;
    uint32_t count = num[i];
    uint32_t pos;
    size_t j;
    size_t cursor = slot_size[slot];
    if (count > slot_limit[slot]) count = slot_limit[slot];
    if (count == 0) {
      /* Empty bucket marker. */
      heads[i] = 0xFFFF;
      continue;
    }
    /* heads[] holds the offset relative to the slot start. */
    heads[i] = (uint16_t)cursor;
    cursor += slot_offsets[slot];
    slot_size[slot] += count;
    pos = bucket_heads[i];
    for (j = 0; j < count; j++) {
      items[cursor++] = pos;
      pos = next_bucket[pos];
    }
    /* Flag the last item of this bucket's run with the top bit. */
    items[cursor - 1] |= 0x80000000;
  }

  BROTLI_FREE(m, flat);
  return result;
}
154
155
/* Creates a prepared dictionary for |source|, choosing the table geometry
   from |source_size|: starting at 17 bucket bits / 7 slot bits, both grow by
   one for each doubling of the covered volume (initially 16 << 17) until the
   volume reaches |source_size| or bucket bits hit 22. Hash bits (40) and the
   per-bucket limit (32) are fixed. Returns whatever
   CreatePreparedDictionaryWithParams returns (NULL on failure). */
PreparedDictionary* duckdb_brotli::CreatePreparedDictionary(MemoryManager* m,
    const uint8_t* source, size_t source_size) {
  const uint32_t kMaxBucketBits = 22;
  const uint32_t kHashBits = 40;
  const uint16_t kBucketLimit = 32;
  uint32_t bucket_bits = 17;
  uint32_t slot_bits = 7;
  size_t covered;

  /* Tune parameters to fit dictionary size. */
  for (covered = 16u << bucket_bits;
       bucket_bits < kMaxBucketBits && source_size > covered;
       covered <<= 1) {
    ++bucket_bits;
    ++slot_bits;
  }

  return CreatePreparedDictionaryWithParams(m,
      source, source_size, bucket_bits, slot_bits, kHashBits, kBucketLimit);
}
171
172
void duckdb_brotli::DestroyPreparedDictionary(MemoryManager* m,
173
0
    PreparedDictionary* dictionary) {
174
0
  if (!dictionary) return;
175
0
  BROTLI_FREE(m, dictionary);
176
0
}
177
178
/* Appends |dictionary| as the next chunk of |compound|.

   Returns BROTLI_FALSE, leaving |compound| untouched, when the compound
   already holds SHARED_BROTLI_MAX_COMPOUND_DICTS chunks or |dictionary| is
   null. Otherwise records the chunk, its cumulative end offset and the
   location of its source bytes, bumps the chunk count and returns
   BROTLI_TRUE. */
BROTLI_BOOL duckdb_brotli::AttachPreparedDictionary(
    CompoundDictionary* compound, const PreparedDictionary* dictionary) {
  if (compound->num_chunks == SHARED_BROTLI_MAX_COMPOUND_DICTS ||
      !dictionary) {
    return BROTLI_FALSE;
  }

  const size_t chunk_length = dictionary->source_size;
  const size_t chunk_index = compound->num_chunks;

  compound->total_size += chunk_length;
  compound->chunks[chunk_index] = dictionary;
  compound->chunk_offsets[chunk_index + 1] = compound->total_size;

  /* Walk past the tables that follow the header:
     slot_offsets (2^slot_bits x u32), heads (2^bucket_bits x u16),
     items (num_items x u32). */
  uint32_t* slot_table = (uint32_t*)(&dictionary[1]);
  uint16_t* head_table = (uint16_t*)(slot_table + (1u << dictionary->slot_bits));
  uint32_t* item_table =
      (uint32_t*)(head_table + (1u << dictionary->bucket_bits));
  const void* after_items = (void*)(item_table + dictionary->num_items);

  const uint8_t* chunk_bytes;
  if (dictionary->magic != kPreparedDictionaryMagic) {
    /* dictionary->magic == kLeanPreparedDictionaryMagic */
    /* Lean form: a (possibly unaligned) pointer to the source sits there. */
    chunk_bytes =
        (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)after_items);
  } else {
    /* The source bytes themselves start right after the items. */
    chunk_bytes = (const uint8_t*)after_items;
  }
  compound->chunk_source[chunk_index] = chunk_bytes;

  compound->num_chunks++;
  return BROTLI_TRUE;
}