Coverage Report

Created: 2025-11-24 06:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/h2o/deps/brotli/c/enc/hash.h
Line
Count
Source
1
/* Copyright 2010 Google Inc. All Rights Reserved.
2
3
   Distributed under MIT license.
4
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
*/
6
7
/* A (forgetful) hash table to the data seen by the compressor, to
8
   help create backward references to previous data. */
9
10
#ifndef BROTLI_ENC_HASH_H_
11
#define BROTLI_ENC_HASH_H_
12
13
#include <string.h>  /* memcmp, memset */
14
15
#include "../common/constants.h"
16
#include "../common/dictionary.h"
17
#include <brotli/types.h>
18
#include "./fast_log.h"
19
#include "./find_match_length.h"
20
#include "./memory.h"
21
#include "./port.h"
22
#include "./quality.h"
23
#include "./static_dict.h"
24
25
#if defined(__cplusplus) || defined(c_plusplus)
26
extern "C" {
27
#endif
28
29
/* Pointer to hasher data.
30
 *
31
 * Excluding initialization and destruction, hasher can be passed as
32
 * HasherHandle by value.
33
 *
34
 * Typically hasher data consists of 3 sections:
35
 * * HasherCommon structure
36
 * * private structured hasher data, depending on hasher type
37
 * * private dynamic hasher data, depending on hasher type and parameters
38
 */
39
typedef uint8_t* HasherHandle;
40
41
typedef struct {
42
  BrotliHasherParams params;
43
44
  /* False if hasher needs to be "prepared" before use. */
45
  BROTLI_BOOL is_prepared_;
46
47
  size_t dict_num_lookups;
48
  size_t dict_num_matches;
49
} HasherCommon;
50
51
0
static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) {
52
0
  return (HasherCommon*)handle;
53
0
}
Unexecuted instantiation: backward_references.c:GetHasherCommon
Unexecuted instantiation: backward_references_hq.c:GetHasherCommon
Unexecuted instantiation: encode.c:GetHasherCommon
54
55
0
#define score_t size_t
56
57
static const uint32_t kCutoffTransformsCount = 10;
58
/*   0,  12,   27,    23,    42,    63,    56,    48,    59,    64 */
59
/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
60
static const uint64_t kCutoffTransforms =
61
    BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
62
63
typedef struct HasherSearchResult {
64
  size_t len;
65
  size_t distance;
66
  score_t score;
67
  int len_code_delta; /* == len_code - len */
68
} HasherSearchResult;
69
70
/* kHashMul32 multiplier has these properties:
71
   * The multiplier must be odd. Otherwise we may lose the highest bit.
72
   * No long streaks of ones or zeros.
73
   * There is no effort to ensure that it is a prime, the oddity is enough
74
     for this use.
75
   * The number has been tuned heuristically against compression benchmarks. */
76
static const uint32_t kHashMul32 = 0x1e35a7bd;
77
static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1e35a7bd, 0x1e35a7bd);
78
static const uint64_t kHashMul64Long =
79
    BROTLI_MAKE_UINT64_T(0x1fe35a7bU, 0xd3579bd3U);
80
81
0
static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
82
0
  uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
83
  /* The higher bits contain more mixture from the multiplication,
84
     so we take our results from there. */
85
0
  return h >> (32 - 14);
86
0
}
Unexecuted instantiation: backward_references.c:Hash14
Unexecuted instantiation: backward_references_hq.c:Hash14
Unexecuted instantiation: encode.c:Hash14
87
88
static BROTLI_INLINE void PrepareDistanceCache(
89
0
    int* BROTLI_RESTRICT distance_cache, const int num_distances) {
90
0
  if (num_distances > 4) {
91
0
    int last_distance = distance_cache[0];
92
0
    distance_cache[4] = last_distance - 1;
93
0
    distance_cache[5] = last_distance + 1;
94
0
    distance_cache[6] = last_distance - 2;
95
0
    distance_cache[7] = last_distance + 2;
96
0
    distance_cache[8] = last_distance - 3;
97
0
    distance_cache[9] = last_distance + 3;
98
0
    if (num_distances > 10) {
99
0
      int next_last_distance = distance_cache[1];
100
0
      distance_cache[10] = next_last_distance - 1;
101
0
      distance_cache[11] = next_last_distance + 1;
102
0
      distance_cache[12] = next_last_distance - 2;
103
0
      distance_cache[13] = next_last_distance + 2;
104
0
      distance_cache[14] = next_last_distance - 3;
105
0
      distance_cache[15] = next_last_distance + 3;
106
0
    }
107
0
  }
108
0
}
Unexecuted instantiation: backward_references.c:PrepareDistanceCache
Unexecuted instantiation: backward_references_hq.c:PrepareDistanceCache
Unexecuted instantiation: encode.c:PrepareDistanceCache
109
110
0
#define BROTLI_LITERAL_BYTE_SCORE 135
111
0
#define BROTLI_DISTANCE_BIT_PENALTY 30
112
/* Score must be positive after applying maximal penalty. */
113
0
#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
114
115
/* Usually, we always choose the longest backward reference. This function
116
   allows for the exception of that rule.
117
118
   If we choose a backward reference that is further away, it will
119
   usually be coded with more bits. We approximate this by assuming
120
   log2(distance). If the distance can be expressed in terms of the
121
   last four distances, we use some heuristic constants to estimate
122
   the bits cost. For the first up to four literals we use the bit
123
   cost of the literals from the literal cost model, after that we
124
   use the average bit cost of the cost model.
125
126
   This function is used to sometimes discard a longer backward reference
127
   when it is not much longer and the bit cost for encoding it is more
128
   than the saved literals.
129
130
   backward_reference_offset MUST be positive. */
131
static BROTLI_INLINE score_t BackwardReferenceScore(
132
0
    size_t copy_length, size_t backward_reference_offset) {
133
0
  return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
134
0
      BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
135
0
}
Unexecuted instantiation: backward_references.c:BackwardReferenceScore
Unexecuted instantiation: backward_references_hq.c:BackwardReferenceScore
Unexecuted instantiation: encode.c:BackwardReferenceScore
136
137
static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
138
0
    size_t copy_length) {
139
0
  return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
140
0
      BROTLI_SCORE_BASE + 15;
141
0
}
Unexecuted instantiation: backward_references.c:BackwardReferenceScoreUsingLastDistance
Unexecuted instantiation: backward_references_hq.c:BackwardReferenceScoreUsingLastDistance
Unexecuted instantiation: encode.c:BackwardReferenceScoreUsingLastDistance
142
143
static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
144
0
    size_t distance_short_code) {
145
0
  return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
146
0
}
Unexecuted instantiation: backward_references.c:BackwardReferencePenaltyUsingLastDistance
Unexecuted instantiation: backward_references_hq.c:BackwardReferencePenaltyUsingLastDistance
Unexecuted instantiation: encode.c:BackwardReferencePenaltyUsingLastDistance
147
148
static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
149
    const BrotliDictionary* dictionary, size_t item, const uint8_t* data,
150
0
    size_t max_length, size_t max_backward, HasherSearchResult* out) {
151
0
  size_t len;
152
0
  size_t dist;
153
0
  size_t offset;
154
0
  size_t matchlen;
155
0
  size_t backward;
156
0
  score_t score;
157
0
  len = item & 0x1F;
158
0
  dist = item >> 5;
159
0
  offset = dictionary->offsets_by_length[len] + len * dist;
160
0
  if (len > max_length) {
161
0
    return BROTLI_FALSE;
162
0
  }
163
164
0
  matchlen =
165
0
      FindMatchLengthWithLimit(data, &dictionary->data[offset], len);
166
0
  if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {
167
0
    return BROTLI_FALSE;
168
0
  }
169
0
  {
170
0
    size_t cut = len - matchlen;
171
0
    size_t transform_id =
172
0
        (cut << 2) + (size_t)((kCutoffTransforms >> (cut * 6)) & 0x3F);
173
0
    backward = max_backward + dist + 1 +
174
0
        (transform_id << dictionary->size_bits_by_length[len]);
175
0
  }
176
0
  if (backward >= BROTLI_MAX_DISTANCE) {
177
0
    return BROTLI_FALSE;
178
0
  }
179
0
  score = BackwardReferenceScore(matchlen, backward);
180
0
  if (score < out->score) {
181
0
    return BROTLI_FALSE;
182
0
  }
183
0
  out->len = matchlen;
184
0
  out->len_code_delta = (int)len - (int)matchlen;
185
0
  out->distance = backward;
186
0
  out->score = score;
187
0
  return BROTLI_TRUE;
188
0
}
Unexecuted instantiation: backward_references.c:TestStaticDictionaryItem
Unexecuted instantiation: backward_references_hq.c:TestStaticDictionaryItem
Unexecuted instantiation: encode.c:TestStaticDictionaryItem
189
190
static BROTLI_INLINE void SearchInStaticDictionary(
191
    const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
192
    HasherHandle handle, const uint8_t* data, size_t max_length,
193
0
    size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {
194
0
  size_t key;
195
0
  size_t i;
196
0
  HasherCommon* self = GetHasherCommon(handle);
197
0
  if (self->dict_num_matches < (self->dict_num_lookups >> 7)) {
198
0
    return;
199
0
  }
200
0
  key = Hash14(data) << 1;
201
0
  for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
202
0
    size_t item = dictionary_hash[key];
203
0
    self->dict_num_lookups++;
204
0
    if (item != 0) {
205
0
      BROTLI_BOOL item_matches = TestStaticDictionaryItem(
206
0
          dictionary, item, data, max_length, max_backward, out);
207
0
      if (item_matches) {
208
0
        self->dict_num_matches++;
209
0
      }
210
0
    }
211
0
  }
212
0
}
Unexecuted instantiation: backward_references.c:SearchInStaticDictionary
Unexecuted instantiation: backward_references_hq.c:SearchInStaticDictionary
Unexecuted instantiation: encode.c:SearchInStaticDictionary
213
214
typedef struct BackwardMatch {
215
  uint32_t distance;
216
  uint32_t length_and_code;
217
} BackwardMatch;
218
219
static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
220
0
    size_t dist, size_t len) {
221
0
  self->distance = (uint32_t)dist;
222
0
  self->length_and_code = (uint32_t)(len << 5);
223
0
}
Unexecuted instantiation: backward_references.c:InitBackwardMatch
Unexecuted instantiation: backward_references_hq.c:InitBackwardMatch
Unexecuted instantiation: encode.c:InitBackwardMatch
224
225
static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
226
0
    size_t dist, size_t len, size_t len_code) {
227
0
  self->distance = (uint32_t)dist;
228
0
  self->length_and_code =
229
0
      (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
230
0
}
Unexecuted instantiation: backward_references.c:InitDictionaryBackwardMatch
Unexecuted instantiation: backward_references_hq.c:InitDictionaryBackwardMatch
Unexecuted instantiation: encode.c:InitDictionaryBackwardMatch
231
232
0
static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
233
0
  return self->length_and_code >> 5;
234
0
}
Unexecuted instantiation: backward_references.c:BackwardMatchLength
Unexecuted instantiation: backward_references_hq.c:BackwardMatchLength
Unexecuted instantiation: encode.c:BackwardMatchLength
235
236
0
static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
237
0
  size_t code = self->length_and_code & 31;
238
0
  return code ? code : BackwardMatchLength(self);
239
0
}
Unexecuted instantiation: backward_references.c:BackwardMatchLengthCode
Unexecuted instantiation: backward_references_hq.c:BackwardMatchLengthCode
Unexecuted instantiation: encode.c:BackwardMatchLengthCode
240
241
0
#define EXPAND_CAT(a, b) CAT(a, b)
242
0
#define CAT(a, b) a ## b
243
0
#define FN(X) EXPAND_CAT(X, HASHER())
244
245
0
#define HASHER() H10
246
0
#define BUCKET_BITS 17
247
0
#define MAX_TREE_SEARCH_DEPTH 64
248
0
#define MAX_TREE_COMP_LENGTH 128
249
#include "./hash_to_binary_tree_inc.h"  /* NOLINT(build/include) */
250
#undef MAX_TREE_SEARCH_DEPTH
251
#undef MAX_TREE_COMP_LENGTH
252
#undef BUCKET_BITS
253
#undef HASHER
254
/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
255
#define MAX_NUM_MATCHES_H10 128
256
257
/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
258
   a little faster (0.5% - 1%) and it compresses 0.15% better on small text
259
   and HTML inputs. */
260
261
0
#define HASHER() H2
262
0
#define BUCKET_BITS 16
263
0
#define BUCKET_SWEEP 1
264
0
#define HASH_LEN 5
265
0
#define USE_DICTIONARY 1
266
#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
267
#undef BUCKET_SWEEP
268
#undef USE_DICTIONARY
269
#undef HASHER
270
271
0
#define HASHER() H3
272
0
#define BUCKET_SWEEP 2
273
0
#define USE_DICTIONARY 0
274
#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
275
#undef USE_DICTIONARY
276
#undef BUCKET_SWEEP
277
#undef BUCKET_BITS
278
#undef HASHER
279
280
0
#define HASHER() H4
281
0
#define BUCKET_BITS 17
282
0
#define BUCKET_SWEEP 4
283
0
#define USE_DICTIONARY 1
284
#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
285
#undef USE_DICTIONARY
286
#undef HASH_LEN
287
#undef BUCKET_SWEEP
288
#undef BUCKET_BITS
289
#undef HASHER
290
291
0
#define HASHER() H5
292
#include "./hash_longest_match_inc.h"  /* NOLINT(build/include) */
293
#undef HASHER
294
295
0
#define HASHER() H6
296
#include "./hash_longest_match64_inc.h"  /* NOLINT(build/include) */
297
#undef HASHER
298
299
0
#define BUCKET_BITS 15
300
301
0
#define NUM_LAST_DISTANCES_TO_CHECK 4
302
0
#define NUM_BANKS 1
303
0
#define BANK_BITS 16
304
0
#define HASHER() H40
305
#include "./hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
306
#undef HASHER
307
#undef NUM_LAST_DISTANCES_TO_CHECK
308
309
0
#define NUM_LAST_DISTANCES_TO_CHECK 10
310
0
#define HASHER() H41
311
#include "./hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
312
#undef HASHER
313
#undef NUM_LAST_DISTANCES_TO_CHECK
314
#undef NUM_BANKS
315
#undef BANK_BITS
316
317
0
#define NUM_LAST_DISTANCES_TO_CHECK 16
318
0
#define NUM_BANKS 512
319
0
#define BANK_BITS 9
320
0
#define HASHER() H42
321
#include "./hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
322
#undef HASHER
323
#undef NUM_LAST_DISTANCES_TO_CHECK
324
#undef NUM_BANKS
325
#undef BANK_BITS
326
327
#undef BUCKET_BITS
328
329
0
#define HASHER() H54
330
0
#define BUCKET_BITS 20
331
0
#define BUCKET_SWEEP 4
332
0
#define HASH_LEN 7
333
0
#define USE_DICTIONARY 0
334
#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
335
#undef USE_DICTIONARY
336
#undef HASH_LEN
337
#undef BUCKET_SWEEP
338
#undef BUCKET_BITS
339
#undef HASHER
340
341
#undef FN
342
#undef CAT
343
#undef EXPAND_CAT
344
345
0
#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
346
0
#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
347
348
static BROTLI_INLINE void DestroyHasher(
349
0
    MemoryManager* m, HasherHandle* handle) {
350
0
  if (*handle == NULL) return;
351
0
  BROTLI_FREE(m, *handle);
352
0
}
Unexecuted instantiation: backward_references.c:DestroyHasher
Unexecuted instantiation: backward_references_hq.c:DestroyHasher
Unexecuted instantiation: encode.c:DestroyHasher
353
354
0
static BROTLI_INLINE void HasherReset(HasherHandle handle) {
355
0
  if (handle == NULL) return;
356
0
  GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE;
357
0
}
Unexecuted instantiation: backward_references.c:HasherReset
Unexecuted instantiation: backward_references_hq.c:HasherReset
Unexecuted instantiation: encode.c:HasherReset
358
359
static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
360
0
    BROTLI_BOOL one_shot, const size_t input_size) {
361
0
  size_t result = sizeof(HasherCommon);
362
0
  switch (params->hasher.type) {
363
0
#define SIZE_(N)                                                         \
364
0
    case N:                                                              \
365
0
      result += HashMemAllocInBytesH ## N(params, one_shot, input_size); \
366
0
      break;
367
0
    FOR_ALL_HASHERS(SIZE_)
368
0
#undef SIZE_
369
0
    default:
370
0
      break;
371
0
  }
372
0
  return result;
373
0
}
Unexecuted instantiation: backward_references.c:HasherSize
Unexecuted instantiation: backward_references_hq.c:HasherSize
Unexecuted instantiation: encode.c:HasherSize
374
375
static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
376
    BrotliEncoderParams* params, const uint8_t* data, size_t position,
377
0
    size_t input_size, BROTLI_BOOL is_last) {
378
0
  HasherHandle self = NULL;
379
0
  HasherCommon* common = NULL;
380
0
  BROTLI_BOOL one_shot = (position == 0 && is_last);
381
0
  if (*handle == NULL) {
382
0
    size_t alloc_size;
383
0
    ChooseHasher(params, &params->hasher);
384
0
    alloc_size = HasherSize(params, one_shot, input_size);
385
0
    self = BROTLI_ALLOC(m, uint8_t, alloc_size);
386
0
    if (BROTLI_IS_OOM(m)) return;
387
0
    *handle = self;
388
0
    common = GetHasherCommon(self);
389
0
    common->params = params->hasher;
390
0
    switch (common->params.type) {
391
0
#define INITIALIZE_(N)                     \
392
0
      case N:                              \
393
0
        InitializeH ## N(*handle, params); \
394
0
        break;
395
0
      FOR_ALL_HASHERS(INITIALIZE_);
396
0
#undef INITIALIZE_
397
0
      default:
398
0
        break;
399
0
    }
400
0
    HasherReset(*handle);
401
0
  }
402
403
0
  self = *handle;
404
0
  common = GetHasherCommon(self);
405
0
  if (!common->is_prepared_) {
406
0
    switch (common->params.type) {
407
0
#define PREPARE_(N)                                      \
408
0
      case N:                                            \
409
0
        PrepareH ## N(self, one_shot, input_size, data); \
410
0
        break;
411
0
      FOR_ALL_HASHERS(PREPARE_)
412
0
#undef PREPARE_
413
0
      default: break;
414
0
    }
415
0
    if (position == 0) {
416
0
        common->dict_num_lookups = 0;
417
0
        common->dict_num_matches = 0;
418
0
    }
419
0
    common->is_prepared_ = BROTLI_TRUE;
420
0
  }
421
0
}
Unexecuted instantiation: backward_references.c:HasherSetup
Unexecuted instantiation: backward_references_hq.c:HasherSetup
Unexecuted instantiation: encode.c:HasherSetup
422
423
static BROTLI_INLINE void InitOrStitchToPreviousBlock(
424
    MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask,
425
    BrotliEncoderParams* params, size_t position, size_t input_size,
426
0
    BROTLI_BOOL is_last) {
427
0
  HasherHandle self;
428
0
  HasherSetup(m, handle, params, data, position, input_size, is_last);
429
0
  if (BROTLI_IS_OOM(m)) return;
430
0
  self = *handle;
431
0
  switch (GetHasherCommon(self)->params.type) {
432
0
#define INIT_(N)                                                           \
433
0
    case N:                                                                \
434
0
      StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \
435
0
    break;
436
0
    FOR_ALL_HASHERS(INIT_)
437
0
#undef INIT_
438
0
    default: break;
439
0
  }
440
0
}
Unexecuted instantiation: backward_references.c:InitOrStitchToPreviousBlock
Unexecuted instantiation: backward_references_hq.c:InitOrStitchToPreviousBlock
Unexecuted instantiation: encode.c:InitOrStitchToPreviousBlock
441
442
#if defined(__cplusplus) || defined(c_plusplus)
443
}  /* extern "C" */
444
#endif
445
446
#endif  /* BROTLI_ENC_HASH_H_ */