Coverage Report

Created: 2025-06-12 07:25

/src/duckdb/third_party/brotli/enc/static_dict.cpp
Line
Count
Source (jump to first uncovered line)
1
/* Copyright 2013 Google Inc. All Rights Reserved.
2
3
   Distributed under MIT license.
4
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
*/
6
7
#include "static_dict.h"
8
9
#include "../common/dictionary.h"
10
#include "../common/brotli_platform.h"
11
#include "../common/transform.h"
12
#include "encoder_dict.h"
13
#include "find_match_length.h"
14
15
using namespace duckdb_brotli;
16
17
0
static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
18
0
  uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
19
  /* The higher bits contain more mixture from the multiplication,
20
     so we take our results from there. */
21
0
  return h >> (32 - kDictNumBits);
22
0
}
23
24
static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
25
0
                                   uint32_t* matches) {
26
0
  uint32_t match = (uint32_t)((distance << 5) + len_code);
27
0
  matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
28
0
}
29
30
static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
31
                                            const uint8_t* data,
32
                                            size_t id,
33
                                            size_t len,
34
0
                                            size_t maxlen) {
35
0
  const size_t offset = dictionary->offsets_by_length[len] + len * id;
36
0
  return FindMatchLengthWithLimit(&dictionary->data[offset], data,
37
0
                                  BROTLI_MIN(size_t, len, maxlen));
38
0
}
39
40
static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
41
0
    DictWord w, const uint8_t* data, size_t max_length) {
42
0
  if (w.len > max_length) {
43
0
    return BROTLI_FALSE;
44
0
  } else {
45
0
    const size_t offset = dictionary->offsets_by_length[w.len] +
46
0
        (size_t)w.len * (size_t)w.idx;
47
0
    const uint8_t* dict = &dictionary->data[offset];
48
0
    if (w.transform == 0) {
49
      /* Match against base dictionary word. */
50
0
      return
51
0
          TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);
52
0
    } else if (w.transform == 10) {
53
      /* Match against uppercase first transform.
54
         Note that there are only ASCII uppercase words in the lookup table. */
55
0
      return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&
56
0
              (dict[0] ^ 32) == data[0] &&
57
0
              FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
58
0
              w.len - 1u);
59
0
    } else {
60
      /* Match against uppercase all transform.
61
         Note that there are only ASCII uppercase words in the lookup table. */
62
0
      size_t i;
63
0
      for (i = 0; i < w.len; ++i) {
64
0
        if (dict[i] >= 'a' && dict[i] <= 'z') {
65
0
          if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;
66
0
        } else {
67
0
          if (dict[i] != data[i]) return BROTLI_FALSE;
68
0
        }
69
0
      }
70
0
      return BROTLI_TRUE;
71
0
    }
72
0
  }
73
0
}
74
75
/* Finds matches for a single static dictionary */
76
static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor(
77
    const BrotliEncoderDictionary* dictionary, const uint8_t* data,
78
0
    size_t min_length, size_t max_length, uint32_t* matches) {
79
0
  BROTLI_BOOL has_found_match = BROTLI_FALSE;
80
#if defined(BROTLI_EXPERIMENTAL)
81
  if (dictionary->has_words_heavy) {
82
    const BrotliTrieNode* node = &dictionary->trie.root;
83
    size_t l = 0;
84
    while (node && l < max_length) {
85
      uint8_t c;
86
      if (l >= min_length && node->len_) {
87
        AddMatch(node->idx_, l, node->len_, matches);
88
        has_found_match = BROTLI_TRUE;
89
      }
90
      c = data[l++];
91
      node = BrotliTrieSub(&dictionary->trie, node, c);
92
    }
93
    return has_found_match;
94
  }
95
#endif  /* BROTLI_EXPERIMENTAL */
96
0
  {
97
0
    size_t offset = dictionary->buckets[Hash(data)];
98
0
    BROTLI_BOOL end = !offset;
99
0
    while (!end) {
100
0
      DictWord w = dictionary->dict_words[offset++];
101
0
      const size_t l = w.len & 0x1F;
102
0
      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
103
0
      const size_t id = w.idx;
104
0
      end = !!(w.len & 0x80);
105
0
      w.len = (uint8_t)l;
106
0
      if (w.transform == 0) {
107
0
        const size_t matchlen =
108
0
            DictMatchLength(dictionary->words, data, id, l, max_length);
109
0
        const uint8_t* s;
110
0
        size_t minlen;
111
0
        size_t maxlen;
112
0
        size_t len;
113
        /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
114
0
        if (matchlen == l) {
115
0
          AddMatch(id, l, l, matches);
116
0
          has_found_match = BROTLI_TRUE;
117
0
        }
118
        /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
119
                      "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
120
0
        if (matchlen >= l - 1) {
121
0
          AddMatch(id + 12 * n, l - 1, l, matches);
122
0
          if (l + 2 < max_length &&
123
0
              data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
124
0
              data[l + 2] == ' ') {
125
0
            AddMatch(id + 49 * n, l + 3, l, matches);
126
0
          }
127
0
          has_found_match = BROTLI_TRUE;
128
0
        }
129
        /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
130
0
        minlen = min_length;
131
0
        if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
132
0
        maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
133
0
        for (len = minlen; len <= maxlen; ++len) {
134
0
          size_t cut = l - len;
135
0
          size_t transform_id = (cut << 2) +
136
0
              (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
137
0
          AddMatch(id + transform_id * n, len, l, matches);
138
0
          has_found_match = BROTLI_TRUE;
139
0
        }
140
0
        if (matchlen < l || l + 6 >= max_length) {
141
0
          continue;
142
0
        }
143
0
        s = &data[l];
144
        /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
145
0
        if (s[0] == ' ') {
146
0
          AddMatch(id + n, l + 1, l, matches);
147
0
          if (s[1] == 'a') {
148
0
            if (s[2] == ' ') {
149
0
              AddMatch(id + 28 * n, l + 3, l, matches);
150
0
            } else if (s[2] == 's') {
151
0
              if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
152
0
            } else if (s[2] == 't') {
153
0
              if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
154
0
            } else if (s[2] == 'n') {
155
0
              if (s[3] == 'd' && s[4] == ' ') {
156
0
                AddMatch(id + 10 * n, l + 5, l, matches);
157
0
              }
158
0
            }
159
0
          } else if (s[1] == 'b') {
160
0
            if (s[2] == 'y' && s[3] == ' ') {
161
0
              AddMatch(id + 38 * n, l + 4, l, matches);
162
0
            }
163
0
          } else if (s[1] == 'i') {
164
0
            if (s[2] == 'n') {
165
0
              if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
166
0
            } else if (s[2] == 's') {
167
0
              if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
168
0
            }
169
0
          } else if (s[1] == 'f') {
170
0
            if (s[2] == 'o') {
171
0
              if (s[3] == 'r' && s[4] == ' ') {
172
0
                AddMatch(id + 25 * n, l + 5, l, matches);
173
0
              }
174
0
            } else if (s[2] == 'r') {
175
0
              if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
176
0
                AddMatch(id + 37 * n, l + 6, l, matches);
177
0
              }
178
0
            }
179
0
          } else if (s[1] == 'o') {
180
0
            if (s[2] == 'f') {
181
0
              if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
182
0
            } else if (s[2] == 'n') {
183
0
              if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
184
0
            }
185
0
          } else if (s[1] == 'n') {
186
0
            if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
187
0
              AddMatch(id + 80 * n, l + 5, l, matches);
188
0
            }
189
0
          } else if (s[1] == 't') {
190
0
            if (s[2] == 'h') {
191
0
              if (s[3] == 'e') {
192
0
                if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
193
0
              } else if (s[3] == 'a') {
194
0
                if (s[4] == 't' && s[5] == ' ') {
195
0
                  AddMatch(id + 29 * n, l + 6, l, matches);
196
0
                }
197
0
              }
198
0
            } else if (s[2] == 'o') {
199
0
              if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
200
0
            }
201
0
          } else if (s[1] == 'w') {
202
0
            if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
203
0
              AddMatch(id + 35 * n, l + 6, l, matches);
204
0
            }
205
0
          }
206
0
        } else if (s[0] == '"') {
207
0
          AddMatch(id + 19 * n, l + 1, l, matches);
208
0
          if (s[1] == '>') {
209
0
            AddMatch(id + 21 * n, l + 2, l, matches);
210
0
          }
211
0
        } else if (s[0] == '.') {
212
0
          AddMatch(id + 20 * n, l + 1, l, matches);
213
0
          if (s[1] == ' ') {
214
0
            AddMatch(id + 31 * n, l + 2, l, matches);
215
0
            if (s[2] == 'T' && s[3] == 'h') {
216
0
              if (s[4] == 'e') {
217
0
                if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
218
0
              } else if (s[4] == 'i') {
219
0
                if (s[5] == 's' && s[6] == ' ') {
220
0
                  AddMatch(id + 75 * n, l + 7, l, matches);
221
0
                }
222
0
              }
223
0
            }
224
0
          }
225
0
        } else if (s[0] == ',') {
226
0
          AddMatch(id + 76 * n, l + 1, l, matches);
227
0
          if (s[1] == ' ') {
228
0
            AddMatch(id + 14 * n, l + 2, l, matches);
229
0
          }
230
0
        } else if (s[0] == '\n') {
231
0
          AddMatch(id + 22 * n, l + 1, l, matches);
232
0
          if (s[1] == '\t') {
233
0
            AddMatch(id + 50 * n, l + 2, l, matches);
234
0
          }
235
0
        } else if (s[0] == ']') {
236
0
          AddMatch(id + 24 * n, l + 1, l, matches);
237
0
        } else if (s[0] == '\'') {
238
0
          AddMatch(id + 36 * n, l + 1, l, matches);
239
0
        } else if (s[0] == ':') {
240
0
          AddMatch(id + 51 * n, l + 1, l, matches);
241
0
        } else if (s[0] == '(') {
242
0
          AddMatch(id + 57 * n, l + 1, l, matches);
243
0
        } else if (s[0] == '=') {
244
0
          if (s[1] == '"') {
245
0
            AddMatch(id + 70 * n, l + 2, l, matches);
246
0
          } else if (s[1] == '\'') {
247
0
            AddMatch(id + 86 * n, l + 2, l, matches);
248
0
          }
249
0
        } else if (s[0] == 'a') {
250
0
          if (s[1] == 'l' && s[2] == ' ') {
251
0
            AddMatch(id + 84 * n, l + 3, l, matches);
252
0
          }
253
0
        } else if (s[0] == 'e') {
254
0
          if (s[1] == 'd') {
255
0
            if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
256
0
          } else if (s[1] == 'r') {
257
0
            if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
258
0
          } else if (s[1] == 's') {
259
0
            if (s[2] == 't' && s[3] == ' ') {
260
0
              AddMatch(id + 95 * n, l + 4, l, matches);
261
0
            }
262
0
          }
263
0
        } else if (s[0] == 'f') {
264
0
          if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
265
0
            AddMatch(id + 90 * n, l + 4, l, matches);
266
0
          }
267
0
        } else if (s[0] == 'i') {
268
0
          if (s[1] == 'v') {
269
0
            if (s[2] == 'e' && s[3] == ' ') {
270
0
              AddMatch(id + 92 * n, l + 4, l, matches);
271
0
            }
272
0
          } else if (s[1] == 'z') {
273
0
            if (s[2] == 'e' && s[3] == ' ') {
274
0
              AddMatch(id + 100 * n, l + 4, l, matches);
275
0
            }
276
0
          }
277
0
        } else if (s[0] == 'l') {
278
0
          if (s[1] == 'e') {
279
0
            if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
280
0
              AddMatch(id + 93 * n, l + 5, l, matches);
281
0
            }
282
0
          } else if (s[1] == 'y') {
283
0
            if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
284
0
          }
285
0
        } else if (s[0] == 'o') {
286
0
          if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
287
0
            AddMatch(id + 106 * n, l + 4, l, matches);
288
0
          }
289
0
        }
290
0
      } else {
291
        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
292
               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
293
           transform. */
294
0
        const BROTLI_BOOL is_all_caps =
295
0
            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
296
0
        const uint8_t* s;
297
0
        if (!IsMatch(dictionary->words, w, data, max_length)) {
298
0
          continue;
299
0
        }
300
        /* Transform "" + kUppercase{First,All} + "" */
301
0
        AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
302
0
        has_found_match = BROTLI_TRUE;
303
0
        if (l + 1 >= max_length) {
304
0
          continue;
305
0
        }
306
        /* Transforms "" + kUppercase{First,All} + <suffix> */
307
0
        s = &data[l];
308
0
        if (s[0] == ' ') {
309
0
          AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
310
0
        } else if (s[0] == '"') {
311
0
          AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
312
0
          if (s[1] == '>') {
313
0
            AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
314
0
          }
315
0
        } else if (s[0] == '.') {
316
0
          AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
317
0
          if (s[1] == ' ') {
318
0
            AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
319
0
          }
320
0
        } else if (s[0] == ',') {
321
0
          AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
322
0
          if (s[1] == ' ') {
323
0
            AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
324
0
          }
325
0
        } else if (s[0] == '\'') {
326
0
          AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
327
0
        } else if (s[0] == '(') {
328
0
          AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
329
0
        } else if (s[0] == '=') {
330
0
          if (s[1] == '"') {
331
0
            AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
332
0
          } else if (s[1] == '\'') {
333
0
            AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
334
0
          }
335
0
        }
336
0
      }
337
0
    }
338
0
  }
339
  /* Transforms with prefixes " " and "." */
340
0
  if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
341
0
    BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
342
0
    size_t offset = dictionary->buckets[Hash(&data[1])];
343
0
    BROTLI_BOOL end = !offset;
344
0
    while (!end) {
345
0
      DictWord w = dictionary->dict_words[offset++];
346
0
      const size_t l = w.len & 0x1F;
347
0
      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
348
0
      const size_t id = w.idx;
349
0
      end = !!(w.len & 0x80);
350
0
      w.len = (uint8_t)l;
351
0
      if (w.transform == 0) {
352
0
        const uint8_t* s;
353
0
        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
354
0
          continue;
355
0
        }
356
        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
357
                      "." + BROTLI_TRANSFORM_IDENTITY + "" */
358
0
        AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
359
0
        has_found_match = BROTLI_TRUE;
360
0
        if (l + 2 >= max_length) {
361
0
          continue;
362
0
        }
363
        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
364
                      "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
365
        */
366
0
        s = &data[l + 1];
367
0
        if (s[0] == ' ') {
368
0
          AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
369
0
        } else if (s[0] == '(') {
370
0
          AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
371
0
        } else if (is_space) {
372
0
          if (s[0] == ',') {
373
0
            AddMatch(id + 103 * n, l + 2, l, matches);
374
0
            if (s[1] == ' ') {
375
0
              AddMatch(id + 33 * n, l + 3, l, matches);
376
0
            }
377
0
          } else if (s[0] == '.') {
378
0
            AddMatch(id + 71 * n, l + 2, l, matches);
379
0
            if (s[1] == ' ') {
380
0
              AddMatch(id + 52 * n, l + 3, l, matches);
381
0
            }
382
0
          } else if (s[0] == '=') {
383
0
            if (s[1] == '"') {
384
0
              AddMatch(id + 81 * n, l + 3, l, matches);
385
0
            } else if (s[1] == '\'') {
386
0
              AddMatch(id + 98 * n, l + 3, l, matches);
387
0
            }
388
0
          }
389
0
        }
390
0
      } else if (is_space) {
391
        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
392
               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
393
           transform. */
394
0
        const BROTLI_BOOL is_all_caps =
395
0
            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
396
0
        const uint8_t* s;
397
0
        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
398
0
          continue;
399
0
        }
400
        /* Transforms " " + kUppercase{First,All} + "" */
401
0
        AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
402
0
        has_found_match = BROTLI_TRUE;
403
0
        if (l + 2 >= max_length) {
404
0
          continue;
405
0
        }
406
        /* Transforms " " + kUppercase{First,All} + <suffix> */
407
0
        s = &data[l + 1];
408
0
        if (s[0] == ' ') {
409
0
          AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
410
0
        } else if (s[0] == ',') {
411
0
          if (!is_all_caps) {
412
0
            AddMatch(id + 109 * n, l + 2, l, matches);
413
0
          }
414
0
          if (s[1] == ' ') {
415
0
            AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
416
0
          }
417
0
        } else if (s[0] == '.') {
418
0
          AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
419
0
          if (s[1] == ' ') {
420
0
            AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
421
0
          }
422
0
        } else if (s[0] == '=') {
423
0
          if (s[1] == '"') {
424
0
            AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
425
0
          } else if (s[1] == '\'') {
426
0
            AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
427
0
          }
428
0
        }
429
0
      }
430
0
    }
431
0
  }
432
0
  if (max_length >= 6) {
433
    /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
434
0
    if ((data[1] == ' ' &&
435
0
         (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
436
0
        (data[0] == 0xC2 && data[1] == 0xA0)) {
437
0
      size_t offset = dictionary->buckets[Hash(&data[2])];
438
0
      BROTLI_BOOL end = !offset;
439
0
      while (!end) {
440
0
        DictWord w = dictionary->dict_words[offset++];
441
0
        const size_t l = w.len & 0x1F;
442
0
        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
443
0
        const size_t id = w.idx;
444
0
        end = !!(w.len & 0x80);
445
0
        w.len = (uint8_t)l;
446
0
        if (w.transform == 0 &&
447
0
            IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
448
0
          if (data[0] == 0xC2) {
449
0
            AddMatch(id + 102 * n, l + 2, l, matches);
450
0
            has_found_match = BROTLI_TRUE;
451
0
          } else if (l + 2 < max_length && data[l + 2] == ' ') {
452
0
            size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
453
0
            AddMatch(id + t * n, l + 3, l, matches);
454
0
            has_found_match = BROTLI_TRUE;
455
0
          }
456
0
        }
457
0
      }
458
0
    }
459
0
  }
460
0
  if (max_length >= 9) {
461
    /* Transforms with prefixes " the " and ".com/" */
462
0
    if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
463
0
         data[3] == 'e' && data[4] == ' ') ||
464
0
        (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
465
0
         data[3] == 'm' && data[4] == '/')) {
466
0
      size_t offset = dictionary->buckets[Hash(&data[5])];
467
0
      BROTLI_BOOL end = !offset;
468
0
      while (!end) {
469
0
        DictWord w = dictionary->dict_words[offset++];
470
0
        const size_t l = w.len & 0x1F;
471
0
        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
472
0
        const size_t id = w.idx;
473
0
        end = !!(w.len & 0x80);
474
0
        w.len = (uint8_t)l;
475
0
        if (w.transform == 0 &&
476
0
            IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
477
0
          AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
478
0
          has_found_match = BROTLI_TRUE;
479
0
          if (l + 5 < max_length) {
480
0
            const uint8_t* s = &data[l + 5];
481
0
            if (data[0] == ' ') {
482
0
              if (l + 8 < max_length &&
483
0
                  s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
484
0
                AddMatch(id + 62 * n, l + 9, l, matches);
485
0
                if (l + 12 < max_length &&
486
0
                    s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
487
0
                  AddMatch(id + 73 * n, l + 13, l, matches);
488
0
                }
489
0
              }
490
0
            }
491
0
          }
492
0
        }
493
0
      }
494
0
    }
495
0
  }
496
0
  return has_found_match;
497
0
}
498
499
/* Finds matches for one or more dictionaries, if multiple are present
500
   in the contextual dictionary */
501
BROTLI_BOOL duckdb_brotli::BrotliFindAllStaticDictionaryMatches(
502
    const BrotliEncoderDictionary* dictionary, const uint8_t* data,
503
0
    size_t min_length, size_t max_length, uint32_t* matches) {
504
0
  BROTLI_BOOL has_found_match =
505
0
      BrotliFindAllStaticDictionaryMatchesFor(
506
0
          dictionary, data, min_length, max_length, matches);
507
508
0
  if (!!dictionary->parent && dictionary->parent->num_dictionaries > 1) {
509
0
    uint32_t matches2[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
510
0
    int l;
511
0
    const BrotliEncoderDictionary* dictionary2 = dictionary->parent->dict[0];
512
0
    if (dictionary2 == dictionary) {
513
0
      dictionary2 = dictionary->parent->dict[1];
514
0
    }
515
516
0
    for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) {
517
0
      matches2[l] = kInvalidMatch;
518
0
    }
519
520
0
    has_found_match |= BrotliFindAllStaticDictionaryMatchesFor(
521
0
        dictionary2, data, min_length, max_length, matches2);
522
523
0
    for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) {
524
0
      if (matches2[l] != kInvalidMatch) {
525
0
        uint32_t dist = (uint32_t)(matches2[l] >> 5);
526
0
        uint32_t len_code = matches2[l] & 31;
527
0
        uint32_t skipdist = (uint32_t)((uint32_t)(1 << dictionary->words->
528
0
            size_bits_by_length[len_code]) & ~1u) *
529
0
            (uint32_t)dictionary->num_transforms;
530
        /* TODO(lode): check for dist overflow */
531
0
        dist += skipdist;
532
0
        AddMatch(dist, (size_t)l, len_code, matches);
533
0
      }
534
0
    }
535
0
  }
536
0
  return has_found_match;
537
0
}
538