Coverage Report

Created: 2025-11-15 07:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/duckdb/third_party/brotli/common/transform.cpp
Line
Count
Source
1
/* Copyright 2013 Google Inc. All Rights Reserved.
2
3
   Distributed under MIT license.
4
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
*/
6
7
#include "transform.h"
8
9
using namespace duckdb_brotli;
10
11
/* RFC 7932 transforms string data */
/* Packed pool of length-prefixed affix strings. Each entry is one length byte
   (written below as an octal escape such as \1 or \10) followed by that many
   payload bytes; entries are laid out back to back, so splitting or joining
   the literal chunks must not change a single byte. kPrefixSuffixMap records
   the offset at which every entry starts. The comment under each chunk is an
   offset ruler: the leading "Nx" gives the high hex digit and each "_D" marks
   the low hex digit of an entry start within that chunk. */
static const char kPrefixSuffix[217] =
      "\1 \2, \10 of the \4 of \2s \1.\5 and \4 "
/* 0x  _0 _2  __5        _E    _3  _6 _8     _E */
      "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "
/* 2x     _3_ _5    _A_  _D_ _F  _2 _4     _A   _E */
      "that \1\'\6 with \6 from \4 by \1(\6. T"
/* 4x       _5_ _7      _E      _5    _A _C */
      "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "
/* 6x     _3    _8    _D    _2    _7_ _ _A _C */
      "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"
/* 8x  _0 _ _3    _8   _C _E _ _1     _7       _F */
      " not \3er \3al \4ful \4ive \5less \4es"
/* Ax       _5   _9   _D    _2    _7     _D */
      "t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */
/* Cx    _2    _7___ ___ _A    _F     _5        _8 */
27
28
/* Byte offsets into kPrefixSuffix, one per affix entry (50 entries, ids
   0..49). Each offset points at the entry's length byte. The last offset,
   0xD8 (216), is the final element of the 217-byte kPrefixSuffix array, i.e.
   the trailing zero, so id 49 denotes a zero-length (empty) affix. */
static const uint16_t kPrefixSuffixMap[50] = {
  0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
  0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
  0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
  0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
  0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
};
35
36
/* RFC 7932 transforms */
/* Flat table holding three bytes per transform; the transform's index is its
   row number (121 rows here). The middle byte is a BROTLI_TRANSFORM_* type.
   NOTE(review): the first and third bytes are presumably the prefix id and
   the suffix id (indices into kPrefixSuffixMap), as read by the
   BROTLI_TRANSFORM_PREFIX / BROTLI_TRANSFORM_SUFFIX accessors -- those macros
   are defined outside this file, so confirm against transform.h. Under that
   reading, id 49 selects the empty affix. Row order is significant: do not
   sort or deduplicate. */
static const uint8_t kTransformsData[] = {
  49, BROTLI_TRANSFORM_IDENTITY, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 0,
   0, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 47,
   0, BROTLI_TRANSFORM_IDENTITY, 49,
   4, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 3,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 6,
  49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
   1, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 1,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 7,
  49, BROTLI_TRANSFORM_IDENTITY, 9,
  48, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 8,
  49, BROTLI_TRANSFORM_IDENTITY, 5,
  49, BROTLI_TRANSFORM_IDENTITY, 10,
  49, BROTLI_TRANSFORM_IDENTITY, 11,
  49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 13,
  49, BROTLI_TRANSFORM_IDENTITY, 14,
  49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 15,
  49, BROTLI_TRANSFORM_IDENTITY, 16,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 12,
   5, BROTLI_TRANSFORM_IDENTITY, 49,
   0, BROTLI_TRANSFORM_IDENTITY, 1,
  49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 18,
  49, BROTLI_TRANSFORM_IDENTITY, 17,
  49, BROTLI_TRANSFORM_IDENTITY, 19,
  49, BROTLI_TRANSFORM_IDENTITY, 20,
  49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
  49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
  47, BROTLI_TRANSFORM_IDENTITY, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 22,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 23,
  49, BROTLI_TRANSFORM_IDENTITY, 24,
  49, BROTLI_TRANSFORM_IDENTITY, 25,
  49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
  49, BROTLI_TRANSFORM_IDENTITY, 27,
  49, BROTLI_TRANSFORM_IDENTITY, 28,
   0, BROTLI_TRANSFORM_IDENTITY, 12,
  49, BROTLI_TRANSFORM_IDENTITY, 29,
  49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
  49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
  49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 31,
  49, BROTLI_TRANSFORM_IDENTITY, 32,
  47, BROTLI_TRANSFORM_IDENTITY, 3,
  49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
   5, BROTLI_TRANSFORM_IDENTITY, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
  49, BROTLI_TRANSFORM_IDENTITY, 30,
   0, BROTLI_TRANSFORM_IDENTITY, 5,
  35, BROTLI_TRANSFORM_IDENTITY, 49,
  47, BROTLI_TRANSFORM_IDENTITY, 2,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
  49, BROTLI_TRANSFORM_IDENTITY, 36,
  49, BROTLI_TRANSFORM_IDENTITY, 33,
   5, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
  49, BROTLI_TRANSFORM_IDENTITY, 37,
   0, BROTLI_TRANSFORM_IDENTITY, 30,
  49, BROTLI_TRANSFORM_IDENTITY, 38,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 39,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 34,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
   0, BROTLI_TRANSFORM_IDENTITY, 21,
  49, BROTLI_TRANSFORM_IDENTITY, 40,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
  49, BROTLI_TRANSFORM_IDENTITY, 41,
  49, BROTLI_TRANSFORM_IDENTITY, 42,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
  49, BROTLI_TRANSFORM_IDENTITY, 43,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
   0, BROTLI_TRANSFORM_IDENTITY, 34,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
  49, BROTLI_TRANSFORM_IDENTITY, 44,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
  45, BROTLI_TRANSFORM_IDENTITY, 49,
   0, BROTLI_TRANSFORM_IDENTITY, 33,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
  49, BROTLI_TRANSFORM_IDENTITY, 46,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
};
160
161
/* The built-in transform set, bundling the tables defined above. The
   initializer is positional, so the order of the values must match the field
   order of BrotliTransforms (declared outside this file). */
static const BrotliTransforms kBrotliTransforms = {
  /* Size in bytes of the affix pool, including its trailing zero. */
  sizeof(kPrefixSuffix),
  /* The affix pool itself, viewed as raw bytes. */
  (const uint8_t*)kPrefixSuffix,
  /* Offsets of the individual affix entries inside the pool. */
  kPrefixSuffixMap,
  /* Number of transforms: the table stores three bytes per transform. */
  sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),
  kTransformsData,
  NULL,  /* no extra parameters */
  /* Entry k is the index in kTransformsData of the transform that has empty
     prefix and suffix (id 49) and drops the last k bytes: entry 0 is the
     plain identity row, entries 1..9 are the OMIT_LAST_1..OMIT_LAST_9 rows. */
  {0, 12, 27, 23, 42, 63, 56, 48, 59, 64}
};
170
171
0
/* Returns the address of this file's static, read-only transform set
   (kBrotliTransforms). The pointee has static storage duration, so the
   caller never frees it. */
const BrotliTransforms* duckdb_brotli::BrotliGetTransforms(void) {
  const BrotliTransforms* const builtin_set = &kBrotliTransforms;
  return builtin_set;
}
174
175
0
/* Upper-cases, in place, the single rune that starts at |p| using a
   deliberately simplified model, and returns how many bytes that rune is
   taken to occupy (1, 2 or 3) based on its lead byte alone.
   The function does not know how many bytes are available: for a lead byte
   >= 0xC0 it touches p[1], and for a lead byte >= 0xE0 it touches p[2]. */
static int ToUpperCase(uint8_t* p) {
  const uint8_t lead = p[0];

  if (lead >= 0xE0) {
    /* An arbitrary transform for three byte characters. */
    p[2] ^= 5;
    return 3;
  }

  if (lead >= 0xC0) {
    /* An overly simplified uppercasing model for UTF-8. */
    p[1] ^= 32;
    return 2;
  }

  /* Single byte: only ASCII 'a'..'z' changes; flipping bit 5 upper-cases. */
  if (lead >= 'a' && lead <= 'z') {
    p[0] = (uint8_t)(lead ^ 32);
  }
  return 1;
}
191
192
0
/* Adds a 16-bit shift |parameter| to the scalar value of the UTF-8 rune that
   starts at |word| and re-encodes the result in place, keeping the rune's
   byte length and the top two bits of every continuation byte.
   |word_len| is the number of bytes available at |word|; word[0] is always
   read. Returns the number of bytes the caller should advance: the rune
   length when the rune was rewritten, 1 for bytes that are skipped
   (continuation bytes, lead bytes >= 0xF8, or a 2-byte lead with fewer than
   2 bytes left), and |word_len| when a 3- or 4-byte rune is cut short. */
static int Shift(uint8_t* word, int word_len, uint16_t parameter) {
  /* Limited sign extension: scalar < (1 << 24). Bit 15 of the parameter acts
     as a sign bit; later masking keeps only the low bits of the sum. */
  const uint32_t low_bits = parameter & 0x7FFFu;
  const uint32_t sign_term = 0x1000000u - (parameter & 0x8000u);
  uint32_t scalar = low_bits + sign_term;
  const uint8_t lead = word[0];

  if (lead < 0x80) {
    /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
    scalar += (uint32_t)lead;
    word[0] = (uint8_t)(scalar & 0x7Fu);
    return 1;
  }

  if (lead < 0xC0 || lead >= 0xF8) {
    /* Continuation / 10AAAAAA, or a lead byte outside the 1..4 byte forms:
       leave untouched and step over one byte. */
    return 1;
  }

  if (lead < 0xE0) {
    /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
    if (word_len < 2) {
      return 1;
    }
    const uint32_t hi = (lead & 0x1Fu) << 6u;
    const uint32_t lo = word[1] & 0x3Fu;
    scalar += (uint32_t)(lo | hi);
    word[0] = (uint8_t)(0xC0 | ((scalar >> 6u) & 0x1F));
    word[1] = (uint8_t)((word[1] & 0xC0) | (scalar & 0x3F));
    return 2;
  }

  if (lead < 0xF0) {
    /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
    if (word_len < 3) {
      return word_len;
    }
    const uint32_t hi = (lead & 0x0Fu) << 12u;
    const uint32_t mid = (word[1] & 0x3Fu) << 6u;
    const uint32_t lo = word[2] & 0x3Fu;
    scalar += (uint32_t)(lo | mid | hi);
    word[0] = (uint8_t)(0xE0 | ((scalar >> 12u) & 0x0F));
    word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 6u) & 0x3F));
    word[2] = (uint8_t)((word[2] & 0xC0) | (scalar & 0x3F));
    return 3;
  }

  /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
  if (word_len < 4) {
    return word_len;
  }
  {
    const uint32_t b0 = (lead & 0x07u) << 18u;
    const uint32_t b1 = (word[1] & 0x3Fu) << 12u;
    const uint32_t b2 = (word[2] & 0x3Fu) << 6u;
    const uint32_t b3 = word[3] & 0x3Fu;
    scalar += (uint32_t)(b3 | b2 | b1 | b0);
  }
  word[0] = (uint8_t)(0xF0 | ((scalar >> 18u) & 0x07));
  word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 12u) & 0x3F));
  word[2] = (uint8_t)((word[2] & 0xC0) | ((scalar >> 6u) & 0x3F));
  word[3] = (uint8_t)((word[3] & 0xC0) | (scalar & 0x3F));
  return 4;
}
233
234
int duckdb_brotli::BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,
235
0
    const BrotliTransforms* transforms, int transform_idx) {
236
0
  int idx = 0;
237
0
  const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx);
238
0
  uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx);
239
0
  const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx);
240
0
  {
241
0
    int prefix_len = *prefix++;
242
0
    while (prefix_len--) { dst[idx++] = *prefix++; }
243
0
  }
244
0
  {
245
0
    const int t = type;
246
0
    int i = 0;
247
0
    if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {
248
0
      len -= t;
249
0
    } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1
250
0
        && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {
251
0
      int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);
252
0
      word += skip;
253
0
      len -= skip;
254
0
    }
255
0
    while (i < len) { dst[idx++] = word[i++]; }
256
0
    if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {
257
0
      ToUpperCase(&dst[idx - len]);
258
0
    } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {
259
0
      uint8_t* uppercase = &dst[idx - len];
260
0
      while (len > 0) {
261
0
        int step = ToUpperCase(uppercase);
262
0
        uppercase += step;
263
0
        len -= step;
264
0
      }
265
0
    } else if (t == BROTLI_TRANSFORM_SHIFT_FIRST) {
266
0
      uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
267
0
          + (transforms->params[transform_idx * 2 + 1] << 8u));
268
0
      Shift(&dst[idx - len], len, param);
269
0
    } else if (t == BROTLI_TRANSFORM_SHIFT_ALL) {
270
0
      uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
271
0
          + (transforms->params[transform_idx * 2 + 1] << 8u));
272
0
      uint8_t* shift = &dst[idx - len];
273
0
      while (len > 0) {
274
0
        int step = Shift(shift, len, param);
275
0
        shift += step;
276
0
        len -= step;
277
0
      }
278
0
    }
279
0
  }
280
0
  {
281
0
    int suffix_len = *suffix++;
282
0
    while (suffix_len--) { dst[idx++] = *suffix++; }
283
0
    return idx;
284
0
  }
285
0
}
286
287