Coverage Report

Created: 2025-10-10 06:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/h2o/deps/brotli/c/dec/transform.h
Line
Count
Source
1
/* Copyright 2013 Google Inc. All Rights Reserved.
2
3
   Distributed under MIT license.
4
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
*/
6
7
/* Transformations on dictionary words. */
8
9
#ifndef BROTLI_DEC_TRANSFORM_H_
10
#define BROTLI_DEC_TRANSFORM_H_
11
12
#include <brotli/types.h>
13
#include "./port.h"
14
15
#if defined(__cplusplus) || defined(c_plusplus)
16
extern "C" {
17
#endif
18
19
/* Kinds of edits that can be applied to a dictionary word.
   The numeric values are significant: TransformDictionaryWord derives the
   number of bytes to drop directly from them (kOmitLastN == N and
   kOmitFirstN == kOmitFirst1 - 1 + N), so the order must not be changed. */
enum WordTransformType {
  /* Copy the word unchanged. */
  kIdentity = 0,
  /* Drop the last N bytes of the word. */
  kOmitLast1 = 1,
  kOmitLast2 = 2,
  kOmitLast3 = 3,
  kOmitLast4 = 4,
  kOmitLast5 = 5,
  kOmitLast6 = 6,
  kOmitLast7 = 7,
  kOmitLast8 = 8,
  kOmitLast9 = 9,
  /* Apply ToUpperCase to the first character / to every character. */
  kUppercaseFirst = 10,
  kUppercaseAll = 11,
  /* Drop the first N bytes of the word. */
  kOmitFirst1 = 12,
  kOmitFirst2 = 13,
  kOmitFirst3 = 14,
  kOmitFirst4 = 15,
  kOmitFirst5 = 16,
  kOmitFirst6 = 17,
  kOmitFirst7 = 18,
  kOmitFirst8 = 19,
  kOmitFirst9 = 20
};
42
43
/* One dictionary word transformation: prefix + edited word + suffix. */
typedef struct {
  /* Offset into kPrefixSuffix of the NUL-terminated prefix string. */
  const uint8_t prefix_id;
  /* Edit applied to the word itself; holds a WordTransformType value. */
  const uint8_t transform;
  /* Offset into kPrefixSuffix of the NUL-terminated suffix string. */
  const uint8_t suffix_id;
} Transform;
48
49
/* All prefix/suffix strings packed back to back, each terminated by a NUL
   byte (the last one by the literal's implicit terminator). Entries are
   addressed by byte offset (see the kPFix_* constants), and an offset may
   point into the middle of a longer entry to reuse its tail. */
static const char kPrefixSuffix[208] =
    "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
    " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
    " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
    " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
54
55
/* Byte offsets of the individual strings inside kPrefixSuffix. */
enum {
  /* Abbreviations used in the constant names:
     EMPTY = ""
     SP = " "
     DQUOT = "\""
     SQUOT = "'"
     CLOSEBR = "]"
     OPEN = "("
     SLASH = "/"
     NBSP = non-breaking space "\xc2\xa0"
  */
  kPFix_EMPTY = 0,
  kPFix_SP = 1,
  kPFix_COMMASP = 3,
  kPFix_SPofSPtheSP = 6,
  kPFix_SPtheSP = 9,
  kPFix_eSP = 12,
  kPFix_SPofSP = 15,
  kPFix_sSP = 20,
  kPFix_DOT = 23,
  kPFix_SPandSP = 25,
  kPFix_SPinSP = 31,
  kPFix_DQUOT = 36,
  kPFix_SPtoSP = 38,
  kPFix_DQUOTGT = 43,
  kPFix_NEWLINE = 46,
  kPFix_DOTSP = 48,
  kPFix_CLOSEBR = 51,
  kPFix_SPforSP = 53,
  kPFix_SPaSP = 59,
  kPFix_SPthatSP = 63,
  kPFix_SQUOT = 70,
  kPFix_SPwithSP = 72,
  kPFix_SPfromSP = 79,
  kPFix_SPbySP = 86,
  kPFix_OPEN = 91,
  kPFix_DOTSPTheSP = 93,
  kPFix_SPonSP = 100,
  kPFix_SPasSP = 105,
  kPFix_SPisSP = 110,
  kPFix_ingSP = 115,
  kPFix_NEWLINETAB = 120,
  kPFix_COLON = 123,
  kPFix_edSP = 125,
  kPFix_EQDQUOT = 129,
  kPFix_SPatSP = 132,
  kPFix_lySP = 137,
  kPFix_COMMA = 141,
  kPFix_EQSQUOT = 143,
  kPFix_DOTcomSLASH = 146,
  kPFix_DOTSPThisSP = 152,
  kPFix_SPnotSP = 160,
  kPFix_erSP = 166,
  kPFix_alSP = 170,
  kPFix_fulSP = 174,
  kPFix_iveSP = 179,
  kPFix_lessSP = 184,
  kPFix_estSP = 190,
  kPFix_izeSP = 195,
  kPFix_NBSP = 200,
  kPFix_ousSP = 203
};
116
117
static const Transform kTransforms[] = {
118
  { kPFix_EMPTY, kIdentity, kPFix_EMPTY },
119
  { kPFix_EMPTY, kIdentity, kPFix_SP },
120
  { kPFix_SP, kIdentity, kPFix_SP },
121
  { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },
122
  { kPFix_EMPTY, kUppercaseFirst, kPFix_SP },
123
  { kPFix_EMPTY, kIdentity, kPFix_SPtheSP },
124
  { kPFix_SP, kIdentity, kPFix_EMPTY },
125
  { kPFix_sSP, kIdentity, kPFix_SP },
126
  { kPFix_EMPTY, kIdentity, kPFix_SPofSP },
127
  { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },
128
  { kPFix_EMPTY, kIdentity, kPFix_SPandSP },
129
  { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },
130
  { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },
131
  { kPFix_COMMASP, kIdentity, kPFix_SP },
132
  { kPFix_EMPTY, kIdentity, kPFix_COMMASP },
133
  { kPFix_SP, kUppercaseFirst, kPFix_SP },
134
  { kPFix_EMPTY, kIdentity, kPFix_SPinSP },
135
  { kPFix_EMPTY, kIdentity, kPFix_SPtoSP },
136
  { kPFix_eSP, kIdentity, kPFix_SP },
137
  { kPFix_EMPTY, kIdentity, kPFix_DQUOT },
138
  { kPFix_EMPTY, kIdentity, kPFix_DOT },
139
  { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },
140
  { kPFix_EMPTY, kIdentity, kPFix_NEWLINE },
141
  { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },
142
  { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },
143
  { kPFix_EMPTY, kIdentity, kPFix_SPforSP },
144
  { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },
145
  { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },
146
  { kPFix_EMPTY, kIdentity, kPFix_SPaSP },
147
  { kPFix_EMPTY, kIdentity, kPFix_SPthatSP },
148
  { kPFix_SP, kUppercaseFirst, kPFix_EMPTY },
149
  { kPFix_EMPTY, kIdentity, kPFix_DOTSP },
150
  { kPFix_DOT, kIdentity, kPFix_EMPTY },
151
  { kPFix_SP, kIdentity, kPFix_COMMASP },
152
  { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },
153
  { kPFix_EMPTY, kIdentity, kPFix_SPwithSP },
154
  { kPFix_EMPTY, kIdentity, kPFix_SQUOT },
155
  { kPFix_EMPTY, kIdentity, kPFix_SPfromSP },
156
  { kPFix_EMPTY, kIdentity, kPFix_SPbySP },
157
  { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },
158
  { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },
159
  { kPFix_SPtheSP, kIdentity, kPFix_EMPTY },
160
  { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },
161
  { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },
162
  { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },
163
  { kPFix_EMPTY, kIdentity, kPFix_SPonSP },
164
  { kPFix_EMPTY, kIdentity, kPFix_SPasSP },
165
  { kPFix_EMPTY, kIdentity, kPFix_SPisSP },
166
  { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },
167
  { kPFix_EMPTY, kOmitLast1, kPFix_ingSP },
168
  { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },
169
  { kPFix_EMPTY, kIdentity, kPFix_COLON },
170
  { kPFix_SP, kIdentity, kPFix_DOTSP },
171
  { kPFix_EMPTY, kIdentity, kPFix_edSP },
172
  { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },
173
  { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },
174
  { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },
175
  { kPFix_EMPTY, kIdentity, kPFix_OPEN },
176
  { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },
177
  { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },
178
  { kPFix_EMPTY, kIdentity, kPFix_SPatSP },
179
  { kPFix_EMPTY, kIdentity, kPFix_lySP },
180
  { kPFix_SPtheSP, kIdentity, kPFix_SPofSP },
181
  { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },
182
  { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },
183
  { kPFix_SP, kUppercaseFirst, kPFix_COMMASP },
184
  { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },
185
  { kPFix_DOT, kIdentity, kPFix_OPEN },
186
  { kPFix_EMPTY, kUppercaseAll, kPFix_SP },
187
  { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },
188
  { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },
189
  { kPFix_SP, kIdentity, kPFix_DOT },
190
  { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },
191
  { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },
192
  { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },
193
  { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },
194
  { kPFix_EMPTY, kIdentity, kPFix_COMMA },
195
  { kPFix_DOT, kIdentity, kPFix_SP },
196
  { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },
197
  { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },
198
  { kPFix_EMPTY, kIdentity, kPFix_SPnotSP },
199
  { kPFix_SP, kIdentity, kPFix_EQDQUOT },
200
  { kPFix_EMPTY, kIdentity, kPFix_erSP },
201
  { kPFix_SP, kUppercaseAll, kPFix_SP },
202
  { kPFix_EMPTY, kIdentity, kPFix_alSP },
203
  { kPFix_SP, kUppercaseAll, kPFix_EMPTY },
204
  { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },
205
  { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },
206
  { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },
207
  { kPFix_SP, kIdentity, kPFix_OPEN },
208
  { kPFix_EMPTY, kIdentity, kPFix_fulSP },
209
  { kPFix_SP, kUppercaseFirst, kPFix_DOTSP },
210
  { kPFix_EMPTY, kIdentity, kPFix_iveSP },
211
  { kPFix_EMPTY, kIdentity, kPFix_lessSP },
212
  { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },
213
  { kPFix_EMPTY, kIdentity, kPFix_estSP },
214
  { kPFix_SP, kUppercaseFirst, kPFix_DOT },
215
  { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },
216
  { kPFix_SP, kIdentity, kPFix_EQSQUOT },
217
  { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },
218
  { kPFix_EMPTY, kIdentity, kPFix_izeSP },
219
  { kPFix_EMPTY, kUppercaseAll, kPFix_DOT },
220
  { kPFix_NBSP, kIdentity, kPFix_EMPTY },
221
  { kPFix_SP, kIdentity, kPFix_COMMA },
222
  { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },
223
  { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },
224
  { kPFix_EMPTY, kIdentity, kPFix_ousSP },
225
  { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },
226
  { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },
227
  { kPFix_SP, kUppercaseFirst, kPFix_COMMA },
228
  { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },
229
  { kPFix_SP, kUppercaseAll, kPFix_COMMASP },
230
  { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },
231
  { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },
232
  { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },
233
  { kPFix_SP, kUppercaseAll, kPFix_DOT },
234
  { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },
235
  { kPFix_SP, kUppercaseAll, kPFix_DOTSP },
236
  { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },
237
  { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },
238
  { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },
239
};
240
241
/* Number of rows in kTransforms, derived from the array so it cannot drift. */
static const int kNumTransforms =
    (int)(sizeof(kTransforms) / sizeof(kTransforms[0]));
242
243
0
/* Upper-cases, in place, the character that starts at p, using a deliberately
   simplified model keyed only on the value of the lead byte p[0].

   - p[0] < 0xc0: treated as a 1-byte character; 'a'..'z' is mapped to
     'A'..'Z' by flipping bit 5, every other byte is left untouched.
   - p[0] < 0xe0: treated as a 2-byte character; bit 5 of p[1] is flipped.
   - otherwise:   treated as a 3-byte character; p[2] is XORed with 5.

   Returns the number of bytes the character is assumed to occupy (1, 2 or 3).
   No length is passed in, so the caller must make sure that many bytes are
   accessible at p. */
static int ToUpperCase(uint8_t* p) {
  const uint8_t lead = p[0];
  if (lead < 0xc0) {
    if (lead >= 'a' && lead <= 'z') {
      p[0] ^= 32;
    }
    return 1;
  }
  /* An overly simplified uppercasing model for UTF-8. */
  if (lead < 0xe0) {
    p[1] ^= 32;
    return 2;
  }
  /* An arbitrary transform for three byte characters. */
  p[2] ^= 5;
  return 3;
}
259
260
static BROTLI_NOINLINE int TransformDictionaryWord(
261
0
    uint8_t* dst, const uint8_t* word, int len, int transform) {
262
0
  int idx = 0;
263
0
  {
264
0
    const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id];
265
0
    while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
266
0
  }
267
0
  {
268
0
    const int t = kTransforms[transform].transform;
269
0
    int i = 0;
270
0
    int skip = t - (kOmitFirst1 - 1);
271
0
    if (skip > 0) {
272
0
      word += skip;
273
0
      len -= skip;
274
0
    } else if (t <= kOmitLast9) {
275
0
      len -= t;
276
0
    }
277
0
    while (i < len) { dst[idx++] = word[i++]; }
278
0
    if (t == kUppercaseFirst) {
279
0
      ToUpperCase(&dst[idx - len]);
280
0
    } else if (t == kUppercaseAll) {
281
0
      uint8_t* uppercase = &dst[idx - len];
282
0
      while (len > 0) {
283
0
        int step = ToUpperCase(uppercase);
284
0
        uppercase += step;
285
0
        len -= step;
286
0
      }
287
0
    }
288
0
  }
289
0
  {
290
0
    const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id];
291
0
    while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
292
0
    return idx;
293
0
  }
294
0
}
295
296
#if defined(__cplusplus) || defined(c_plusplus)
297
}  /* extern "C" */
298
#endif
299
300
#endif  /* BROTLI_DEC_TRANSFORM_H_ */