/src/h2o/deps/brotli/c/dec/transform.h
Line | Count | Source |
1 | | /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | | |
3 | | Distributed under MIT license. |
4 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 | | */ |
6 | | |
7 | | /* Transformations on dictionary words. */ |
8 | | |
9 | | #ifndef BROTLI_DEC_TRANSFORM_H_ |
10 | | #define BROTLI_DEC_TRANSFORM_H_ |
11 | | |
12 | | #include <brotli/types.h> |
13 | | #include "./port.h" |
14 | | |
15 | | #if defined(__cplusplus) || defined(c_plusplus) |
16 | | extern "C" { |
17 | | #endif |
18 | | |
/* Kinds of transformation that can be applied to a dictionary word.
   The numeric values are significant: they are stored in the `transform`
   field of the kTransforms rows and used arithmetically by
   TransformDictionaryWord. */
enum WordTransformType {
  /* Copy the word unchanged. */
  kIdentity = 0,

  /* Drop the last N bytes of the word; the value of kOmitLastN is N. */
  kOmitLast1 = 1,
  kOmitLast2,
  kOmitLast3,
  kOmitLast4,
  kOmitLast5,
  kOmitLast6,
  kOmitLast7,
  kOmitLast8,
  kOmitLast9,

  /* Upper-case the first character / every character of the word. */
  kUppercaseFirst = 10,
  kUppercaseAll,

  /* Drop the first N bytes of the word; the value of kOmitFirstN is
     N + (kOmitFirst1 - 1). */
  kOmitFirst1 = 12,
  kOmitFirst2,
  kOmitFirst3,
  kOmitFirst4,
  kOmitFirst5,
  kOmitFirst6,
  kOmitFirst7,
  kOmitFirst8,
  kOmitFirst9
};
42 | | |
/* One row of the transform table: the word is emitted as
   prefix + transformed word + suffix. Field order matters because the
   kTransforms rows are initialised positionally. */
typedef struct {
  const uint8_t prefix_id;  /* Offset of the prefix inside kPrefixSuffix. */
  const uint8_t transform;  /* A WordTransformType value. */
  const uint8_t suffix_id;  /* Offset of the suffix inside kPrefixSuffix. */
} Transform;
48 | | |
/* Packed storage for every prefix and suffix string referenced by
   kTransforms. The strings are laid out back to back, each terminated by a
   NUL byte; the last one (starting "ous ") is terminated by the implicit NUL
   of the string literal, which is byte 207 of the 208-byte array.
   Entries are addressed by byte offset (the kPFix_* constants), so the exact
   byte layout of this literal must not change without updating those
   offsets. */
49 | | static const char kPrefixSuffix[208] = |
50 | | "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0" |
51 | | " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0" |
52 | | " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0" |
53 | | " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous "; |
54 | | |
/* Byte offsets into kPrefixSuffix, one constant per prefix/suffix string.
   Some constants deliberately point into the tail of a longer entry rather
   than at its start: kPFix_SPtheSP (9) and kPFix_eSP (12) both lie inside
   the " of the " entry that begins at offset 6. */
55 | | enum { |
56 | | /* EMPTY = "" |
57 | | SP = " " |
58 | | DQUOT = "\"" |
59 | | SQUOT = "'" |
60 | | CLOSEBR = "]" |
61 | | OPEN = "(" |
62 | | SLASH = "/" |
63 | | NBSP = non-breaking space "\xc2\xa0" |
64 | | */ |
65 | | kPFix_EMPTY = 0, |
66 | | kPFix_SP = 1, |
67 | | kPFix_COMMASP = 3, |
68 | | kPFix_SPofSPtheSP = 6, |
69 | | kPFix_SPtheSP = 9, |
70 | | kPFix_eSP = 12, |
71 | | kPFix_SPofSP = 15, |
72 | | kPFix_sSP = 20, |
73 | | kPFix_DOT = 23, |
74 | | kPFix_SPandSP = 25, |
75 | | kPFix_SPinSP = 31, |
76 | | kPFix_DQUOT = 36, |
77 | | kPFix_SPtoSP = 38, |
78 | | kPFix_DQUOTGT = 43, |
79 | | kPFix_NEWLINE = 46, |
80 | | kPFix_DOTSP = 48, |
81 | | kPFix_CLOSEBR = 51, |
82 | | kPFix_SPforSP = 53, |
83 | | kPFix_SPaSP = 59, |
84 | | kPFix_SPthatSP = 63, |
85 | | kPFix_SQUOT = 70, |
86 | | kPFix_SPwithSP = 72, |
87 | | kPFix_SPfromSP = 79, |
88 | | kPFix_SPbySP = 86, |
89 | | kPFix_OPEN = 91, |
90 | | kPFix_DOTSPTheSP = 93, |
91 | | kPFix_SPonSP = 100, |
92 | | kPFix_SPasSP = 105, |
93 | | kPFix_SPisSP = 110, |
94 | | kPFix_ingSP = 115, |
95 | | kPFix_NEWLINETAB = 120, |
96 | | kPFix_COLON = 123, |
97 | | kPFix_edSP = 125, |
98 | | kPFix_EQDQUOT = 129, |
99 | | kPFix_SPatSP = 132, |
100 | | kPFix_lySP = 137, |
101 | | kPFix_COMMA = 141, |
102 | | kPFix_EQSQUOT = 143, |
103 | | kPFix_DOTcomSLASH = 146, |
104 | | kPFix_DOTSPThisSP = 152, |
105 | | kPFix_SPnotSP = 160, |
106 | | kPFix_erSP = 166, |
107 | | kPFix_alSP = 170, |
108 | | kPFix_fulSP = 174, |
109 | | kPFix_iveSP = 179, |
110 | | kPFix_lessSP = 184, |
111 | | kPFix_estSP = 190, |
112 | | kPFix_izeSP = 195, |
113 | | kPFix_NBSP = 200, |
114 | | kPFix_ousSP = 203 |
115 | | }; |
116 | | |
/* Table of word transformations (121 rows), indexed by the `transform`
   argument of TransformDictionaryWord. Each row is
   { prefix offset, WordTransformType, suffix offset }, where both offsets
   are kPFix_* positions inside kPrefixSuffix. Row order is significant
   because rows are selected by index. */
117 | | static const Transform kTransforms[] = { |
118 | | { kPFix_EMPTY, kIdentity, kPFix_EMPTY }, |
119 | | { kPFix_EMPTY, kIdentity, kPFix_SP }, |
120 | | { kPFix_SP, kIdentity, kPFix_SP }, |
121 | | { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY }, |
122 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_SP }, |
123 | | { kPFix_EMPTY, kIdentity, kPFix_SPtheSP }, |
124 | | { kPFix_SP, kIdentity, kPFix_EMPTY }, |
125 | | { kPFix_sSP, kIdentity, kPFix_SP }, |
126 | | { kPFix_EMPTY, kIdentity, kPFix_SPofSP }, |
127 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY }, |
128 | | { kPFix_EMPTY, kIdentity, kPFix_SPandSP }, |
129 | | { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY }, |
130 | | { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY }, |
131 | | { kPFix_COMMASP, kIdentity, kPFix_SP }, |
132 | | { kPFix_EMPTY, kIdentity, kPFix_COMMASP }, |
133 | | { kPFix_SP, kUppercaseFirst, kPFix_SP }, |
134 | | { kPFix_EMPTY, kIdentity, kPFix_SPinSP }, |
135 | | { kPFix_EMPTY, kIdentity, kPFix_SPtoSP }, |
136 | | { kPFix_eSP, kIdentity, kPFix_SP }, |
137 | | { kPFix_EMPTY, kIdentity, kPFix_DQUOT }, |
138 | | { kPFix_EMPTY, kIdentity, kPFix_DOT }, |
139 | | { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT }, |
140 | | { kPFix_EMPTY, kIdentity, kPFix_NEWLINE }, |
141 | | { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY }, |
142 | | { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR }, |
143 | | { kPFix_EMPTY, kIdentity, kPFix_SPforSP }, |
144 | | { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY }, |
145 | | { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY }, |
146 | | { kPFix_EMPTY, kIdentity, kPFix_SPaSP }, |
147 | | { kPFix_EMPTY, kIdentity, kPFix_SPthatSP }, |
148 | | { kPFix_SP, kUppercaseFirst, kPFix_EMPTY }, |
149 | | { kPFix_EMPTY, kIdentity, kPFix_DOTSP }, |
150 | | { kPFix_DOT, kIdentity, kPFix_EMPTY }, |
151 | | { kPFix_SP, kIdentity, kPFix_COMMASP }, |
152 | | { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY }, |
153 | | { kPFix_EMPTY, kIdentity, kPFix_SPwithSP }, |
154 | | { kPFix_EMPTY, kIdentity, kPFix_SQUOT }, |
155 | | { kPFix_EMPTY, kIdentity, kPFix_SPfromSP }, |
156 | | { kPFix_EMPTY, kIdentity, kPFix_SPbySP }, |
157 | | { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY }, |
158 | | { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY }, |
159 | | { kPFix_SPtheSP, kIdentity, kPFix_EMPTY }, |
160 | | { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY }, |
161 | | { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP }, |
162 | | { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY }, |
163 | | { kPFix_EMPTY, kIdentity, kPFix_SPonSP }, |
164 | | { kPFix_EMPTY, kIdentity, kPFix_SPasSP }, |
165 | | { kPFix_EMPTY, kIdentity, kPFix_SPisSP }, |
166 | | { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY }, |
167 | | { kPFix_EMPTY, kOmitLast1, kPFix_ingSP }, |
168 | | { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB }, |
169 | | { kPFix_EMPTY, kIdentity, kPFix_COLON }, |
170 | | { kPFix_SP, kIdentity, kPFix_DOTSP }, |
171 | | { kPFix_EMPTY, kIdentity, kPFix_edSP }, |
172 | | { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY }, |
173 | | { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY }, |
174 | | { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY }, |
175 | | { kPFix_EMPTY, kIdentity, kPFix_OPEN }, |
176 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP }, |
177 | | { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY }, |
178 | | { kPFix_EMPTY, kIdentity, kPFix_SPatSP }, |
179 | | { kPFix_EMPTY, kIdentity, kPFix_lySP }, |
180 | | { kPFix_SPtheSP, kIdentity, kPFix_SPofSP }, |
181 | | { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY }, |
182 | | { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY }, |
183 | | { kPFix_SP, kUppercaseFirst, kPFix_COMMASP }, |
184 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT }, |
185 | | { kPFix_DOT, kIdentity, kPFix_OPEN }, |
186 | | { kPFix_EMPTY, kUppercaseAll, kPFix_SP }, |
187 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT }, |
188 | | { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT }, |
189 | | { kPFix_SP, kIdentity, kPFix_DOT }, |
190 | | { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY }, |
191 | | { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP }, |
192 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT }, |
193 | | { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP }, |
194 | | { kPFix_EMPTY, kIdentity, kPFix_COMMA }, |
195 | | { kPFix_DOT, kIdentity, kPFix_SP }, |
196 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN }, |
197 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT }, |
198 | | { kPFix_EMPTY, kIdentity, kPFix_SPnotSP }, |
199 | | { kPFix_SP, kIdentity, kPFix_EQDQUOT }, |
200 | | { kPFix_EMPTY, kIdentity, kPFix_erSP }, |
201 | | { kPFix_SP, kUppercaseAll, kPFix_SP }, |
202 | | { kPFix_EMPTY, kIdentity, kPFix_alSP }, |
203 | | { kPFix_SP, kUppercaseAll, kPFix_EMPTY }, |
204 | | { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT }, |
205 | | { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT }, |
206 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP }, |
207 | | { kPFix_SP, kIdentity, kPFix_OPEN }, |
208 | | { kPFix_EMPTY, kIdentity, kPFix_fulSP }, |
209 | | { kPFix_SP, kUppercaseFirst, kPFix_DOTSP }, |
210 | | { kPFix_EMPTY, kIdentity, kPFix_iveSP }, |
211 | | { kPFix_EMPTY, kIdentity, kPFix_lessSP }, |
212 | | { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT }, |
213 | | { kPFix_EMPTY, kIdentity, kPFix_estSP }, |
214 | | { kPFix_SP, kUppercaseFirst, kPFix_DOT }, |
215 | | { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT }, |
216 | | { kPFix_SP, kIdentity, kPFix_EQSQUOT }, |
217 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA }, |
218 | | { kPFix_EMPTY, kIdentity, kPFix_izeSP }, |
219 | | { kPFix_EMPTY, kUppercaseAll, kPFix_DOT }, |
220 | | { kPFix_NBSP, kIdentity, kPFix_EMPTY }, |
221 | | { kPFix_SP, kIdentity, kPFix_COMMA }, |
222 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT }, |
223 | | { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT }, |
224 | | { kPFix_EMPTY, kIdentity, kPFix_ousSP }, |
225 | | { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP }, |
226 | | { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT }, |
227 | | { kPFix_SP, kUppercaseFirst, kPFix_COMMA }, |
228 | | { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT }, |
229 | | { kPFix_SP, kUppercaseAll, kPFix_COMMASP }, |
230 | | { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA }, |
231 | | { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN }, |
232 | | { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP }, |
233 | | { kPFix_SP, kUppercaseAll, kPFix_DOT }, |
234 | | { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT }, |
235 | | { kPFix_SP, kUppercaseAll, kPFix_DOTSP }, |
236 | | { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT }, |
237 | | { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT }, |
238 | | { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT }, |
239 | | }; |
240 | | |
241 | | static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]); |
242 | | |
/* Upper-cases, in place, the single character that starts at p, using a
   deliberately simplified model of UTF-8, and returns the number of bytes
   the character is taken to occupy (1, 2 or 3).

   The lead byte p[0] selects the case:
     >= 0xe0        treated as a three byte character; p[2] is XOR-ed with 5.
     0xc0 .. 0xdf   treated as a two byte character; p[1] is XOR-ed with 32.
     < 0xc0         one byte; 'a'..'z' become 'A'..'Z', anything else is
                    left untouched.

   The bytes p[1] / p[2] are modified without any length check, so the
   caller must make sure they are addressable. */
static int ToUpperCase(uint8_t* p) {
  const uint8_t lead = p[0];

  if (lead >= 0xe0) {
    /* An arbitrary transform for three byte characters. */
    p[2] ^= 5;
    return 3;
  }
  if (lead >= 0xc0) {
    /* An overly simplified uppercasing model for UTF-8. */
    p[1] ^= 32;
    return 2;
  }
  if (lead >= 'a' && lead <= 'z') {
    /* ASCII lower and upper case letters differ only in bit 5. */
    p[0] = (uint8_t)(lead ^ 32);
  }
  return 1;
}
259 | | |
260 | | static BROTLI_NOINLINE int TransformDictionaryWord( |
261 | 0 | uint8_t* dst, const uint8_t* word, int len, int transform) { |
262 | 0 | int idx = 0; |
263 | 0 | { |
264 | 0 | const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id]; |
265 | 0 | while (*prefix) { dst[idx++] = (uint8_t)*prefix++; } |
266 | 0 | } |
267 | 0 | { |
268 | 0 | const int t = kTransforms[transform].transform; |
269 | 0 | int i = 0; |
270 | 0 | int skip = t - (kOmitFirst1 - 1); |
271 | 0 | if (skip > 0) { |
272 | 0 | word += skip; |
273 | 0 | len -= skip; |
274 | 0 | } else if (t <= kOmitLast9) { |
275 | 0 | len -= t; |
276 | 0 | } |
277 | 0 | while (i < len) { dst[idx++] = word[i++]; } |
278 | 0 | if (t == kUppercaseFirst) { |
279 | 0 | ToUpperCase(&dst[idx - len]); |
280 | 0 | } else if (t == kUppercaseAll) { |
281 | 0 | uint8_t* uppercase = &dst[idx - len]; |
282 | 0 | while (len > 0) { |
283 | 0 | int step = ToUpperCase(uppercase); |
284 | 0 | uppercase += step; |
285 | 0 | len -= step; |
286 | 0 | } |
287 | 0 | } |
288 | 0 | } |
289 | 0 | { |
290 | 0 | const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id]; |
291 | 0 | while (*suffix) { dst[idx++] = (uint8_t)*suffix++; } |
292 | 0 | return idx; |
293 | 0 | } |
294 | 0 | } |
295 | | |
296 | | #if defined(__cplusplus) || defined(c_plusplus) |
297 | | } /* extern "C" */ |
298 | | #endif |
299 | | |
300 | | #endif /* BROTLI_DEC_TRANSFORM_H_ */ |