/src/brotli/c/common/transform.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | | |
3 | | Distributed under MIT license. |
4 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 | | */ |
6 | | |
7 | | #include "platform.h" |
8 | | #include "transform.h" |
9 | | |
10 | | #if defined(__cplusplus) || defined(c_plusplus) |
11 | | extern "C" { |
12 | | #endif |
13 | | |
14 | | /* RFC 7932 transforms string data: every prefix/suffix string is stored as one length byte (written as an octal escape) followed by that many bytes, all packed back to back. The hex ruler comment under each row marks the offset at which each entry starts. */ |
15 | | static const BROTLI_MODEL("small") char kPrefixSuffix[217] = |
16 | | "\1 \2, \10 of the \4 of \2s \1.\5 and \4 " |
17 | | /* 0x _0 _2 __5 _E _3 _6 _8 _E */ |
18 | | "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 " |
19 | | /* 2x _3_ _5 _A_ _D_ _F _2 _4 _A _E */ |
20 | | "that \1\'\6 with \6 from \4 by \1(\6. T" |
21 | | /* 4x _5_ _7 _E _5 _A _C */ |
22 | | "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed " |
23 | | /* 6x _3 _8 _D _2 _7_ _ _A _C */ |
24 | | "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5" |
25 | | /* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */ |
26 | | " not \3er \3al \4ful \4ive \5less \4es" |
27 | | /* Ax _5 _9 _D _2 _7 _D */ |
28 | | "t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero; at offset 0xD8 it acts as the length byte of an empty entry. */ |
29 | | /* Cx _2 _7___ ___ _A _F _5 _8 */ |
30 | | |
31 | | static const BROTLI_MODEL("small") uint16_t kPrefixSuffixMap[50] = { /* Start offsets into kPrefixSuffix of its 50 length-prefixed entries, in storage order. */ |
32 | | 0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25, |
33 | | 0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E, |
34 | | 0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C, |
35 | | 0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9, |
36 | | 0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8 /* 0xD8 (index 49) is the trailing NUL of kPrefixSuffix, i.e. the empty entry. */ |
37 | | }; |
38 | | |
39 | | /* RFC 7932 transforms: a flat array of byte triplets, one triplet per transform. The middle byte is the BROTLI_TRANSFORM_* type; the outer two bytes are affix ids in the range 0..49, where 49 matches the empty entry of kPrefixSuffixMap. NOTE(review): first byte = prefix id and last byte = suffix id is assumed from the BROTLI_TRANSFORM_PREFIX / BROTLI_TRANSFORM_SUFFIX accessors in transform.h - confirm there. */ |
40 | | static const BROTLI_MODEL("small") uint8_t kTransformsData[] = { |
41 | | 49, BROTLI_TRANSFORM_IDENTITY, 49, /* transform 0: empty affix on both sides, word copied unchanged */ |
42 | | 49, BROTLI_TRANSFORM_IDENTITY, 0, |
43 | | 0, BROTLI_TRANSFORM_IDENTITY, 0, |
44 | | 49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49, |
45 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0, |
46 | | 49, BROTLI_TRANSFORM_IDENTITY, 47, |
47 | | 0, BROTLI_TRANSFORM_IDENTITY, 49, |
48 | | 4, BROTLI_TRANSFORM_IDENTITY, 0, |
49 | | 49, BROTLI_TRANSFORM_IDENTITY, 3, |
50 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49, |
51 | | 49, BROTLI_TRANSFORM_IDENTITY, 6, |
52 | | 49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49, |
53 | | 49, BROTLI_TRANSFORM_OMIT_LAST_1, 49, |
54 | | 1, BROTLI_TRANSFORM_IDENTITY, 0, |
55 | | 49, BROTLI_TRANSFORM_IDENTITY, 1, |
56 | | 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0, |
57 | | 49, BROTLI_TRANSFORM_IDENTITY, 7, |
58 | | 49, BROTLI_TRANSFORM_IDENTITY, 9, |
59 | | 48, BROTLI_TRANSFORM_IDENTITY, 0, |
60 | | 49, BROTLI_TRANSFORM_IDENTITY, 8, |
61 | | 49, BROTLI_TRANSFORM_IDENTITY, 5, |
62 | | 49, BROTLI_TRANSFORM_IDENTITY, 10, |
63 | | 49, BROTLI_TRANSFORM_IDENTITY, 11, |
64 | | 49, BROTLI_TRANSFORM_OMIT_LAST_3, 49, |
65 | | 49, BROTLI_TRANSFORM_IDENTITY, 13, |
66 | | 49, BROTLI_TRANSFORM_IDENTITY, 14, |
67 | | 49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49, |
68 | | 49, BROTLI_TRANSFORM_OMIT_LAST_2, 49, |
69 | | 49, BROTLI_TRANSFORM_IDENTITY, 15, |
70 | | 49, BROTLI_TRANSFORM_IDENTITY, 16, |
71 | | 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49, |
72 | | 49, BROTLI_TRANSFORM_IDENTITY, 12, |
73 | | 5, BROTLI_TRANSFORM_IDENTITY, 49, |
74 | | 0, BROTLI_TRANSFORM_IDENTITY, 1, |
75 | | 49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49, |
76 | | 49, BROTLI_TRANSFORM_IDENTITY, 18, |
77 | | 49, BROTLI_TRANSFORM_IDENTITY, 17, |
78 | | 49, BROTLI_TRANSFORM_IDENTITY, 19, |
79 | | 49, BROTLI_TRANSFORM_IDENTITY, 20, |
80 | | 49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49, |
81 | | 49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49, |
82 | | 47, BROTLI_TRANSFORM_IDENTITY, 49, |
83 | | 49, BROTLI_TRANSFORM_OMIT_LAST_4, 49, |
84 | | 49, BROTLI_TRANSFORM_IDENTITY, 22, |
85 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49, |
86 | | 49, BROTLI_TRANSFORM_IDENTITY, 23, |
87 | | 49, BROTLI_TRANSFORM_IDENTITY, 24, |
88 | | 49, BROTLI_TRANSFORM_IDENTITY, 25, |
89 | | 49, BROTLI_TRANSFORM_OMIT_LAST_7, 49, |
90 | | 49, BROTLI_TRANSFORM_OMIT_LAST_1, 26, |
91 | | 49, BROTLI_TRANSFORM_IDENTITY, 27, |
92 | | 49, BROTLI_TRANSFORM_IDENTITY, 28, |
93 | | 0, BROTLI_TRANSFORM_IDENTITY, 12, |
94 | | 49, BROTLI_TRANSFORM_IDENTITY, 29, |
95 | | 49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49, |
96 | | 49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49, |
97 | | 49, BROTLI_TRANSFORM_OMIT_LAST_6, 49, |
98 | | 49, BROTLI_TRANSFORM_IDENTITY, 21, |
99 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1, |
100 | | 49, BROTLI_TRANSFORM_OMIT_LAST_8, 49, |
101 | | 49, BROTLI_TRANSFORM_IDENTITY, 31, |
102 | | 49, BROTLI_TRANSFORM_IDENTITY, 32, |
103 | | 47, BROTLI_TRANSFORM_IDENTITY, 3, |
104 | | 49, BROTLI_TRANSFORM_OMIT_LAST_5, 49, |
105 | | 49, BROTLI_TRANSFORM_OMIT_LAST_9, 49, |
106 | | 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1, |
107 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8, |
108 | | 5, BROTLI_TRANSFORM_IDENTITY, 21, |
109 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0, |
110 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10, |
111 | | 49, BROTLI_TRANSFORM_IDENTITY, 30, |
112 | | 0, BROTLI_TRANSFORM_IDENTITY, 5, |
113 | | 35, BROTLI_TRANSFORM_IDENTITY, 49, |
114 | | 47, BROTLI_TRANSFORM_IDENTITY, 2, |
115 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17, |
116 | | 49, BROTLI_TRANSFORM_IDENTITY, 36, |
117 | | 49, BROTLI_TRANSFORM_IDENTITY, 33, |
118 | | 5, BROTLI_TRANSFORM_IDENTITY, 0, |
119 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21, |
120 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5, |
121 | | 49, BROTLI_TRANSFORM_IDENTITY, 37, |
122 | | 0, BROTLI_TRANSFORM_IDENTITY, 30, |
123 | | 49, BROTLI_TRANSFORM_IDENTITY, 38, |
124 | | 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0, |
125 | | 49, BROTLI_TRANSFORM_IDENTITY, 39, |
126 | | 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49, |
127 | | 49, BROTLI_TRANSFORM_IDENTITY, 34, |
128 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8, |
129 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12, |
130 | | 0, BROTLI_TRANSFORM_IDENTITY, 21, |
131 | | 49, BROTLI_TRANSFORM_IDENTITY, 40, |
132 | | 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12, |
133 | | 49, BROTLI_TRANSFORM_IDENTITY, 41, |
134 | | 49, BROTLI_TRANSFORM_IDENTITY, 42, |
135 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17, |
136 | | 49, BROTLI_TRANSFORM_IDENTITY, 43, |
137 | | 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5, |
138 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10, |
139 | | 0, BROTLI_TRANSFORM_IDENTITY, 34, |
140 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33, |
141 | | 49, BROTLI_TRANSFORM_IDENTITY, 44, |
142 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5, |
143 | | 45, BROTLI_TRANSFORM_IDENTITY, 49, |
144 | | 0, BROTLI_TRANSFORM_IDENTITY, 33, |
145 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30, |
146 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30, |
147 | | 49, BROTLI_TRANSFORM_IDENTITY, 46, |
148 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1, |
149 | | 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34, |
150 | | 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33, |
151 | | 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30, |
152 | | 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1, |
153 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33, |
154 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21, |
155 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12, |
156 | | 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5, |
157 | | 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34, |
158 | | 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12, |
159 | | 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30, |
160 | | 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34, |
161 | | 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34, |
162 | | }; |
163 | | |
164 | | static const BROTLI_MODEL("small") /* The built-in transform set: ties together kPrefixSuffix, kPrefixSuffixMap and kTransformsData. */ |
165 | | BrotliTransforms kBrotliTransforms = { |
166 | | sizeof(kPrefixSuffix), /* byte size of the affix blob, trailing NUL included */ |
167 | | (const uint8_t*)kPrefixSuffix, |
168 | | kPrefixSuffixMap, |
169 | | sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])), /* transform count: each transform occupies three bytes */ |
170 | | kTransformsData, |
171 | | NULL, /* no extra parameters: presumably the params member, which BrotliTransformDictionaryWord reads only for the SHIFT_* types; kTransformsData uses none of those */ |
172 | | {0, 12, 27, 23, 42, 63, 56, 48, 59, 64} /* entry k is the index in kTransformsData of the affix-free OMIT_LAST_k transform (k = 0: identity) */ |
173 | | }; |
174 | | |
175 | 5.34k | const BrotliTransforms* BrotliGetTransforms(void) { |
176 | 5.34k | return &kBrotliTransforms; |
177 | 5.34k | } |
178 | | |
/* Applies the simplified "uppercase" rule to the character starting at p[0],
   in place, and returns how many bytes that character is taken to occupy.

   - Lead byte below 0xC0: an ASCII letter 'a'..'z' has bit 5 flipped (which
     yields 'A'..'Z'); any other byte is left alone. Returns 1.
   - Lead byte 0xC0..0xDF: bit 5 of the second byte is flipped. Returns 2.
   - Lead byte 0xE0 and above: the third byte is XORed with 5. Returns 3.

   No length is passed in, so the multi-byte cases write to p[1] / p[2]
   unconditionally; the caller must make sure those bytes are addressable. */
static int ToUpperCase(uint8_t* p) {
  const uint8_t lead = p[0];
  int consumed;
  if (lead >= 0xE0) {
    /* An arbitrary transform for three byte characters. */
    p[2] ^= 5;
    consumed = 3;
  } else if (lead >= 0xC0) {
    /* An overly simplified uppercasing model for two byte UTF-8. */
    p[1] ^= 32;
    consumed = 2;
  } else {
    if ('a' <= lead && lead <= 'z') {
      p[0] = (uint8_t)(lead ^ 32);
    }
    consumed = 1;
  }
  return consumed;
}
195 | | |
/* Adds an offset to the scalar value of the UTF-8 sequence starting at
   word[0] and re-encodes the sequence in place.

   word      - bytes to modify; word[0] is interpreted as the lead byte.
   word_len  - number of bytes available at word.
   parameter - 16-bit offset; bit 15 is subtracted rather than added, which
               gives a limited sign extension (scalar stays below 1 << 24).

   Returns the number of bytes the caller should step over:
   - ASCII byte (< 0x80): the sum is truncated to 7 bits; returns 1.
   - Continuation byte (0x80..0xBF) or lead byte >= 0xF8: nothing is
     modified; returns 1.
   - 2/3/4-byte lead: only the payload bits are replaced (the sum is
     truncated to 11/16/21 bits and the top two bits of every trailing byte
     are kept as they were); returns 2/3/4.
   - Multi-byte lead with fewer bytes available than the sequence needs:
     nothing is modified; returns 1 for a 2-byte lead, word_len otherwise. */
static int Shift(uint8_t* word, int word_len, uint16_t parameter) {
  const uint32_t lead = word[0];
  const uint32_t magnitude = parameter & 0x7FFFu;
  const uint32_t sign_bit = parameter & 0x8000u;
  /* Limited sign extension: scalar < (1 << 24). */
  uint32_t scalar = magnitude + (0x1000000u - sign_bit);

  if (lead < 0x80u) {
    /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
    scalar += lead;
    word[0] = (uint8_t)(scalar & 0x7Fu);
    return 1;
  }

  if (lead < 0xC0u) {
    /* Continuation / 10AAAAAA. */
    return 1;
  }

  if (lead < 0xE0u) {
    /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
    if (word_len < 2) {
      return 1;
    }
    scalar += ((lead & 0x1Fu) << 6u) | (word[1] & 0x3Fu);
    word[0] = (uint8_t)(0xC0u | ((scalar >> 6u) & 0x1Fu));
    word[1] = (uint8_t)((word[1] & 0xC0u) | (scalar & 0x3Fu));
    return 2;
  }

  if (lead < 0xF0u) {
    /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
    if (word_len < 3) {
      return word_len;
    }
    scalar += ((lead & 0x0Fu) << 12u) | ((word[1] & 0x3Fu) << 6u) |
              (word[2] & 0x3Fu);
    word[0] = (uint8_t)(0xE0u | ((scalar >> 12u) & 0x0Fu));
    word[1] = (uint8_t)((word[1] & 0xC0u) | ((scalar >> 6u) & 0x3Fu));
    word[2] = (uint8_t)((word[2] & 0xC0u) | (scalar & 0x3Fu));
    return 3;
  }

  if (lead < 0xF8u) {
    /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
    if (word_len < 4) {
      return word_len;
    }
    scalar += ((lead & 0x07u) << 18u) | ((word[1] & 0x3Fu) << 12u) |
              ((word[2] & 0x3Fu) << 6u) | (word[3] & 0x3Fu);
    word[0] = (uint8_t)(0xF0u | ((scalar >> 18u) & 0x07u));
    word[1] = (uint8_t)((word[1] & 0xC0u) | ((scalar >> 12u) & 0x3Fu));
    word[2] = (uint8_t)((word[2] & 0xC0u) | ((scalar >> 6u) & 0x3Fu));
    word[3] = (uint8_t)((word[3] & 0xC0u) | (scalar & 0x3Fu));
    return 4;
  }

  /* Lead bytes 0xF8..0xFF are not touched. */
  return 1;
}
237 | | |
238 | | int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len, |
239 | 31.0k | const BrotliTransforms* transforms, int transform_idx) { |
240 | 31.0k | int idx = 0; |
241 | 31.0k | const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx); |
242 | 31.0k | uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx); |
243 | 31.0k | const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx); |
244 | 31.0k | { |
245 | 31.0k | int prefix_len = *prefix++; |
246 | 48.5k | while (prefix_len--) { dst[idx++] = *prefix++; } |
247 | 31.0k | } |
248 | 31.0k | { |
249 | 31.0k | const int t = type; |
250 | 31.0k | int i = 0; |
251 | 31.0k | if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) { |
252 | 21.2k | len -= t; |
253 | 21.2k | } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1 |
254 | 9.84k | && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) { |
255 | 2.66k | int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1); |
256 | 2.66k | word += skip; |
257 | 2.66k | len -= skip; |
258 | 2.66k | } |
259 | 200k | while (i < len) { dst[idx++] = word[i++]; } |
260 | 31.0k | if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) { |
261 | 4.84k | ToUpperCase(&dst[idx - len]); |
262 | 26.2k | } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) { |
263 | 2.33k | uint8_t* uppercase = &dst[idx - len]; |
264 | 15.4k | while (len > 0) { |
265 | 13.1k | int step = ToUpperCase(uppercase); |
266 | 13.1k | uppercase += step; |
267 | 13.1k | len -= step; |
268 | 13.1k | } |
269 | 23.9k | } else if (t == BROTLI_TRANSFORM_SHIFT_FIRST) { |
270 | 0 | uint16_t param = (uint16_t)(transforms->params[transform_idx * 2] |
271 | 0 | + (transforms->params[transform_idx * 2 + 1] << 8u)); |
272 | 0 | Shift(&dst[idx - len], len, param); |
273 | 23.9k | } else if (t == BROTLI_TRANSFORM_SHIFT_ALL) { |
274 | 0 | uint16_t param = (uint16_t)(transforms->params[transform_idx * 2] |
275 | 0 | + (transforms->params[transform_idx * 2 + 1] << 8u)); |
276 | 0 | uint8_t* shift = &dst[idx - len]; |
277 | 0 | while (len > 0) { |
278 | 0 | int step = Shift(shift, len, param); |
279 | 0 | shift += step; |
280 | 0 | len -= step; |
281 | 0 | } |
282 | 0 | } |
283 | 31.0k | } |
284 | 31.0k | { |
285 | 31.0k | int suffix_len = *suffix++; |
286 | 87.6k | while (suffix_len--) { dst[idx++] = *suffix++; } |
287 | 31.0k | return idx; |
288 | 31.0k | } |
289 | 31.0k | } |
290 | | |
291 | | #if defined(__cplusplus) || defined(c_plusplus) |
292 | | } /* extern "C" */ |
293 | | #endif |