/src/woff2/brotli/c/common/transform.c
Line  | Count  | Source  | 
1  |  | /* Copyright 2013 Google Inc. All Rights Reserved.  | 
2  |  |  | 
3  |  |    Distributed under MIT license.  | 
4  |  |    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT  | 
5  |  | */  | 
6  |  |  | 
7  |  | #include "./platform.h"  | 
8  |  | #include "./transform.h"  | 
9  |  |  | 
10  |  | #if defined(__cplusplus) || defined(c_plusplus)  | 
11  |  | extern "C" { | 
12  |  | #endif  | 
13  |  |  | 
/* RFC 7932 transforms string data.

   Every prefix/suffix string is packed back to back into this one array.
   An entry is a length byte (written as an octal escape) followed by that
   many payload bytes; for example "\10 of the " is the 8-byte string
   " of the ", and "\1 " is a single space.

   The ruler comment under each source line marks where entries begin: the
   leading "Nx" gives the high hex digit of the offset and each "_D" marker
   the low digit. Those offsets are the values stored in kPrefixSuffixMap.

   The literal ends with an explicit \0, which is a zero-length (empty) entry.
   NOTE(review): the explicit characters appear to fill all 217 bytes, so the
   literal's implicit terminating NUL is dropped. That is valid C but is
   rejected by C++; confirm before building this file as C++. */
static const char kPrefixSuffix[217] =
      "\1 \2, \10 of the \4 of \2s \1.\5 and \4 "
/* 0x  _0 _2  __5        _E    _3  _6 _8     _E */
      "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "
/* 2x     _3_ _5    _A_  _D_ _F  _2 _4     _A   _E */
      "that \1\'\6 with \6 from \4 by \1(\6. T"
/* 4x       _5_ _7      _E      _5    _A _C */
      "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "
/* 6x     _3    _8    _D    _2    _7_ _ _A _C */
      "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"
/* 8x  _0 _ _3    _8   _C _E _ _1     _7       _F */
      " not \3er \3al \4ful \4ive \5less \4es"
/* Ax       _5   _9   _D    _2    _7     _D */
      "t \4ize \2\xc2\xa0\4ous \5 the \2e \0";
/* Cx    _2    _7___ ___ _A    _F     _5  _8 */
30  |  |  | 
/* Start offset, inside kPrefixSuffix, of each of the 50 packed entries.
   The position in this table is the entry id (0..49); those ids are the
   values that appear in the first and third column of kTransformsData.
   The last offset, 0xD8 (216), is the final byte of the 217-byte
   kPrefixSuffix array, i.e. the zero-length entry. */
static const uint16_t kPrefixSuffixMap[50] = {
  0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
  0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
  0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
  0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
  0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
};
38  |  |  | 
/* RFC 7932 transforms.

   A flat list of 3-byte records, one record per transform, stored in
   transform-index order:
     { prefix id, BROTLI_TRANSFORM_* type, suffix id }
   The ids select entries of kPrefixSuffixMap / kPrefixSuffix; id 49 is the
   zero-length entry, meaning "no prefix" or "no suffix".
   NOTE(review): the column order is inferred from the data; the
   BROTLI_TRANSFORM_PREFIX / _TYPE / _SUFFIX accessors that decode a record
   are defined in transform.h - confirm there.

   kBrotliTransforms derives the record count with sizeof, so rows must be
   added or removed as whole triplets. */
static const uint8_t kTransformsData[] = {
  49, BROTLI_TRANSFORM_IDENTITY, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 0,
   0, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 47,
   0, BROTLI_TRANSFORM_IDENTITY, 49,
   4, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 3,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 6,
  49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
   1, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 1,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 7,
  49, BROTLI_TRANSFORM_IDENTITY, 9,
  48, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 8,
  49, BROTLI_TRANSFORM_IDENTITY, 5,
  49, BROTLI_TRANSFORM_IDENTITY, 10,
  49, BROTLI_TRANSFORM_IDENTITY, 11,
  49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 13,
  49, BROTLI_TRANSFORM_IDENTITY, 14,
  49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 15,
  49, BROTLI_TRANSFORM_IDENTITY, 16,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 12,
   5, BROTLI_TRANSFORM_IDENTITY, 49,
   0, BROTLI_TRANSFORM_IDENTITY, 1,
  49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 18,
  49, BROTLI_TRANSFORM_IDENTITY, 17,
  49, BROTLI_TRANSFORM_IDENTITY, 19,
  49, BROTLI_TRANSFORM_IDENTITY, 20,
  49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
  49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
  47, BROTLI_TRANSFORM_IDENTITY, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 22,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 23,
  49, BROTLI_TRANSFORM_IDENTITY, 24,
  49, BROTLI_TRANSFORM_IDENTITY, 25,
  49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
  49, BROTLI_TRANSFORM_IDENTITY, 27,
  49, BROTLI_TRANSFORM_IDENTITY, 28,
   0, BROTLI_TRANSFORM_IDENTITY, 12,
  49, BROTLI_TRANSFORM_IDENTITY, 29,
  49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
  49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
  49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 31,
  49, BROTLI_TRANSFORM_IDENTITY, 32,
  47, BROTLI_TRANSFORM_IDENTITY, 3,
  49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
   5, BROTLI_TRANSFORM_IDENTITY, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
  49, BROTLI_TRANSFORM_IDENTITY, 30,
   0, BROTLI_TRANSFORM_IDENTITY, 5,
  35, BROTLI_TRANSFORM_IDENTITY, 49,
  47, BROTLI_TRANSFORM_IDENTITY, 2,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
  49, BROTLI_TRANSFORM_IDENTITY, 36,
  49, BROTLI_TRANSFORM_IDENTITY, 33,
   5, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
  49, BROTLI_TRANSFORM_IDENTITY, 37,
   0, BROTLI_TRANSFORM_IDENTITY, 30,
  49, BROTLI_TRANSFORM_IDENTITY, 38,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 39,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 34,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
   0, BROTLI_TRANSFORM_IDENTITY, 21,
  49, BROTLI_TRANSFORM_IDENTITY, 40,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
  49, BROTLI_TRANSFORM_IDENTITY, 41,
  49, BROTLI_TRANSFORM_IDENTITY, 42,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
  49, BROTLI_TRANSFORM_IDENTITY, 43,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
   0, BROTLI_TRANSFORM_IDENTITY, 34,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
  49, BROTLI_TRANSFORM_IDENTITY, 44,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
  45, BROTLI_TRANSFORM_IDENTITY, 49,
   0, BROTLI_TRANSFORM_IDENTITY, 33,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
  49, BROTLI_TRANSFORM_IDENTITY, 46,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
};
163  |  |  | 
164  |  | static BrotliTransforms kBrotliTransforms = { | 
165  |  |   sizeof(kPrefixSuffix),  | 
166  |  |   (const uint8_t*)kPrefixSuffix,  | 
167  |  |   kPrefixSuffixMap,  | 
168  |  |   sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),  | 
169  |  |   kTransformsData,  | 
170  |  |   {0, 12, 27, 23, 42, 63, 56, 48, 59, 64} | 
171  |  | };  | 
172  |  |  | 
173  | 10.9k  | const BrotliTransforms* BrotliGetTransforms(void) { | 
174  | 10.9k  |   return &kBrotliTransforms;  | 
175  | 10.9k  | }  | 
176  |  |  | 
/* Upper-cases, in place, the character that starts at p[0] and returns the
   number of bytes that character is assumed to occupy (1, 2 or 3).

   The lead byte alone selects the case:
     >= 0xE0       three-byte character: p[2] is XOR-ed with 5 (an arbitrary
                   transform, kept for format compatibility);
     0xC0..0xDF    two-byte character: p[1] is XOR-ed with 32 (an overly
                   simplified upper-casing model for UTF-8);
     < 0xC0        one byte: 'a'..'z' become 'A'..'Z', anything else is left
                   untouched.

   No validation is done: p must have as many writable bytes as the value
   returned. */
static int ToUpperCase(uint8_t* p) {
  const uint8_t lead = p[0];

  if (lead >= 0xE0) {
    p[2] ^= 5;
    return 3;
  }
  if (lead >= 0xC0) {
    p[1] ^= 32;
    return 2;
  }
  if (lead >= 'a' && lead <= 'z') {
    /* ASCII lower and upper case differ only in bit 5. */
    p[0] = (uint8_t)(lead ^ 32);
  }
  return 1;
}
193  |  |  | 
194  |  | int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,  | 
195  | 47.1k  |     const BrotliTransforms* BROTLI_RESTRICT transforms, int transfom_idx) { | 
196  | 47.1k  |   int idx = 0;  | 
197  | 47.1k  |   const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transfom_idx);  | 
198  | 47.1k  |   uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transfom_idx);  | 
199  | 47.1k  |   const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transfom_idx);  | 
200  | 47.1k  |   { | 
201  | 47.1k  |     int prefix_len = *prefix++;  | 
202  | 68.4k  |     while (prefix_len--) { dst[idx++] = *prefix++; } | 
203  | 47.1k  |   }  | 
204  | 47.1k  |   { | 
205  | 47.1k  |     const int t = type;  | 
206  | 47.1k  |     int i = 0;  | 
207  | 47.1k  |     if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) { | 
208  | 30.0k  |       len -= t;  | 
209  | 30.0k  |     } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1  | 
210  | 4.48k  |         && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) { | 
211  | 4.48k  |       int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);  | 
212  | 4.48k  |       word += skip;  | 
213  | 4.48k  |       len -= skip;  | 
214  | 4.48k  |     }  | 
215  | 278k  |     while (i < len) { dst[idx++] = word[i++]; } | 
216  | 47.1k  |     if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) { | 
217  | 8.16k  |       ToUpperCase(&dst[idx - len]);  | 
218  | 39.0k  |     } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) { | 
219  | 4.44k  |       uint8_t* uppercase = &dst[idx - len];  | 
220  | 29.9k  |       while (len > 0) { | 
221  | 25.5k  |         int step = ToUpperCase(uppercase);  | 
222  | 25.5k  |         uppercase += step;  | 
223  | 25.5k  |         len -= step;  | 
224  | 25.5k  |       }  | 
225  | 4.44k  |     }  | 
226  | 47.1k  |   }  | 
227  | 47.1k  |   { | 
228  | 47.1k  |     int suffix_len = *suffix++;  | 
229  | 118k  |     while (suffix_len--) { dst[idx++] = *suffix++; } | 
230  | 47.1k  |     return idx;  | 
231  | 47.1k  |   }  | 
232  | 47.1k  | }  | 
233  |  |  | 
234  |  | #if defined(__cplusplus) || defined(c_plusplus)  | 
235  |  | }  /* extern "C" */  | 
236  |  | #endif  |