/src/h2o/deps/brotli/c/enc/context.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | | |
3 | | Distributed under MIT license. |
4 | | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 | | */ |
6 | | |
7 | | /* Functions to map previous bytes into a context id. */ |
8 | | |
9 | | #ifndef BROTLI_ENC_CONTEXT_H_ |
10 | | #define BROTLI_ENC_CONTEXT_H_ |
11 | | |
12 | | #include <brotli/port.h> |
13 | | #include <brotli/types.h> |
14 | | |
15 | | #if defined(__cplusplus) || defined(c_plusplus) |
16 | | extern "C" { |
17 | | #endif |
18 | | |
19 | | /* Second-order context lookup table for UTF8 byte streams. |
20 | | |
21 | | If p1 and p2 are the previous two bytes, we calculate the context as |
22 | | |
23 | | context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]. |
24 | | |
25 | | If the previous two bytes are ASCII characters (i.e. < 128), this will be |
26 | | equivalent to |
27 | | |
28 | | context = 4 * context1(p1) + context2(p2), |
29 | | |
30 | | where context1 is based on the previous byte in the following way: |
31 | | |
32 | | 0 : control (other than \t, \n, \r) |
33 | | 1 : \t, \n, \r |
34 | | 2 : space |
35 | | 3 : other punctuation |
36 | | 4 : " ' |
37 | | 5 : % |
38 | | 6 : ( < [ { |
39 | | 7 : ) > ] } |
40 | | 8 : , ; : |
41 | | 9 : . |
42 | | 10 : = |
43 | | 11 : number |
44 | | 12 : upper-case vowel |
45 | | 13 : upper-case consonant |
46 | | 14 : lower-case vowel |
47 | | 15 : lower-case consonant |
48 | | |
49 | | and context2 is based on the second last byte: |
50 | | |
51 | | 0 : control, space |
52 | | 1 : punctuation |
53 | | 2 : upper-case letter, number |
54 | | 3 : lower-case letter |
55 | | |
56 | | If the last byte is ASCII, and the second last byte is not (in a valid UTF8 |
57 | | stream it will be a continuation byte, value between 128 and 191), the |
58 | | context is the same as if the second last byte was an ASCII control or space. |
59 | | |
60 | | If the last byte is a UTF8 lead byte (value >= 192), then the next byte will |
61 | | be a continuation byte and the context id is 2 or 3 depending on the LSB of |
62 | | the last byte and to a lesser extent on the second last byte if it is ASCII. |
63 | | |
64 | | If the last byte is a UTF8 continuation byte, the second last byte can be: |
65 | | - continuation byte: the next byte is probably ASCII or lead byte (assuming |
66 | | 4-byte UTF8 characters are rare) and the context id is 0 or 1. |
67 | | - lead byte (192 - 223): next byte is ASCII or lead byte, context is 0 or 1 |
68 | | - lead byte (224 - 255): next byte is continuation byte, context is 2 or 3 |
69 | | |
70 | | The possible value combinations of the previous two bytes, the range of |
71 | | context ids and the type of the next byte is summarized in the table below: |
72 | | |
73 | | |--------\-----------------------------------------------------------------| |
74 | | |         \                         Last byte                              | |
75 | | | Second   \---------------------------------------------------------------| |
76 | | | last byte \    ASCII            |   cont. byte        |   lead byte      | |
77 | | |            \   (0-127)          |   (128-191)         |   (192-)         | |
78 | | |=============|===================|=====================|==================| |
79 | | |  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     | |
80 | | |  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  | |
81 | | |-------------|-------------------|---------------------|------------------| |
82 | | |  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     | |
83 | | |  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  | |
84 | | |-------------|-------------------|---------------------|------------------| |
85 | | |  lead byte  | not valid         |  next: ASCII/lead   |  not valid       | |
86 | | |  (192-223)  |                   |  context: 0 - 1     |                  | |
87 | | |-------------|-------------------|---------------------|------------------| |
88 | | |  lead byte  | not valid         |  next: cont.        |  not valid       | |
89 | | |  (224-)     |                   |  context: 2 - 3     |                  | |
90 | | |-------------|-------------------|---------------------|------------------| |
91 | | */ |
static const uint8_t kUTF8ContextLookup[512] = {
  /* ------------------------------------------------------------------ */
  /* Entries 0..255: contribution of the last byte (index = p1).        */
  /* ------------------------------------------------------------------ */

  /* ASCII (0x00-0x7F): every value is a multiple of 4, leaving the two
     low bits free for the second-last-byte contribution. */
  /* 0x00 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
  /* 0x10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* 0x20 */  8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
  /* 0x30 */ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
  /* 0x40 */ 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
  /* 0x50 */ 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
  /* 0x60 */ 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
  /* 0x70 */ 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,

  /* UTF8 continuation bytes (0x80-0xBF): the value is the byte's LSB. */
  /* 0x80 */ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  /* 0x90 */ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  /* 0xA0 */ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
  /* 0xB0 */ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,

  /* UTF8 lead bytes (0xC0-0xFF): the value is 2 plus the byte's LSB. */
  /* 0xC0 */ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
  /* 0xD0 */ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
  /* 0xE0 */ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
  /* 0xF0 */ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,

  /* ------------------------------------------------------------------ */
  /* Entries 256..511: contribution of the second last byte             */
  /* (index = p2 + 256). All values fit in the two low bits.            */
  /* ------------------------------------------------------------------ */

  /* ASCII (0x00-0x7F). */
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x50 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0x70 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,

  /* UTF8 continuation bytes (0x80-0xBF): always 0. */
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

  /* UTF8 lead bytes: 0 for 0xC0-0xDF, 2 for 0xE0-0xFF. */
  /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
136 | | |
/* Context lookup table for small signed integers.

   Maps a byte value to one of eight classes:

     byte value   class
     0            0
     1 .. 15      1
     16 .. 63     2
     64 .. 127    3
     128 .. 191   4
     192 .. 239   5
     240 .. 254   6
     255          7

   Each row below holds 16 consecutive entries; the leading comment is the
   index of the row's first entry. */
static const uint8_t kSigned3BitContextLookup[] = {
  /*   0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /*  16 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /*  32 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /*  48 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /*  64 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /*  80 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /*  96 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 112 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 128 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 144 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 160 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 176 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 192 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  /* 208 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  /* 224 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  /* 240 */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
156 | | |
/* Selects how Context() derives a context id from the two previous bytes.
   The numeric values are spelled out explicitly and must not be reordered. */
enum ContextType {
  /* Low six bits of the last byte. */
  CONTEXT_LSB6 = 0,
  /* High six bits of the last byte. */
  CONTEXT_MSB6 = 1,
  /* kUTF8ContextLookup applied to the last and the second last byte. */
  CONTEXT_UTF8 = 2,
  /* kSigned3BitContextLookup applied to the last and the second last byte. */
  CONTEXT_SIGNED = 3
};
typedef enum ContextType ContextType;
163 | | |
164 | 0 | static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) { |
165 | 0 | switch (mode) { |
166 | 0 | case CONTEXT_LSB6: |
167 | 0 | return p1 & 0x3f; |
168 | 0 | case CONTEXT_MSB6: |
169 | 0 | return (uint8_t)(p1 >> 2); |
170 | 0 | case CONTEXT_UTF8: |
171 | 0 | return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]; |
172 | 0 | case CONTEXT_SIGNED: |
173 | 0 | return (uint8_t)((kSigned3BitContextLookup[p1] << 3) + |
174 | 0 | kSigned3BitContextLookup[p2]); |
175 | 0 | default: |
176 | 0 | return 0; |
177 | 0 | } |
178 | 0 | } Unexecuted instantiation: bit_cost.c:Context Unexecuted instantiation: block_splitter.c:Context Unexecuted instantiation: brotli_bit_stream.c:Context Unexecuted instantiation: cluster.c:Context Unexecuted instantiation: compress_fragment.c:Context Unexecuted instantiation: compress_fragment_two_pass.c:Context Unexecuted instantiation: encode.c:Context Unexecuted instantiation: histogram.c:Context Unexecuted instantiation: metablock.c:Context |
179 | | |
180 | | #if defined(__cplusplus) || defined(c_plusplus) |
181 | | } /* extern "C" */ |
182 | | #endif |
183 | | |
184 | | #endif /* BROTLI_ENC_CONTEXT_H_ */ |