/src/zlib-ng/arch/generic/crc32_chorba_c.c
Line | Count | Source |
1 | | #include "zbuild.h" |
2 | | #if defined(__EMSCRIPTEN__) |
3 | | # include "zutil_p.h" |
4 | | #endif |
5 | | #include "crc32_braid_p.h" |
6 | | #include "crc32_braid_tbl.h" |
7 | | #include "generic_functions.h" |
8 | | |
9 | | /* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398. The three macros below describe one scratch buffer of 16 * 1024 z_word_t elements: its size in bytes, its length in z_word_t units, and its length in uint64_t units. */ |
10 | 0 | #define bitbuffersizebytes (16 * 1024 * sizeof(z_word_t)) |
11 | 0 | #define bitbuffersizezwords (bitbuffersizebytes / sizeof(z_word_t)) |
12 | 0 | #define bitbuffersizeqwords (bitbuffersizebytes / sizeof(uint64_t)) |
13 | | |
14 | | /** |
15 | | * Implements the Chorba algorithm for CRC32 computation (https://arxiv.org/abs/2412.16398). |
16 | | * |
17 | | * The input is read but never modified; all intermediate state lives in a local scratch buffer |
 | | * (bitbuffer) that is addressed modulo its size. The data is consumed in five stages: |
18 | | * 1. Initial pass (first 14848 words): reads input only and stores every 32-word block into |
 | | *    bitbuffer 14848 + 22 = 14870 words ahead of its own position; bitbuffer is never read here |
19 | | * 2. Intermediate pass (one 32-word block): only in23..in32 pull previously stored words |
20 | | * 3. Main pass: every input word is XORed with the bitbuffer word stored for its position, |
 | | *    while i + (14870 + 64) words is still below len |
 | | * 4. 64-bit pass: 32 bytes per iteration, folded forward with shift/XOR terms while i + 72 < len |
 | | * 5. Tail: the remaining len - i bytes are processed one byte at a time through crc_table |
21 | | * |
22 | | * @param crc Initial CRC value; seeds the first carry word (byte-swapped when not little-endian) |
23 | | * @param input Input data buffer, read as z_word_t words and later as uint64_t words |
24 | | * @param len Length of input data in bytes |
25 | | * @return Computed CRC32 value |
26 | | * |
27 | | * @note Requires minimum input size of 118960 + 512 bytes (118960 = 14870 * 8); len is not validated here |
28 | | * @note Uses a temporary buffer of 16 * 1024 z_word_t words (128KB when z_word_t is 8 bytes) |
29 | | */ |
30 | 0 | Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive (uint32_t crc, const z_word_t* input, size_t len) { |
31 | | #if defined(__EMSCRIPTEN__) |
32 | | z_word_t* bitbuffer = (z_word_t*)zng_alloc(bitbuffersizebytes); /* heap scratch buffer under Emscripten, freed before return; NOTE(review): result is not checked for NULL */ |
33 | | #else |
34 | 0 | ALIGNED_(16) z_word_t bitbuffer[bitbuffersizezwords]; |
35 | 0 | #endif |
36 | 0 | const uint8_t* bitbufferbytes = (const uint8_t*) bitbuffer; |
37 | 0 | uint64_t* bitbufferqwords = (uint64_t*) bitbuffer; |
38 | 0 | uint64_t* inputqwords = (uint64_t*) input; |
39 | 

40 | 0 | size_t i = 0; /* byte offset into input, shared by every stage below */ |
41 | 

42 | 0 | #if BYTE_ORDER == LITTLE_ENDIAN |
43 | 0 | z_word_t next1 = crc; /* the incoming CRC seeds the first carry word */ |
44 | | #else |
45 | | z_word_t next1 = ZSWAPWORD(crc); |
46 | | #endif |
47 | 

48 | 0 | z_word_t next2 = 0; |
49 | 0 | z_word_t next3 = 0; |
50 | 0 | z_word_t next4 = 0; |
51 | 0 | z_word_t next5 = 0; |
52 | 0 | z_word_t next6 = 0; |
53 | 0 | z_word_t next7 = 0; |
54 | 0 | z_word_t next8 = 0; |
55 | 0 | z_word_t next9 = 0; |
56 | 0 | z_word_t next10 = 0; |
57 | 0 | z_word_t next11 = 0; |
58 | 0 | z_word_t next12 = 0; |
59 | 0 | z_word_t next13 = 0; |
60 | 0 | z_word_t next14 = 0; |
61 | 0 | z_word_t next15 = 0; |
62 | 0 | z_word_t next16 = 0; |
63 | 0 | z_word_t next17 = 0; |
64 | 0 | z_word_t next18 = 0; |
65 | 0 | z_word_t next19 = 0; |
66 | 0 | z_word_t next20 = 0; |
67 | 0 | z_word_t next21 = 0; |
68 | 0 | z_word_t next22 = 0; |
69 | 0 | crc = 0; /* crc is reused as the accumulator of the final byte-wise loop */ |
70 | | |
71 | | // first pass: bitbuffer is only written here, never read, because nothing has been stored yet |
72 | 0 | for(; i < (14848 * sizeof(z_word_t)); i += (32 * sizeof(z_word_t))) { |
73 | 0 | z_word_t in1, in2, in3, in4, in5, in6, in7, in8; |
74 | 0 | z_word_t in9, in10, in11, in12, in13, in14, in15, in16; |
75 | 0 | z_word_t in17, in18, in19, in20, in21, in22, in23, in24; |
76 | 0 | z_word_t in25, in26, in27, in28, in29, in30, in31, in32; |
77 | 0 | int outoffset1 = ((i / sizeof(z_word_t)) + 14848) % bitbuffersizezwords; |
78 | 0 | int outoffset2 = ((i / sizeof(z_word_t)) + 14880) % bitbuffersizezwords; |
79 | 

80 | 0 | in1 = input[i / sizeof(z_word_t) + 0] ^ next1; |
81 | 0 | in2 = input[i / sizeof(z_word_t) + 1] ^ next2; |
82 | 0 | in3 = input[i / sizeof(z_word_t) + 2] ^ next3; |
83 | 0 | in4 = input[i / sizeof(z_word_t) + 3] ^ next4; |
84 | 0 | in5 = input[i / sizeof(z_word_t) + 4] ^ next5; |
85 | 0 | in6 = input[i / sizeof(z_word_t) + 5] ^ next6; |
86 | 0 | in7 = input[i / sizeof(z_word_t) + 6] ^ next7; |
87 | 0 | in8 = input[i / sizeof(z_word_t) + 7] ^ next8 ^ in1; |
88 | 0 | in9 = input[i / sizeof(z_word_t) + 8] ^ next9 ^ in2; |
89 | 0 | in10 = input[i / sizeof(z_word_t) + 9] ^ next10 ^ in3; |
90 | 0 | in11 = input[i / sizeof(z_word_t) + 10] ^ next11 ^ in4; |
91 | 0 | in12 = input[i / sizeof(z_word_t) + 11] ^ next12 ^ in1 ^ in5; |
92 | 0 | in13 = input[i / sizeof(z_word_t) + 12] ^ next13 ^ in2 ^ in6; |
93 | 0 | in14 = input[i / sizeof(z_word_t) + 13] ^ next14 ^ in3 ^ in7; |
94 | 0 | in15 = input[i / sizeof(z_word_t) + 14] ^ next15 ^ in4 ^ in8; |
95 | 0 | in16 = input[i / sizeof(z_word_t) + 15] ^ next16 ^ in5 ^ in9; |
96 | 0 | in17 = input[i / sizeof(z_word_t) + 16] ^ next17 ^ in6 ^ in10; |
97 | 0 | in18 = input[i / sizeof(z_word_t) + 17] ^ next18 ^ in7 ^ in11; |
98 | 0 | in19 = input[i / sizeof(z_word_t) + 18] ^ next19 ^ in8 ^ in12; |
99 | 0 | in20 = input[i / sizeof(z_word_t) + 19] ^ next20 ^ in9 ^ in13; |
100 | 0 | in21 = input[i / sizeof(z_word_t) + 20] ^ next21 ^ in10 ^ in14; |
101 | 0 | in22 = input[i / sizeof(z_word_t) + 21] ^ next22 ^ in11 ^ in15; |
102 | 0 | in23 = input[i / sizeof(z_word_t) + 22] ^ in1 ^ in12 ^ in16; |
103 | 0 | in24 = input[i / sizeof(z_word_t) + 23] ^ in2 ^ in13 ^ in17; |
104 | 0 | in25 = input[i / sizeof(z_word_t) + 24] ^ in3 ^ in14 ^ in18; |
105 | 0 | in26 = input[i / sizeof(z_word_t) + 25] ^ in4 ^ in15 ^ in19; |
106 | 0 | in27 = input[i / sizeof(z_word_t) + 26] ^ in5 ^ in16 ^ in20; |
107 | 0 | in28 = input[i / sizeof(z_word_t) + 27] ^ in6 ^ in17 ^ in21; |
108 | 0 | in29 = input[i / sizeof(z_word_t) + 28] ^ in7 ^ in18 ^ in22; |
109 | 0 | in30 = input[i / sizeof(z_word_t) + 29] ^ in8 ^ in19 ^ in23; |
110 | 0 | in31 = input[i / sizeof(z_word_t) + 30] ^ in9 ^ in20 ^ in24; |
111 | 0 | in32 = input[i / sizeof(z_word_t) + 31] ^ in10 ^ in21 ^ in25; |
112 | 

113 | 0 | next1 = in11 ^ in22 ^ in26; /* carry words consumed by the next 32-word block */ |
114 | 0 | next2 = in12 ^ in23 ^ in27; |
115 | 0 | next3 = in13 ^ in24 ^ in28; |
116 | 0 | next4 = in14 ^ in25 ^ in29; |
117 | 0 | next5 = in15 ^ in26 ^ in30; |
118 | 0 | next6 = in16 ^ in27 ^ in31; |
119 | 0 | next7 = in17 ^ in28 ^ in32; |
120 | 0 | next8 = in18 ^ in29; |
121 | 0 | next9 = in19 ^ in30; |
122 | 0 | next10 = in20 ^ in31; |
123 | 0 | next11 = in21 ^ in32; |
124 | 0 | next12 = in22; |
125 | 0 | next13 = in23; |
126 | 0 | next14 = in24; |
127 | 0 | next15 = in25; |
128 | 0 | next16 = in26; |
129 | 0 | next17 = in27; |
130 | 0 | next18 = in28; |
131 | 0 | next19 = in29; |
132 | 0 | next20 = in30; |
133 | 0 | next21 = in31; |
134 | 0 | next22 = in32; |
135 | 

136 | 0 | bitbuffer[outoffset1 + 22] = in1; /* store the block 14848 + 22 words ahead of its own position */ |
137 | 0 | bitbuffer[outoffset1 + 23] = in2; |
138 | 0 | bitbuffer[outoffset1 + 24] = in3; |
139 | 0 | bitbuffer[outoffset1 + 25] = in4; |
140 | 0 | bitbuffer[outoffset1 + 26] = in5; |
141 | 0 | bitbuffer[outoffset1 + 27] = in6; |
142 | 0 | bitbuffer[outoffset1 + 28] = in7; |
143 | 0 | bitbuffer[outoffset1 + 29] = in8; |
144 | 0 | bitbuffer[outoffset1 + 30] = in9; |
145 | 0 | bitbuffer[outoffset1 + 31] = in10; |
146 | 0 | bitbuffer[outoffset2 + 0] = in11; |
147 | 0 | bitbuffer[outoffset2 + 1] = in12; |
148 | 0 | bitbuffer[outoffset2 + 2] = in13; |
149 | 0 | bitbuffer[outoffset2 + 3] = in14; |
150 | 0 | bitbuffer[outoffset2 + 4] = in15; |
151 | 0 | bitbuffer[outoffset2 + 5] = in16; |
152 | 0 | bitbuffer[outoffset2 + 6] = in17; |
153 | 0 | bitbuffer[outoffset2 + 7] = in18; |
154 | 0 | bitbuffer[outoffset2 + 8] = in19; |
155 | 0 | bitbuffer[outoffset2 + 9] = in20; |
156 | 0 | bitbuffer[outoffset2 + 10] = in21; |
157 | 0 | bitbuffer[outoffset2 + 11] = in22; |
158 | 0 | bitbuffer[outoffset2 + 12] = in23; |
159 | 0 | bitbuffer[outoffset2 + 13] = in24; |
160 | 0 | bitbuffer[outoffset2 + 14] = in25; |
161 | 0 | bitbuffer[outoffset2 + 15] = in26; |
162 | 0 | bitbuffer[outoffset2 + 16] = in27; |
163 | 0 | bitbuffer[outoffset2 + 17] = in28; |
164 | 0 | bitbuffer[outoffset2 + 18] = in29; |
165 | 0 | bitbuffer[outoffset2 + 19] = in30; |
166 | 0 | bitbuffer[outoffset2 + 20] = in31; |
167 | 0 | bitbuffer[outoffset2 + 21] = in32; |
168 | 0 | } |
169 | | |
170 | | // one intermediate 32-word block: only in23..in32 pull stored values from bitbuffer (offsets 22..31) |
171 | 0 | for(; i < (14880 * sizeof(z_word_t)); i += (32 * sizeof(z_word_t))) { |
172 | 0 | z_word_t in1, in2, in3, in4, in5, in6, in7, in8; |
173 | 0 | z_word_t in9, in10, in11, in12, in13, in14, in15, in16; |
174 | 0 | z_word_t in17, in18, in19, in20, in21, in22, in23, in24; |
175 | 0 | z_word_t in25, in26, in27, in28, in29, in30, in31, in32; |
176 | 0 | int inoffset = (i / sizeof(z_word_t)) % bitbuffersizezwords; |
177 | 0 | int outoffset1 = ((i / sizeof(z_word_t)) + 14848) % bitbuffersizezwords; |
178 | 0 | int outoffset2 = ((i / sizeof(z_word_t)) + 14880) % bitbuffersizezwords; |
179 | 

180 | 0 | in1 = input[i / sizeof(z_word_t) + 0] ^ next1; |
181 | 0 | in2 = input[i / sizeof(z_word_t) + 1] ^ next2; |
182 | 0 | in3 = input[i / sizeof(z_word_t) + 2] ^ next3; |
183 | 0 | in4 = input[i / sizeof(z_word_t) + 3] ^ next4; |
184 | 0 | in5 = input[i / sizeof(z_word_t) + 4] ^ next5; |
185 | 0 | in6 = input[i / sizeof(z_word_t) + 5] ^ next6; |
186 | 0 | in7 = input[i / sizeof(z_word_t) + 6] ^ next7; |
187 | 0 | in8 = input[i / sizeof(z_word_t) + 7] ^ next8 ^ in1; |
188 | 0 | in9 = input[i / sizeof(z_word_t) + 8] ^ next9 ^ in2; |
189 | 0 | in10 = input[i / sizeof(z_word_t) + 9] ^ next10 ^ in3; |
190 | 0 | in11 = input[i / sizeof(z_word_t) + 10] ^ next11 ^ in4; |
191 | 0 | in12 = input[i / sizeof(z_word_t) + 11] ^ next12 ^ in1 ^ in5; |
192 | 0 | in13 = input[i / sizeof(z_word_t) + 12] ^ next13 ^ in2 ^ in6; |
193 | 0 | in14 = input[i / sizeof(z_word_t) + 13] ^ next14 ^ in3 ^ in7; |
194 | 0 | in15 = input[i / sizeof(z_word_t) + 14] ^ next15 ^ in4 ^ in8; |
195 | 0 | in16 = input[i / sizeof(z_word_t) + 15] ^ next16 ^ in5 ^ in9; |
196 | 0 | in17 = input[i / sizeof(z_word_t) + 16] ^ next17 ^ in6 ^ in10; |
197 | 0 | in18 = input[i / sizeof(z_word_t) + 17] ^ next18 ^ in7 ^ in11; |
198 | 0 | in19 = input[i / sizeof(z_word_t) + 18] ^ next19 ^ in8 ^ in12; |
199 | 0 | in20 = input[i / sizeof(z_word_t) + 19] ^ next20 ^ in9 ^ in13; |
200 | 0 | in21 = input[i / sizeof(z_word_t) + 20] ^ next21 ^ in10 ^ in14; |
201 | 0 | in22 = input[i / sizeof(z_word_t) + 21] ^ next22 ^ in11 ^ in15; |
202 | 0 | in23 = input[i / sizeof(z_word_t) + 22] ^ in1 ^ in12 ^ in16 ^ bitbuffer[inoffset + 22]; |
203 | 0 | in24 = input[i / sizeof(z_word_t) + 23] ^ in2 ^ in13 ^ in17 ^ bitbuffer[inoffset + 23]; |
204 | 0 | in25 = input[i / sizeof(z_word_t) + 24] ^ in3 ^ in14 ^ in18 ^ bitbuffer[inoffset + 24]; |
205 | 0 | in26 = input[i / sizeof(z_word_t) + 25] ^ in4 ^ in15 ^ in19 ^ bitbuffer[inoffset + 25]; |
206 | 0 | in27 = input[i / sizeof(z_word_t) + 26] ^ in5 ^ in16 ^ in20 ^ bitbuffer[inoffset + 26]; |
207 | 0 | in28 = input[i / sizeof(z_word_t) + 27] ^ in6 ^ in17 ^ in21 ^ bitbuffer[inoffset + 27]; |
208 | 0 | in29 = input[i / sizeof(z_word_t) + 28] ^ in7 ^ in18 ^ in22 ^ bitbuffer[inoffset + 28]; |
209 | 0 | in30 = input[i / sizeof(z_word_t) + 29] ^ in8 ^ in19 ^ in23 ^ bitbuffer[inoffset + 29]; |
210 | 0 | in31 = input[i / sizeof(z_word_t) + 30] ^ in9 ^ in20 ^ in24 ^ bitbuffer[inoffset + 30]; |
211 | 0 | in32 = input[i / sizeof(z_word_t) + 31] ^ in10 ^ in21 ^ in25 ^ bitbuffer[inoffset + 31]; |
212 | 

213 | 0 | next1 = in11 ^ in22 ^ in26; |
214 | 0 | next2 = in12 ^ in23 ^ in27; |
215 | 0 | next3 = in13 ^ in24 ^ in28; |
216 | 0 | next4 = in14 ^ in25 ^ in29; |
217 | 0 | next5 = in15 ^ in26 ^ in30; |
218 | 0 | next6 = in16 ^ in27 ^ in31; |
219 | 0 | next7 = in17 ^ in28 ^ in32; |
220 | 0 | next8 = in18 ^ in29; |
221 | 0 | next9 = in19 ^ in30; |
222 | 0 | next10 = in20 ^ in31; |
223 | 0 | next11 = in21 ^ in32; |
224 | 0 | next12 = in22; |
225 | 0 | next13 = in23; |
226 | 0 | next14 = in24; |
227 | 0 | next15 = in25; |
228 | 0 | next16 = in26; |
229 | 0 | next17 = in27; |
230 | 0 | next18 = in28; |
231 | 0 | next19 = in29; |
232 | 0 | next20 = in30; |
233 | 0 | next21 = in31; |
234 | 0 | next22 = in32; |
235 | 

236 | 0 | bitbuffer[outoffset1 + 22] = in1; |
237 | 0 | bitbuffer[outoffset1 + 23] = in2; |
238 | 0 | bitbuffer[outoffset1 + 24] = in3; |
239 | 0 | bitbuffer[outoffset1 + 25] = in4; |
240 | 0 | bitbuffer[outoffset1 + 26] = in5; |
241 | 0 | bitbuffer[outoffset1 + 27] = in6; |
242 | 0 | bitbuffer[outoffset1 + 28] = in7; |
243 | 0 | bitbuffer[outoffset1 + 29] = in8; |
244 | 0 | bitbuffer[outoffset1 + 30] = in9; |
245 | 0 | bitbuffer[outoffset1 + 31] = in10; |
246 | 0 | bitbuffer[outoffset2 + 0] = in11; |
247 | 0 | bitbuffer[outoffset2 + 1] = in12; |
248 | 0 | bitbuffer[outoffset2 + 2] = in13; |
249 | 0 | bitbuffer[outoffset2 + 3] = in14; |
250 | 0 | bitbuffer[outoffset2 + 4] = in15; |
251 | 0 | bitbuffer[outoffset2 + 5] = in16; |
252 | 0 | bitbuffer[outoffset2 + 6] = in17; |
253 | 0 | bitbuffer[outoffset2 + 7] = in18; |
254 | 0 | bitbuffer[outoffset2 + 8] = in19; |
255 | 0 | bitbuffer[outoffset2 + 9] = in20; |
256 | 0 | bitbuffer[outoffset2 + 10] = in21; |
257 | 0 | bitbuffer[outoffset2 + 11] = in22; |
258 | 0 | bitbuffer[outoffset2 + 12] = in23; |
259 | 0 | bitbuffer[outoffset2 + 13] = in24; |
260 | 0 | bitbuffer[outoffset2 + 14] = in25; |
261 | 0 | bitbuffer[outoffset2 + 15] = in26; |
262 | 0 | bitbuffer[outoffset2 + 16] = in27; |
263 | 0 | bitbuffer[outoffset2 + 17] = in28; |
264 | 0 | bitbuffer[outoffset2 + 18] = in29; |
265 | 0 | bitbuffer[outoffset2 + 19] = in30; |
266 | 0 | bitbuffer[outoffset2 + 20] = in31; |
267 | 0 | bitbuffer[outoffset2 + 21] = in32; |
268 | 0 | } |
269 | 

270 | 0 | for(; (i + (14870 + 64) * sizeof(z_word_t)) < len; i += (32 * sizeof(z_word_t))) { /* main pass: all 32 words pull from bitbuffer; stops while 14870 + 64 words of input still remain */ |
271 | 0 | z_word_t in1, in2, in3, in4, in5, in6, in7, in8; |
272 | 0 | z_word_t in9, in10, in11, in12, in13, in14, in15, in16; |
273 | 0 | z_word_t in17, in18, in19, in20, in21, in22, in23, in24; |
274 | 0 | z_word_t in25, in26, in27, in28, in29, in30, in31, in32; |
275 | 0 | int inoffset = (i / sizeof(z_word_t)) % bitbuffersizezwords; |
276 | 0 | int outoffset1 = ((i / sizeof(z_word_t)) + 14848) % bitbuffersizezwords; |
277 | 0 | int outoffset2 = ((i / sizeof(z_word_t)) + 14880) % bitbuffersizezwords; |
278 | 

279 | 0 | in1 = input[i / sizeof(z_word_t) + 0] ^ next1 ^ bitbuffer[inoffset + 0]; |
280 | 0 | in2 = input[i / sizeof(z_word_t) + 1] ^ next2 ^ bitbuffer[inoffset + 1]; |
281 | 0 | in3 = input[i / sizeof(z_word_t) + 2] ^ next3 ^ bitbuffer[inoffset + 2]; |
282 | 0 | in4 = input[i / sizeof(z_word_t) + 3] ^ next4 ^ bitbuffer[inoffset + 3]; |
283 | 0 | in5 = input[i / sizeof(z_word_t) + 4] ^ next5 ^ bitbuffer[inoffset + 4]; |
284 | 0 | in6 = input[i / sizeof(z_word_t) + 5] ^ next6 ^ bitbuffer[inoffset + 5]; |
285 | 0 | in7 = input[i / sizeof(z_word_t) + 6] ^ next7 ^ bitbuffer[inoffset + 6]; |
286 | 0 | in8 = input[i / sizeof(z_word_t) + 7] ^ next8 ^ in1 ^ bitbuffer[inoffset + 7]; |
287 | 0 | in9 = input[i / sizeof(z_word_t) + 8] ^ next9 ^ in2 ^ bitbuffer[inoffset + 8]; |
288 | 0 | in10 = input[i / sizeof(z_word_t) + 9] ^ next10 ^ in3 ^ bitbuffer[inoffset + 9]; |
289 | 0 | in11 = input[i / sizeof(z_word_t) + 10] ^ next11 ^ in4 ^ bitbuffer[inoffset + 10]; |
290 | 0 | in12 = input[i / sizeof(z_word_t) + 11] ^ next12 ^ in1 ^ in5 ^ bitbuffer[inoffset + 11]; |
291 | 0 | in13 = input[i / sizeof(z_word_t) + 12] ^ next13 ^ in2 ^ in6 ^ bitbuffer[inoffset + 12]; |
292 | 0 | in14 = input[i / sizeof(z_word_t) + 13] ^ next14 ^ in3 ^ in7 ^ bitbuffer[inoffset + 13]; |
293 | 0 | in15 = input[i / sizeof(z_word_t) + 14] ^ next15 ^ in4 ^ in8 ^ bitbuffer[inoffset + 14]; |
294 | 0 | in16 = input[i / sizeof(z_word_t) + 15] ^ next16 ^ in5 ^ in9 ^ bitbuffer[inoffset + 15]; |
295 | 0 | in17 = input[i / sizeof(z_word_t) + 16] ^ next17 ^ in6 ^ in10 ^ bitbuffer[inoffset + 16]; |
296 | 0 | in18 = input[i / sizeof(z_word_t) + 17] ^ next18 ^ in7 ^ in11 ^ bitbuffer[inoffset + 17]; |
297 | 0 | in19 = input[i / sizeof(z_word_t) + 18] ^ next19 ^ in8 ^ in12 ^ bitbuffer[inoffset + 18]; |
298 | 0 | in20 = input[i / sizeof(z_word_t) + 19] ^ next20 ^ in9 ^ in13 ^ bitbuffer[inoffset + 19]; |
299 | 0 | in21 = input[i / sizeof(z_word_t) + 20] ^ next21 ^ in10 ^ in14 ^ bitbuffer[inoffset + 20]; |
300 | 0 | in22 = input[i / sizeof(z_word_t) + 21] ^ next22 ^ in11 ^ in15 ^ bitbuffer[inoffset + 21]; |
301 | 0 | in23 = input[i / sizeof(z_word_t) + 22] ^ in1 ^ in12 ^ in16 ^ bitbuffer[inoffset + 22]; |
302 | 0 | in24 = input[i / sizeof(z_word_t) + 23] ^ in2 ^ in13 ^ in17 ^ bitbuffer[inoffset + 23]; |
303 | 0 | in25 = input[i / sizeof(z_word_t) + 24] ^ in3 ^ in14 ^ in18 ^ bitbuffer[inoffset + 24]; |
304 | 0 | in26 = input[i / sizeof(z_word_t) + 25] ^ in4 ^ in15 ^ in19 ^ bitbuffer[inoffset + 25]; |
305 | 0 | in27 = input[i / sizeof(z_word_t) + 26] ^ in5 ^ in16 ^ in20 ^ bitbuffer[inoffset + 26]; |
306 | 0 | in28 = input[i / sizeof(z_word_t) + 27] ^ in6 ^ in17 ^ in21 ^ bitbuffer[inoffset + 27]; |
307 | 0 | in29 = input[i / sizeof(z_word_t) + 28] ^ in7 ^ in18 ^ in22 ^ bitbuffer[inoffset + 28]; |
308 | 0 | in30 = input[i / sizeof(z_word_t) + 29] ^ in8 ^ in19 ^ in23 ^ bitbuffer[inoffset + 29]; |
309 | 0 | in31 = input[i / sizeof(z_word_t) + 30] ^ in9 ^ in20 ^ in24 ^ bitbuffer[inoffset + 30]; |
310 | 0 | in32 = input[i / sizeof(z_word_t) + 31] ^ in10 ^ in21 ^ in25 ^ bitbuffer[inoffset + 31]; |
311 | 

312 | 0 | next1 = in11 ^ in22 ^ in26; |
313 | 0 | next2 = in12 ^ in23 ^ in27; |
314 | 0 | next3 = in13 ^ in24 ^ in28; |
315 | 0 | next4 = in14 ^ in25 ^ in29; |
316 | 0 | next5 = in15 ^ in26 ^ in30; |
317 | 0 | next6 = in16 ^ in27 ^ in31; |
318 | 0 | next7 = in17 ^ in28 ^ in32; |
319 | 0 | next8 = in18 ^ in29; |
320 | 0 | next9 = in19 ^ in30; |
321 | 0 | next10 = in20 ^ in31; |
322 | 0 | next11 = in21 ^ in32; |
323 | 0 | next12 = in22; |
324 | 0 | next13 = in23; |
325 | 0 | next14 = in24; |
326 | 0 | next15 = in25; |
327 | 0 | next16 = in26; |
328 | 0 | next17 = in27; |
329 | 0 | next18 = in28; |
330 | 0 | next19 = in29; |
331 | 0 | next20 = in30; |
332 | 0 | next21 = in31; |
333 | 0 | next22 = in32; |
334 | 

335 | 0 | bitbuffer[outoffset1 + 22] = in1; |
336 | 0 | bitbuffer[outoffset1 + 23] = in2; |
337 | 0 | bitbuffer[outoffset1 + 24] = in3; |
338 | 0 | bitbuffer[outoffset1 + 25] = in4; |
339 | 0 | bitbuffer[outoffset1 + 26] = in5; |
340 | 0 | bitbuffer[outoffset1 + 27] = in6; |
341 | 0 | bitbuffer[outoffset1 + 28] = in7; |
342 | 0 | bitbuffer[outoffset1 + 29] = in8; |
343 | 0 | bitbuffer[outoffset1 + 30] = in9; |
344 | 0 | bitbuffer[outoffset1 + 31] = in10; |
345 | 0 | bitbuffer[outoffset2 + 0] = in11; |
346 | 0 | bitbuffer[outoffset2 + 1] = in12; |
347 | 0 | bitbuffer[outoffset2 + 2] = in13; |
348 | 0 | bitbuffer[outoffset2 + 3] = in14; |
349 | 0 | bitbuffer[outoffset2 + 4] = in15; |
350 | 0 | bitbuffer[outoffset2 + 5] = in16; |
351 | 0 | bitbuffer[outoffset2 + 6] = in17; |
352 | 0 | bitbuffer[outoffset2 + 7] = in18; |
353 | 0 | bitbuffer[outoffset2 + 8] = in19; |
354 | 0 | bitbuffer[outoffset2 + 9] = in20; |
355 | 0 | bitbuffer[outoffset2 + 10] = in21; |
356 | 0 | bitbuffer[outoffset2 + 11] = in22; |
357 | 0 | bitbuffer[outoffset2 + 12] = in23; |
358 | 0 | bitbuffer[outoffset2 + 13] = in24; |
359 | 0 | bitbuffer[outoffset2 + 14] = in25; |
360 | 0 | bitbuffer[outoffset2 + 15] = in26; |
361 | 0 | bitbuffer[outoffset2 + 16] = in27; |
362 | 0 | bitbuffer[outoffset2 + 17] = in28; |
363 | 0 | bitbuffer[outoffset2 + 18] = in29; |
364 | 0 | bitbuffer[outoffset2 + 19] = in30; |
365 | 0 | bitbuffer[outoffset2 + 20] = in31; |
366 | 0 | bitbuffer[outoffset2 + 21] = in32; |
367 | 0 | } |
368 | 

369 | 0 | bitbuffer[(i / sizeof(z_word_t) + 0) % bitbuffersizezwords] ^= next1; /* fold the 22 pending carry words into bitbuffer at the current position */ |
370 | 0 | bitbuffer[(i / sizeof(z_word_t) + 1) % bitbuffersizezwords] ^= next2; |
371 | 0 | bitbuffer[(i / sizeof(z_word_t) + 2) % bitbuffersizezwords] ^= next3; |
372 | 0 | bitbuffer[(i / sizeof(z_word_t) + 3) % bitbuffersizezwords] ^= next4; |
373 | 0 | bitbuffer[(i / sizeof(z_word_t) + 4) % bitbuffersizezwords] ^= next5; |
374 | 0 | bitbuffer[(i / sizeof(z_word_t) + 5) % bitbuffersizezwords] ^= next6; |
375 | 0 | bitbuffer[(i / sizeof(z_word_t) + 6) % bitbuffersizezwords] ^= next7; |
376 | 0 | bitbuffer[(i / sizeof(z_word_t) + 7) % bitbuffersizezwords] ^= next8; |
377 | 0 | bitbuffer[(i / sizeof(z_word_t) + 8) % bitbuffersizezwords] ^= next9; |
378 | 0 | bitbuffer[(i / sizeof(z_word_t) + 9) % bitbuffersizezwords] ^= next10; |
379 | 0 | bitbuffer[(i / sizeof(z_word_t) + 10) % bitbuffersizezwords] ^= next11; |
380 | 0 | bitbuffer[(i / sizeof(z_word_t) + 11) % bitbuffersizezwords] ^= next12; |
381 | 0 | bitbuffer[(i / sizeof(z_word_t) + 12) % bitbuffersizezwords] ^= next13; |
382 | 0 | bitbuffer[(i / sizeof(z_word_t) + 13) % bitbuffersizezwords] ^= next14; |
383 | 0 | bitbuffer[(i / sizeof(z_word_t) + 14) % bitbuffersizezwords] ^= next15; |
384 | 0 | bitbuffer[(i / sizeof(z_word_t) + 15) % bitbuffersizezwords] ^= next16; |
385 | 0 | bitbuffer[(i / sizeof(z_word_t) + 16) % bitbuffersizezwords] ^= next17; |
386 | 0 | bitbuffer[(i / sizeof(z_word_t) + 17) % bitbuffersizezwords] ^= next18; |
387 | 0 | bitbuffer[(i / sizeof(z_word_t) + 18) % bitbuffersizezwords] ^= next19; |
388 | 0 | bitbuffer[(i / sizeof(z_word_t) + 19) % bitbuffersizezwords] ^= next20; |
389 | 0 | bitbuffer[(i / sizeof(z_word_t) + 20) % bitbuffersizezwords] ^= next21; |
390 | 0 | bitbuffer[(i / sizeof(z_word_t) + 21) % bitbuffersizezwords] ^= next22; |
391 | 

392 | 0 | for (int j = 14870; j < 14870 + 64; j++) { /* clear the 64 words that start 14870 words ahead of the current position */ |
393 | 0 | bitbuffer[(j + (i / sizeof(z_word_t))) % bitbuffersizezwords] = 0; |
394 | 0 | } |
395 | 

396 | 0 | uint64_t next1_64 = 0; |
397 | 0 | uint64_t next2_64 = 0; |
398 | 0 | uint64_t next3_64 = 0; |
399 | 0 | uint64_t next4_64 = 0; |
400 | 0 | uint64_t next5_64 = 0; |
401 | 0 | uint64_t final[9] = {0}; /* 72-byte staging area for the tail */ |
402 | 

403 | 0 | for(; (i + 72 < len); i += 32) { /* 64-bit pass: 32 bytes per iteration; in3/in4 also take a1, a2 and b1 from the same iteration */ |
404 | 0 | uint64_t in1; |
405 | 0 | uint64_t in2; |
406 | 0 | uint64_t in3; |
407 | 0 | uint64_t in4; |
408 | 0 | uint64_t a1, a2, a3, a4; |
409 | 0 | uint64_t b1, b2, b3, b4; |
410 | 0 | uint64_t c1, c2, c3, c4; |
411 | 0 | uint64_t d1, d2, d3, d4; |
412 | 

413 | 0 | uint64_t out1; |
414 | 0 | uint64_t out2; |
415 | 0 | uint64_t out3; |
416 | 0 | uint64_t out4; |
417 | 0 | uint64_t out5; |
418 | 

419 | 0 | in1 = inputqwords[i / sizeof(uint64_t)] ^ bitbufferqwords[(i / sizeof(uint64_t)) % bitbuffersizeqwords]; |
420 | 0 | in2 = inputqwords[i / sizeof(uint64_t) + 1] ^ bitbufferqwords[(i / sizeof(uint64_t) + 1) % bitbuffersizeqwords]; |
421 | | #if BYTE_ORDER == BIG_ENDIAN |
422 | | in1 = ZSWAP64(in1); |
423 | | in2 = ZSWAP64(in2); |
424 | | #endif |
425 | 0 | in1 ^= next1_64; |
426 | 0 | in2 ^= next2_64; |
427 | 

428 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
429 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
430 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
431 | 0 | a4 = (in1 >> 20); |
432 | 

433 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
434 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
435 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
436 | 0 | b4 = (in2 >> 20); |
437 | 

438 | 0 | in3 = inputqwords[i / sizeof(uint64_t) + 2] ^ bitbufferqwords[(i / sizeof(uint64_t) + 2) % bitbuffersizeqwords]; |
439 | 0 | in4 = inputqwords[i / sizeof(uint64_t) + 3] ^ bitbufferqwords[(i / sizeof(uint64_t) + 3) % bitbuffersizeqwords]; |
440 | | #if BYTE_ORDER == BIG_ENDIAN |
441 | | in3 = ZSWAP64(in3); |
442 | | in4 = ZSWAP64(in4); |
443 | | #endif |
444 | 0 | in3 ^= next3_64 ^ a1; |
445 | 0 | in4 ^= next4_64 ^ a2 ^ b1; |
446 | 

447 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
448 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
449 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
450 | 0 | c4 = (in3 >> 20); |
451 | 

452 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
453 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
454 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
455 | 0 | d4 = (in4 >> 20); |
456 | 

457 | 0 | out1 = a3 ^ b2 ^ c1; |
458 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
459 | 0 | out3 = b4 ^ c3 ^ d2; |
460 | 0 | out4 = c4 ^ d3; |
461 | 0 | out5 = d4; |
462 | 

463 | 0 | next1_64 = next5_64 ^ out1; /* the fifth carry of the previous iteration lands on the first qword of the next one */ |
464 | 0 | next2_64 = out2; |
465 | 0 | next3_64 = out3; |
466 | 0 | next4_64 = out4; |
467 | 0 | next5_64 = out5; |
468 | 

469 | 0 | } |
470 | 

471 | | #if BYTE_ORDER == BIG_ENDIAN |
472 | | next1_64 = ZSWAP64(next1_64); |
473 | | next2_64 = ZSWAP64(next2_64); |
474 | | next3_64 = ZSWAP64(next3_64); |
475 | | next4_64 = ZSWAP64(next4_64); |
476 | | next5_64 = ZSWAP64(next5_64); |
477 | | #endif |
478 | 

479 | 0 | memcpy(final, inputqwords + (i / sizeof(uint64_t)), len-i); /* the loop above exits once i + 72 >= len, so len - i fits in final[] provided len >= i */ |
480 | 0 | final[0] ^= next1_64; |
481 | 0 | final[1] ^= next2_64; |
482 | 0 | final[2] ^= next3_64; |
483 | 0 | final[3] ^= next4_64; |
484 | 0 | final[4] ^= next5_64; |
485 | 

486 | 0 | uint8_t* final_bytes = (uint8_t*) final; |
487 | 

488 | 0 | for(size_t j = 0; j < (len-i); j++) { /* byte-at-a-time table CRC over the tail, each byte first XORed with its bitbuffer byte */ |
489 | 0 | crc = crc_table[(crc ^ final_bytes[j] ^ bitbufferbytes[(j+i) % bitbuffersizebytes]) & 0xff] ^ (crc >> 8); |
490 | 0 | } |
491 | 

492 | | #if defined(__EMSCRIPTEN__) |
493 | | zng_free(bitbuffer); |
494 | | #endif |
495 | 0 | return crc; |
496 | 0 | } |
497 | | |
498 | | # if OPTIMAL_CMP == 64 |
499 | | /* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398 |
 | | * |
 | | * Variant that keeps its state in a zero-initialised 32768-byte local buffer (bitbuffer) which is |
 | | * indexed directly, without wrap-around. The input is read but never modified. |
 | | * |
 | | * Stages: |
 | | * 1. 64 bytes per iteration while i + 300*8+64 < len: each input qword is XORed with the bitbuffer |
 | | *    qword at the same position, then XORed into bitbuffer 145, 183 and 211 qwords ahead and |
 | | *    stored 300 qwords ahead. |
 | | * 2. 32 bytes per iteration while i + 72 < len: shift/XOR folding with five 64-bit carries. |
 | | * 3. Tail: the remaining len - i bytes go through crc_table one byte at a time. |
 | | * |
 | | * crc: initial CRC value, placed in bitbuffer[0] (byte-swapped when not little-endian) |
 | | * buf: input data, read as 64-bit words |
 | | * len: input length in bytes |
 | | * Returns the computed CRC32 value. |
 | | * |
 | | * NOTE(review): no bound on len is checked here; presumably callers keep len small enough that |
 | | * every bitbuffer index stays inside the 4096-qword buffer -- confirm against the call site. |
 | | */ |
500 | 0 | Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive (uint32_t crc, const uint64_t* buf, size_t len) { |
501 | 0 | const uint64_t* input = buf; |
502 | 0 | uint64_t bitbuffer[32768 / sizeof(uint64_t)]; |
503 | 0 | const uint8_t* bitbufferbytes = (const uint8_t*) bitbuffer; |
504 | 0 | memset(bitbuffer, 0, 32768); |
505 | 0 | #if BYTE_ORDER == LITTLE_ENDIAN |
506 | 0 | bitbuffer[0] = crc; /* the incoming CRC is mixed into the first input qword through bitbuffer */ |
507 | | #else |
508 | | bitbuffer[0] = ZSWAP64(crc); |
509 | | #endif |
510 | 

511 | 0 | crc = 0; /* crc is reused as the accumulator of the final byte-wise loop */ |
512 | 

513 | 0 | size_t i = 0; /* byte offset into input, shared by every stage below */ |
514 | 

515 | 0 | for(; i + 300*8+64 < len; i += 64) { |
516 | 0 | uint64_t in1, in2, in3, in4; |
517 | 0 | uint64_t in5, in6, in7, in8; |
518 | 0 | size_t inoffset = (i/8); |
519 | 

520 | 0 | in1 = input[i / sizeof(uint64_t) + 0] ^ bitbuffer[inoffset + 0]; |
521 | 0 | in2 = input[i / sizeof(uint64_t) + 1] ^ bitbuffer[inoffset + 1]; |
522 | 0 | in3 = input[i / sizeof(uint64_t) + 2] ^ bitbuffer[inoffset + 2]; |
523 | 0 | in4 = input[i / sizeof(uint64_t) + 3] ^ bitbuffer[inoffset + 3]; |
524 | 0 | in5 = input[i / sizeof(uint64_t) + 4] ^ bitbuffer[inoffset + 4]; |
525 | 0 | in6 = input[i / sizeof(uint64_t) + 5] ^ bitbuffer[inoffset + 5]; |
526 | 0 | in7 = input[i / sizeof(uint64_t) + 6] ^ bitbuffer[inoffset + 6]; |
527 | 0 | in8 = input[i / sizeof(uint64_t) + 7] ^ bitbuffer[inoffset + 7]; |
528 | | |
529 | | // qword offsets [0, 145, 183, 211]: each word is XORed into bitbuffer 145, 183 and 211 qwords ahead, then stored 300 ahead |
530 | 

531 | 0 | bitbuffer[(i/8 + 0 + 145)] ^= in1; |
532 | 0 | bitbuffer[(i/8 + 1 + 145)] ^= in2; |
533 | 0 | bitbuffer[(i/8 + 2 + 145)] ^= in3; |
534 | 0 | bitbuffer[(i/8 + 3 + 145)] ^= in4; |
535 | 0 | bitbuffer[(i/8 + 4 + 145)] ^= in5; |
536 | 0 | bitbuffer[(i/8 + 5 + 145)] ^= in6; |
537 | 0 | bitbuffer[(i/8 + 6 + 145)] ^= in7; |
538 | 0 | bitbuffer[(i/8 + 7 + 145)] ^= in8; |
539 | 

540 | 0 | bitbuffer[(i/8 + 0 + 183)] ^= in1; |
541 | 0 | bitbuffer[(i/8 + 1 + 183)] ^= in2; |
542 | 0 | bitbuffer[(i/8 + 2 + 183)] ^= in3; |
543 | 0 | bitbuffer[(i/8 + 3 + 183)] ^= in4; |
544 | 0 | bitbuffer[(i/8 + 4 + 183)] ^= in5; |
545 | 0 | bitbuffer[(i/8 + 5 + 183)] ^= in6; |
546 | 0 | bitbuffer[(i/8 + 6 + 183)] ^= in7; |
547 | 0 | bitbuffer[(i/8 + 7 + 183)] ^= in8; |
548 | 

549 | 0 | bitbuffer[(i/8 + 0 + 211)] ^= in1; |
550 | 0 | bitbuffer[(i/8 + 1 + 211)] ^= in2; |
551 | 0 | bitbuffer[(i/8 + 2 + 211)] ^= in3; |
552 | 0 | bitbuffer[(i/8 + 3 + 211)] ^= in4; |
553 | 0 | bitbuffer[(i/8 + 4 + 211)] ^= in5; |
554 | 0 | bitbuffer[(i/8 + 5 + 211)] ^= in6; |
555 | 0 | bitbuffer[(i/8 + 6 + 211)] ^= in7; |
556 | 0 | bitbuffer[(i/8 + 7 + 211)] ^= in8; |
557 | 

558 | 0 | bitbuffer[(i/8 + 0 + 300)] = in1; /* plain store, not XOR: overwrites whatever was at +300 */ |
559 | 0 | bitbuffer[(i/8 + 1 + 300)] = in2; |
560 | 0 | bitbuffer[(i/8 + 2 + 300)] = in3; |
561 | 0 | bitbuffer[(i/8 + 3 + 300)] = in4; |
562 | 0 | bitbuffer[(i/8 + 4 + 300)] = in5; |
563 | 0 | bitbuffer[(i/8 + 5 + 300)] = in6; |
564 | 0 | bitbuffer[(i/8 + 6 + 300)] = in7; |
565 | 0 | bitbuffer[(i/8 + 7 + 300)] = in8; |
566 | 0 | } |
567 | 

568 | 0 | uint64_t next1_64 = 0; |
569 | 0 | uint64_t next2_64 = 0; |
570 | 0 | uint64_t next3_64 = 0; |
571 | 0 | uint64_t next4_64 = 0; |
572 | 0 | uint64_t next5_64 = 0; |
573 | 0 | uint64_t final[9] = {0}; /* 72-byte staging area for the tail */ |
574 | 

575 | 0 | for(; (i + 72 < len); i += 32) { |
576 | 0 | uint64_t in1; |
577 | 0 | uint64_t in2; |
578 | 0 | uint64_t in3; |
579 | 0 | uint64_t in4; |
580 | 0 | uint64_t a1, a2, a3, a4; |
581 | 0 | uint64_t b1, b2, b3, b4; |
582 | 0 | uint64_t c1, c2, c3, c4; |
583 | 0 | uint64_t d1, d2, d3, d4; |
584 | 

585 | 0 | uint64_t out1; |
586 | 0 | uint64_t out2; |
587 | 0 | uint64_t out3; |
588 | 0 | uint64_t out4; |
589 | 0 | uint64_t out5; |
590 | 

591 | 0 | in1 = input[i / sizeof(uint64_t)] ^ bitbuffer[(i / sizeof(uint64_t))]; /* input is a uint64_t array, so the byte offset is divided by sizeof(uint64_t), not sizeof(z_word_t) */ |
592 | 0 | in2 = input[(i + 8) / sizeof(uint64_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 1)]; |
593 | | #if BYTE_ORDER == BIG_ENDIAN |
594 | | in1 = ZSWAP64(in1); |
595 | | in2 = ZSWAP64(in2); |
596 | | #endif |
597 | 0 | in1 ^= next1_64; |
598 | 0 | in2 ^= next2_64; |
599 | 

600 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
601 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
602 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
603 | 0 | a4 = (in1 >> 20); |
604 | 

605 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
606 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
607 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
608 | 0 | b4 = (in2 >> 20); |
609 | 

610 | 0 | in3 = input[(i + 16) / sizeof(uint64_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 2)]; |
611 | 0 | in4 = input[(i + 24) / sizeof(uint64_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 3)]; |
612 | | #if BYTE_ORDER == BIG_ENDIAN |
613 | | in3 = ZSWAP64(in3); |
614 | | in4 = ZSWAP64(in4); |
615 | | #endif |
616 | 0 | in3 ^= next3_64 ^ a1; |
617 | 0 | in4 ^= next4_64 ^ a2 ^ b1; |
618 | 

619 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
620 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
621 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
622 | 0 | c4 = (in3 >> 20); |
623 | 

624 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
625 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
626 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
627 | 0 | d4 = (in4 >> 20); |
628 | 

629 | 0 | out1 = a3 ^ b2 ^ c1; |
630 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
631 | 0 | out3 = b4 ^ c3 ^ d2; |
632 | 0 | out4 = c4 ^ d3; |
633 | 0 | out5 = d4; |
634 | 

635 | 0 | next1_64 = next5_64 ^ out1; /* the fifth carry of the previous iteration lands on the first qword of the next one */ |
636 | 0 | next2_64 = out2; |
637 | 0 | next3_64 = out3; |
638 | 0 | next4_64 = out4; |
639 | 0 | next5_64 = out5; |
640 | 

641 | 0 | } |
642 | 

643 | | #if BYTE_ORDER == BIG_ENDIAN |
644 | | next1_64 = ZSWAP64(next1_64); |
645 | | next2_64 = ZSWAP64(next2_64); |
646 | | next3_64 = ZSWAP64(next3_64); |
647 | | next4_64 = ZSWAP64(next4_64); |
648 | | next5_64 = ZSWAP64(next5_64); |
649 | | #endif |
650 | 

651 | 0 | memcpy(final, input+(i / sizeof(uint64_t)), len-i); /* the loop above exits once i + 72 >= len, so len - i fits in final[] provided len >= i */ |
652 | 0 | final[0] ^= next1_64; |
653 | 0 | final[1] ^= next2_64; |
654 | 0 | final[2] ^= next3_64; |
655 | 0 | final[3] ^= next4_64; |
656 | 0 | final[4] ^= next5_64; |
657 | 

658 | 0 | uint8_t* final_bytes = (uint8_t*) final; |
659 | 

660 | 0 | for (size_t j = 0; j < (len-i); j++) { /* byte-at-a-time table CRC over the tail, each byte first XORed with its bitbuffer byte */ |
661 | 0 | crc = crc_table[(crc ^ final_bytes[j] ^ bitbufferbytes[(j+i)]) & 0xff] ^ (crc >> 8); |
662 | 0 | } |
663 | 

664 | 0 | return crc; |
665 | 0 | } |
666 | | |
667 | | /* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398 */ |
668 | 0 | Z_INTERNAL uint32_t crc32_chorba_small_nondestructive (uint32_t crc, const uint64_t* buf, size_t len) { |
669 | 0 | const uint64_t* input = buf; |
670 | 0 | uint64_t final[9] = {0}; |
671 | 0 | uint64_t next1 = crc; |
672 | 0 | crc = 0; |
673 | 0 | uint64_t next2 = 0; |
674 | 0 | uint64_t next3 = 0; |
675 | 0 | uint64_t next4 = 0; |
676 | 0 | uint64_t next5 = 0; |
677 | |
|
678 | 0 | size_t i = 0; |
679 | | |
680 | | /* This is weird, doing for vs while drops 10% off the exec time */ |
681 | 0 | for(; (i + 256 + 40 + 32 + 32) < len; i += 32) { |
682 | 0 | uint64_t in1; |
683 | 0 | uint64_t in2; |
684 | 0 | uint64_t in3; |
685 | 0 | uint64_t in4; |
686 | 0 | uint64_t a1, a2, a3, a4; |
687 | 0 | uint64_t b1, b2, b3, b4; |
688 | 0 | uint64_t c1, c2, c3, c4; |
689 | 0 | uint64_t d1, d2, d3, d4; |
690 | |
|
691 | 0 | uint64_t out1; |
692 | 0 | uint64_t out2; |
693 | 0 | uint64_t out3; |
694 | 0 | uint64_t out4; |
695 | 0 | uint64_t out5; |
696 | |
|
697 | 0 | uint64_t chorba1 = input[i / sizeof(uint64_t)]; |
698 | 0 | uint64_t chorba2 = input[i / sizeof(uint64_t) + 1]; |
699 | 0 | uint64_t chorba3 = input[i / sizeof(uint64_t) + 2]; |
700 | 0 | uint64_t chorba4 = input[i / sizeof(uint64_t) + 3]; |
701 | 0 | uint64_t chorba5 = input[i / sizeof(uint64_t) + 4]; |
702 | 0 | uint64_t chorba6 = input[i / sizeof(uint64_t) + 5]; |
703 | 0 | uint64_t chorba7 = input[i / sizeof(uint64_t) + 6]; |
704 | 0 | uint64_t chorba8 = input[i / sizeof(uint64_t) + 7]; |
705 | | #if BYTE_ORDER == BIG_ENDIAN |
706 | | chorba1 = ZSWAP64(chorba1); |
707 | | chorba2 = ZSWAP64(chorba2); |
708 | | chorba3 = ZSWAP64(chorba3); |
709 | | chorba4 = ZSWAP64(chorba4); |
710 | | chorba5 = ZSWAP64(chorba5); |
711 | | chorba6 = ZSWAP64(chorba6); |
712 | | chorba7 = ZSWAP64(chorba7); |
713 | | chorba8 = ZSWAP64(chorba8); |
714 | | #endif |
715 | 0 | chorba1 ^= next1; |
716 | 0 | chorba2 ^= next2; |
717 | 0 | chorba3 ^= next3; |
718 | 0 | chorba4 ^= next4; |
719 | 0 | chorba5 ^= next5; |
720 | 0 | chorba7 ^= chorba1; |
721 | 0 | chorba8 ^= chorba2; |
722 | 0 | i += 8 * 8; |
723 | | |
724 | | /* 0-3 */ |
725 | 0 | in1 = input[i / sizeof(uint64_t)]; |
726 | 0 | in2 = input[i / sizeof(uint64_t) + 1]; |
727 | | #if BYTE_ORDER == BIG_ENDIAN |
728 | | in1 = ZSWAP64(in1); |
729 | | in2 = ZSWAP64(in2); |
730 | | #endif |
731 | 0 | in1 ^= chorba3; |
732 | 0 | in2 ^= chorba4 ^ chorba1; |
733 | |
|
734 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
735 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
736 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
737 | 0 | a4 = (in1 >> 20); |
738 | |
|
739 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
740 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
741 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
742 | 0 | b4 = (in2 >> 20); |
743 | |
|
744 | 0 | in3 = input[i / sizeof(uint64_t) + 2]; |
745 | 0 | in4 = input[i / sizeof(uint64_t) + 3]; |
746 | | #if BYTE_ORDER == BIG_ENDIAN |
747 | | in3 = ZSWAP64(in3); |
748 | | in4 = ZSWAP64(in4); |
749 | | #endif |
750 | 0 | in3 ^= a1 ^ chorba5 ^ chorba2 ^ chorba1; |
751 | 0 | in4 ^= a2 ^ b1 ^ chorba6 ^ chorba3 ^ chorba2; |
752 | |
|
753 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
754 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
755 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
756 | 0 | c4 = (in3 >> 20); |
757 | |
|
758 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
759 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
760 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
761 | 0 | d4 = (in4 >> 20); |
762 | |
|
763 | 0 | out1 = a3 ^ b2 ^ c1; |
764 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
765 | 0 | out3 = b4 ^ c3 ^ d2; |
766 | 0 | out4 = c4 ^ d3; |
767 | 0 | out5 = d4; |
768 | |
|
769 | 0 | next1 = out1; |
770 | 0 | next2 = out2; |
771 | 0 | next3 = out3; |
772 | 0 | next4 = out4; |
773 | 0 | next5 = out5; |
774 | |
|
775 | 0 | i += 32; |
776 | | |
777 | | /* 4-7 */ |
778 | 0 | in1 = input[i / sizeof(uint64_t)]; |
779 | 0 | in2 = input[i / sizeof(uint64_t) + 1]; |
780 | | #if BYTE_ORDER == BIG_ENDIAN |
781 | | in1 = ZSWAP64(in1); |
782 | | in2 = ZSWAP64(in2); |
783 | | #endif |
784 | 0 | in1 ^= next1 ^ chorba7 ^ chorba4 ^ chorba3; |
785 | 0 | in2 ^= next2 ^ chorba8 ^ chorba5 ^ chorba4; |
786 | |
|
787 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
788 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
789 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
790 | 0 | a4 = (in1 >> 20); |
791 | |
|
792 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
793 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
794 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
795 | 0 | b4 = (in2 >> 20); |
796 | |
|
797 | 0 | in3 = input[i / sizeof(uint64_t) + 2]; |
798 | 0 | in4 = input[i / sizeof(uint64_t) + 3]; |
799 | | #if BYTE_ORDER == BIG_ENDIAN |
800 | | in3 = ZSWAP64(in3); |
801 | | in4 = ZSWAP64(in4); |
802 | | #endif |
803 | 0 | in3 ^= next3 ^ a1 ^ chorba6 ^ chorba5; |
804 | 0 | in4 ^= next4 ^ a2 ^ b1 ^ chorba7 ^ chorba6; |
805 | |
|
806 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
807 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
808 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
809 | 0 | c4 = (in3 >> 20); |
810 | |
|
811 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
812 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
813 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
814 | 0 | d4 = (in4 >> 20); |
815 | |
|
816 | 0 | out1 = a3 ^ b2 ^ c1; |
817 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
818 | 0 | out3 = b4 ^ c3 ^ d2; |
819 | 0 | out4 = c4 ^ d3; |
820 | 0 | out5 = d4; |
821 | |
|
822 | 0 | next1 = next5 ^ out1; |
823 | 0 | next2 = out2; |
824 | 0 | next3 = out3; |
825 | 0 | next4 = out4; |
826 | 0 | next5 = out5; |
827 | |
|
828 | 0 | i += 32; |
829 | | |
830 | | /* 8-11 */ |
831 | 0 | in1 = input[i / sizeof(uint64_t)]; |
832 | 0 | in2 = input[i / sizeof(uint64_t) + 1]; |
833 | | #if BYTE_ORDER == BIG_ENDIAN |
834 | | in1 = ZSWAP64(in1); |
835 | | in2 = ZSWAP64(in2); |
836 | | #endif |
837 | 0 | in1 ^= next1 ^ chorba8 ^ chorba7 ^ chorba1; |
838 | 0 | in2 ^= next2 ^ chorba8 ^ chorba2; |
839 | |
|
840 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
841 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
842 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
843 | 0 | a4 = (in1 >> 20); |
844 | |
|
845 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
846 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
847 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
848 | 0 | b4 = (in2 >> 20); |
849 | |
|
850 | 0 | in3 = input[i / sizeof(uint64_t) + 2]; |
851 | 0 | in4 = input[i / sizeof(uint64_t) + 3]; |
852 | | #if BYTE_ORDER == BIG_ENDIAN |
853 | | in3 = ZSWAP64(in3); |
854 | | in4 = ZSWAP64(in4); |
855 | | #endif |
856 | 0 | in3 ^= next3 ^ a1 ^ chorba3; |
857 | 0 | in4 ^= next4 ^ a2 ^ b1 ^ chorba4; |
858 | |
|
859 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
860 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
861 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
862 | 0 | c4 = (in3 >> 20); |
863 | |
|
864 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
865 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
866 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
867 | 0 | d4 = (in4 >> 20); |
868 | |
|
869 | 0 | out1 = a3 ^ b2 ^ c1; |
870 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
871 | 0 | out3 = b4 ^ c3 ^ d2; |
872 | 0 | out4 = c4 ^ d3; |
873 | 0 | out5 = d4; |
874 | |
|
875 | 0 | next1 = next5 ^ out1; |
876 | 0 | next2 = out2; |
877 | 0 | next3 = out3; |
878 | 0 | next4 = out4; |
879 | 0 | next5 = out5; |
880 | |
|
881 | 0 | i += 32; |
882 | | |
883 | | /* 12-15 */ |
884 | 0 | in1 = input[i / sizeof(uint64_t)]; |
885 | 0 | in2 = input[i / sizeof(uint64_t) + 1]; |
886 | | #if BYTE_ORDER == BIG_ENDIAN |
887 | | in1 = ZSWAP64(in1); |
888 | | in2 = ZSWAP64(in2); |
889 | | #endif |
890 | 0 | in1 ^= next1 ^ chorba5 ^ chorba1; |
891 | 0 | in2 ^= next2 ^ chorba6 ^ chorba2 ^ chorba1; |
892 | |
|
893 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
894 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
895 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
896 | 0 | a4 = (in1 >> 20); |
897 | |
|
898 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
899 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
900 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
901 | 0 | b4 = (in2 >> 20); |
902 | |
|
903 | 0 | in3 = input[i / sizeof(uint64_t) + 2]; |
904 | 0 | in4 = input[i / sizeof(uint64_t) + 3]; |
905 | | #if BYTE_ORDER == BIG_ENDIAN |
906 | | in3 = ZSWAP64(in3); |
907 | | in4 = ZSWAP64(in4); |
908 | | #endif |
909 | 0 | in3 ^= next3 ^ a1 ^ chorba7 ^ chorba3 ^ chorba2 ^ chorba1; |
910 | 0 | in4 ^= next4 ^ a2 ^ b1 ^ chorba8 ^ chorba4 ^ chorba3 ^ chorba2; |
911 | |
|
912 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
913 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
914 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
915 | 0 | c4 = (in3 >> 20); |
916 | |
|
917 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
918 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
919 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
920 | 0 | d4 = (in4 >> 20); |
921 | |
|
922 | 0 | out1 = a3 ^ b2 ^ c1; |
923 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
924 | 0 | out3 = b4 ^ c3 ^ d2; |
925 | 0 | out4 = c4 ^ d3; |
926 | 0 | out5 = d4; |
927 | |
|
928 | 0 | next1 = next5 ^ out1; |
929 | 0 | next2 = out2; |
930 | 0 | next3 = out3; |
931 | 0 | next4 = out4; |
932 | 0 | next5 = out5; |
933 | |
|
934 | 0 | i += 32; |
935 | | |
936 | | /* 16-19 */ |
937 | 0 | in1 = input[i / sizeof(uint64_t)]; |
938 | 0 | in2 = input[i / sizeof(uint64_t) + 1]; |
939 | | #if BYTE_ORDER == BIG_ENDIAN |
940 | | in1 = ZSWAP64(in1); |
941 | | in2 = ZSWAP64(in2); |
942 | | #endif |
943 | 0 | in1 ^= next1 ^ chorba5 ^ chorba4 ^ chorba3 ^ chorba1; |
944 | 0 | in2 ^= next2 ^ chorba6 ^ chorba5 ^ chorba4 ^ chorba1 ^ chorba2; |
945 | |
|
946 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
947 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
948 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
949 | 0 | a4 = (in1 >> 20); |
950 | |
|
951 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
952 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
953 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
954 | 0 | b4 = (in2 >> 20); |
955 | |
|
956 | 0 | in3 = input[i / sizeof(uint64_t) + 2]; |
957 | 0 | in4 = input[i / sizeof(uint64_t) + 3]; |
958 | | #if BYTE_ORDER == BIG_ENDIAN |
959 | | in3 = ZSWAP64(in3); |
960 | | in4 = ZSWAP64(in4); |
961 | | #endif |
962 | 0 | in3 ^= next3 ^ a1 ^ chorba7 ^ chorba6 ^ chorba5 ^ chorba2 ^ chorba3; |
963 | 0 | in4 ^= next4 ^ a2 ^ b1 ^ chorba8 ^ chorba7 ^ chorba6 ^ chorba3 ^ chorba4 ^ chorba1; |
964 | |
|
965 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
966 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
967 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
968 | 0 | c4 = (in3 >> 20); |
969 | |
|
970 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
971 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
972 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
973 | 0 | d4 = (in4 >> 20); |
974 | |
|
975 | 0 | out1 = a3 ^ b2 ^ c1; |
976 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
977 | 0 | out3 = b4 ^ c3 ^ d2; |
978 | 0 | out4 = c4 ^ d3; |
979 | 0 | out5 = d4; |
980 | |
|
981 | 0 | next1 = next5 ^ out1; |
982 | 0 | next2 = out2; |
983 | 0 | next3 = out3; |
984 | 0 | next4 = out4; |
985 | 0 | next5 = out5; |
986 | |
|
987 | 0 | i += 32; |
988 | | |
989 | | /* 20-23 */ |
990 | 0 | in1 = input[i / sizeof(uint64_t)]; |
991 | 0 | in2 = input[i / sizeof(uint64_t) + 1]; |
992 | | #if BYTE_ORDER == BIG_ENDIAN |
993 | | in1 = ZSWAP64(in1); |
994 | | in2 = ZSWAP64(in2); |
995 | | #endif |
996 | 0 | in1 ^= next1 ^ chorba8 ^ chorba7 ^ chorba4 ^ chorba5 ^ chorba2 ^ chorba1; |
997 | 0 | in2 ^= next2 ^ chorba8 ^ chorba5 ^ chorba6 ^ chorba3 ^ chorba2; |
998 | |
|
999 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
1000 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
1001 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
1002 | 0 | a4 = (in1 >> 20); |
1003 | |
|
1004 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
1005 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
1006 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
1007 | 0 | b4 = (in2 >> 20); |
1008 | |
|
1009 | 0 | in3 = input[i / sizeof(uint64_t) + 2]; |
1010 | 0 | in4 = input[i / sizeof(uint64_t) + 3]; |
1011 | | #if BYTE_ORDER == BIG_ENDIAN |
1012 | | in3 = ZSWAP64(in3); |
1013 | | in4 = ZSWAP64(in4); |
1014 | | #endif |
1015 | 0 | in3 ^= next3 ^ a1 ^ chorba7 ^ chorba6 ^ chorba4 ^ chorba3 ^ chorba1; |
1016 | 0 | in4 ^= next4 ^ a2 ^ b1 ^ chorba8 ^ chorba7 ^ chorba5 ^ chorba4 ^ chorba2 ^ chorba1; |
1017 | |
|
1018 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
1019 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
1020 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
1021 | 0 | c4 = (in3 >> 20); |
1022 | |
|
1023 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
1024 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
1025 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
1026 | 0 | d4 = (in4 >> 20); |
1027 | |
|
1028 | 0 | out1 = a3 ^ b2 ^ c1; |
1029 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
1030 | 0 | out3 = b4 ^ c3 ^ d2; |
1031 | 0 | out4 = c4 ^ d3; |
1032 | 0 | out5 = d4; |
1033 | |
|
1034 | 0 | next1 = next5 ^ out1; |
1035 | 0 | next2 = out2; |
1036 | 0 | next3 = out3; |
1037 | 0 | next4 = out4; |
1038 | 0 | next5 = out5; |
1039 | |
|
1040 | 0 | i += 32; |
1041 | | |
1042 | | /* 24-27 */ |
1043 | 0 | in1 = input[i / sizeof(uint64_t)]; |
1044 | 0 | in2 = input[i / sizeof(uint64_t) + 1]; |
1045 | | #if BYTE_ORDER == BIG_ENDIAN |
1046 | | in1 = ZSWAP64(in1); |
1047 | | in2 = ZSWAP64(in2); |
1048 | | #endif |
1049 | 0 | in1 ^= next1 ^ chorba8 ^ chorba6 ^ chorba5 ^ chorba3 ^ chorba2 ^ chorba1; |
1050 | 0 | in2 ^= next2 ^ chorba7 ^ chorba6 ^ chorba4 ^ chorba3 ^ chorba2; |
1051 | |
|
1052 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
1053 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
1054 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
1055 | 0 | a4 = (in1 >> 20); |
1056 | |
|
1057 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
1058 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
1059 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
1060 | 0 | b4 = (in2 >> 20); |
1061 | |
|
1062 | 0 | in3 = input[i / sizeof(uint64_t) + 2]; |
1063 | 0 | in4 = input[i / sizeof(uint64_t) + 3]; |
1064 | | #if BYTE_ORDER == BIG_ENDIAN |
1065 | | in3 = ZSWAP64(in3); |
1066 | | in4 = ZSWAP64(in4); |
1067 | | #endif |
1068 | 0 | in3 ^= next3 ^ a1 ^ chorba8 ^ chorba7 ^ chorba5 ^ chorba4 ^ chorba3; |
1069 | 0 | in4 ^= next4 ^ a2 ^ b1 ^ chorba8 ^ chorba6 ^ chorba5 ^ chorba4; |
1070 | |
|
1071 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
1072 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
1073 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
1074 | 0 | c4 = (in3 >> 20); |
1075 | |
|
1076 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
1077 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
1078 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
1079 | 0 | d4 = (in4 >> 20); |
1080 | |
|
1081 | 0 | out1 = a3 ^ b2 ^ c1; |
1082 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
1083 | 0 | out3 = b4 ^ c3 ^ d2; |
1084 | 0 | out4 = c4 ^ d3; |
1085 | 0 | out5 = d4; |
1086 | |
|
1087 | 0 | next1 = next5 ^ out1; |
1088 | 0 | next2 = out2; |
1089 | 0 | next3 = out3; |
1090 | 0 | next4 = out4; |
1091 | 0 | next5 = out5; |
1092 | |
|
1093 | 0 | i += 32; |
1094 | | |
1095 | | /* 28-31 */ |
1096 | 0 | in1 = input[i / sizeof(uint64_t)]; |
1097 | 0 | in2 = input[i / sizeof(uint64_t) + 1]; |
1098 | | #if BYTE_ORDER == BIG_ENDIAN |
1099 | | in1 = ZSWAP64(in1); |
1100 | | in2 = ZSWAP64(in2); |
1101 | | #endif |
1102 | 0 | in1 ^= next1 ^ chorba7 ^ chorba6 ^ chorba5; |
1103 | 0 | in2 ^= next2 ^ chorba8 ^ chorba7 ^ chorba6; |
1104 | |
|
1105 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
1106 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
1107 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
1108 | 0 | a4 = (in1 >> 20); |
1109 | |
|
1110 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
1111 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
1112 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
1113 | 0 | b4 = (in2 >> 20); |
1114 | |
|
1115 | 0 | in3 = input[i / sizeof(uint64_t) + 2]; |
1116 | 0 | in4 = input[i / sizeof(uint64_t) + 3]; |
1117 | | #if BYTE_ORDER == BIG_ENDIAN |
1118 | | in3 = ZSWAP64(in3); |
1119 | | in4 = ZSWAP64(in4); |
1120 | | #endif |
1121 | 0 | in3 ^= next3 ^ a1 ^ chorba8 ^ chorba7; |
1122 | 0 | in4 ^= next4 ^ a2 ^ b1 ^ chorba8; |
1123 | |
|
1124 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
1125 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
1126 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
1127 | 0 | c4 = (in3 >> 20); |
1128 | |
|
1129 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
1130 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
1131 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
1132 | 0 | d4 = (in4 >> 20); |
1133 | |
|
1134 | 0 | out1 = a3 ^ b2 ^ c1; |
1135 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
1136 | 0 | out3 = b4 ^ c3 ^ d2; |
1137 | 0 | out4 = c4 ^ d3; |
1138 | 0 | out5 = d4; |
1139 | |
|
1140 | 0 | next1 = next5 ^ out1; |
1141 | 0 | next2 = out2; |
1142 | 0 | next3 = out3; |
1143 | 0 | next4 = out4; |
1144 | 0 | next5 = out5; |
1145 | 0 | } |
1146 | |
|
1147 | 0 | for(; (i + 40 + 32) < len; i += 32) { |
1148 | 0 | uint64_t in1; |
1149 | 0 | uint64_t in2; |
1150 | 0 | uint64_t in3; |
1151 | 0 | uint64_t in4; |
1152 | 0 | uint64_t a1, a2, a3, a4; |
1153 | 0 | uint64_t b1, b2, b3, b4; |
1154 | 0 | uint64_t c1, c2, c3, c4; |
1155 | 0 | uint64_t d1, d2, d3, d4; |
1156 | |
|
1157 | 0 | uint64_t out1; |
1158 | 0 | uint64_t out2; |
1159 | 0 | uint64_t out3; |
1160 | 0 | uint64_t out4; |
1161 | 0 | uint64_t out5; |
1162 | |
|
1163 | 0 | in1 = input[i / sizeof(uint64_t)]; |
1164 | 0 | in2 = input[i / sizeof(uint64_t) + 1]; |
1165 | | #if BYTE_ORDER == BIG_ENDIAN |
1166 | | in1 = ZSWAP64(in1); |
1167 | | in2 = ZSWAP64(in2); |
1168 | | #endif |
1169 | 0 | in1 ^=next1; |
1170 | 0 | in2 ^=next2; |
1171 | |
|
1172 | 0 | a1 = (in1 << 17) ^ (in1 << 55); |
1173 | 0 | a2 = (in1 >> 47) ^ (in1 >> 9) ^ (in1 << 19); |
1174 | 0 | a3 = (in1 >> 45) ^ (in1 << 44); |
1175 | 0 | a4 = (in1 >> 20); |
1176 | |
|
1177 | 0 | b1 = (in2 << 17) ^ (in2 << 55); |
1178 | 0 | b2 = (in2 >> 47) ^ (in2 >> 9) ^ (in2 << 19); |
1179 | 0 | b3 = (in2 >> 45) ^ (in2 << 44); |
1180 | 0 | b4 = (in2 >> 20); |
1181 | |
|
1182 | 0 | in3 = input[i / sizeof(uint64_t) + 2]; |
1183 | 0 | in4 = input[i / sizeof(uint64_t) + 3]; |
1184 | | #if BYTE_ORDER == BIG_ENDIAN |
1185 | | in3 = ZSWAP64(in3); |
1186 | | in4 = ZSWAP64(in4); |
1187 | | #endif |
1188 | 0 | in3 ^= next3 ^ a1; |
1189 | 0 | in4 ^= next4 ^ a2 ^ b1; |
1190 | |
|
1191 | 0 | c1 = (in3 << 17) ^ (in3 << 55); |
1192 | 0 | c2 = (in3 >> 47) ^ (in3 >> 9) ^ (in3 << 19); |
1193 | 0 | c3 = (in3 >> 45) ^ (in3 << 44); |
1194 | 0 | c4 = (in3 >> 20); |
1195 | |
|
1196 | 0 | d1 = (in4 << 17) ^ (in4 << 55); |
1197 | 0 | d2 = (in4 >> 47) ^ (in4 >> 9) ^ (in4 << 19); |
1198 | 0 | d3 = (in4 >> 45) ^ (in4 << 44); |
1199 | 0 | d4 = (in4 >> 20); |
1200 | |
|
1201 | 0 | out1 = a3 ^ b2 ^ c1; |
1202 | 0 | out2 = a4 ^ b3 ^ c2 ^ d1; |
1203 | 0 | out3 = b4 ^ c3 ^ d2; |
1204 | 0 | out4 = c4 ^ d3; |
1205 | 0 | out5 = d4; |
1206 | |
|
1207 | 0 | next1 = next5 ^ out1; |
1208 | 0 | next2 = out2; |
1209 | 0 | next3 = out3; |
1210 | 0 | next4 = out4; |
1211 | 0 | next5 = out5; |
1212 | 0 | } |
1213 | |
|
1214 | | #if BYTE_ORDER == BIG_ENDIAN |
1215 | | next1 = ZSWAP64(next1); |
1216 | | next2 = ZSWAP64(next2); |
1217 | | next3 = ZSWAP64(next3); |
1218 | | next4 = ZSWAP64(next4); |
1219 | | next5 = ZSWAP64(next5); |
1220 | | #endif |
1221 | |
|
1222 | 0 | memcpy(final, input+(i / sizeof(uint64_t)), len-i); |
1223 | 0 | final[0] ^= next1; |
1224 | 0 | final[1] ^= next2; |
1225 | 0 | final[2] ^= next3; |
1226 | 0 | final[3] ^= next4; |
1227 | 0 | final[4] ^= next5; |
1228 | |
|
1229 | 0 | crc = crc32_braid_internal(crc, (uint8_t*) final, len-i); |
1230 | |
|
1231 | 0 | return crc; |
1232 | 0 | } |
1233 | | |
1234 | | #else // OPTIMAL_CMP == 64 |
1235 | | |
1236 | | Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const uint32_t* buf, size_t len) { |
1237 | | const uint32_t* input = buf; |
1238 | | uint32_t final[20] = {0}; |
1239 | | |
1240 | | uint32_t next1 = crc; |
1241 | | crc = 0; |
1242 | | uint32_t next2 = 0; |
1243 | | uint32_t next3 = 0; |
1244 | | uint32_t next4 = 0; |
1245 | | uint32_t next5 = 0; |
1246 | | uint32_t next6 = 0; |
1247 | | uint32_t next7 = 0; |
1248 | | uint32_t next8 = 0; |
1249 | | uint32_t next9 = 0; |
1250 | | uint32_t next10 = 0; |
1251 | | |
1252 | | size_t i = 0; |
1253 | | for(; i + 80 < len; i += 40) { |
1254 | | uint32_t in1; |
1255 | | uint32_t in2; |
1256 | | uint32_t in3; |
1257 | | uint32_t in4; |
1258 | | uint32_t in5; |
1259 | | uint32_t in6; |
1260 | | uint32_t in7; |
1261 | | uint32_t in8; |
1262 | | uint32_t in9; |
1263 | | uint32_t in10; |
1264 | | |
1265 | | uint32_t a1, a2, a3, a4, a6, a7; |
1266 | | uint32_t b1, b2, b3, b4, b6, b7; |
1267 | | uint32_t c1, c2, c3, c4, c6, c7; |
1268 | | uint32_t d1, d2, d3, d4, d6, d7; |
1269 | | uint32_t e1, e2, e3, e4, e6, e7; |
1270 | | uint32_t f1, f2, f3, f4, f6, f7; |
1271 | | uint32_t g1, g2, g3, g4, g6, g7; |
1272 | | uint32_t h1, h2, h3, h4, h6, h7; |
1273 | | uint32_t i1, i2, i3, i4, i6, i7; |
1274 | | uint32_t j1, j2, j3, j4, j6, j7; |
1275 | | |
1276 | | uint32_t out1; |
1277 | | uint32_t out2; |
1278 | | uint32_t out3; |
1279 | | uint32_t out4; |
1280 | | uint32_t out5; |
1281 | | uint32_t out6; |
1282 | | uint32_t out7; |
1283 | | uint32_t out8; |
1284 | | uint32_t out9; |
1285 | | uint32_t out10; |
1286 | | |
1287 | | in1 = input[i/sizeof(uint32_t) + 0]; |
1288 | | in2 = input[i/sizeof(uint32_t) + 1]; |
1289 | | in3 = input[i/sizeof(uint32_t) + 2]; |
1290 | | in4 = input[i/sizeof(uint32_t) + 3]; |
1291 | | #if BYTE_ORDER == BIG_ENDIAN |
1292 | | in1 = ZSWAP32(in1); |
1293 | | in2 = ZSWAP32(in2); |
1294 | | in3 = ZSWAP32(in3); |
1295 | | in4 = ZSWAP32(in4); |
1296 | | #endif |
1297 | | in1 ^= next1; |
1298 | | in2 ^= next2; |
1299 | | in3 ^= next3; |
1300 | | in4 ^= next4; |
1301 | | |
1302 | | a1 = (in1 << 17); |
1303 | | a2 = (in1 >> 15) ^ (in1 << 23); |
1304 | | a3 = (in1 >> 9) ^ (in1 << 19); |
1305 | | a4 = (in1 >> 13); |
1306 | | a6 = (in1 << 12); |
1307 | | a7 = (in1 >> 20); |
1308 | | |
1309 | | b1 = (in2 << 17); |
1310 | | b2 = (in2 >> 15) ^ (in2 << 23); |
1311 | | b3 = (in2 >> 9) ^ (in2 << 19); |
1312 | | b4 = (in2 >> 13); |
1313 | | b6 = (in2 << 12); |
1314 | | b7 = (in2 >> 20); |
1315 | | |
1316 | | c1 = (in3 << 17); |
1317 | | c2 = (in3 >> 15) ^ (in3 << 23); |
1318 | | c3 = (in3 >> 9) ^ (in3 << 19); |
1319 | | c4 = (in3 >> 13); |
1320 | | c6 = (in3 << 12); |
1321 | | c7 = (in3 >> 20); |
1322 | | |
1323 | | d1 = (in4 << 17); |
1324 | | d2 = (in4 >> 15) ^ (in4 << 23); |
1325 | | d3 = (in4 >> 9) ^ (in4 << 19); |
1326 | | d4 = (in4 >> 13); |
1327 | | d6 = (in4 << 12); |
1328 | | d7 = (in4 >> 20); |
1329 | | |
1330 | | in5 = input[i/sizeof(uint32_t) + 4]; |
1331 | | in6 = input[i/sizeof(uint32_t) + 5]; |
1332 | | in7 = input[i/sizeof(uint32_t) + 6]; |
1333 | | in8 = input[i/sizeof(uint32_t) + 7]; |
1334 | | #if BYTE_ORDER == BIG_ENDIAN |
1335 | | in5 = ZSWAP32(in5); |
1336 | | in6 = ZSWAP32(in6); |
1337 | | in7 = ZSWAP32(in7); |
1338 | | in8 = ZSWAP32(in8); |
1339 | | #endif |
1340 | | in5 ^= next5 ^ a1; |
1341 | | in6 ^= next6 ^ a2 ^ b1; |
1342 | | in7 ^= next7 ^ a3 ^ b2 ^ c1; |
1343 | | in8 ^= next8 ^ a4 ^ b3 ^ c2 ^ d1; |
1344 | | |
1345 | | e1 = (in5 << 17); |
1346 | | e2 = (in5 >> 15) ^ (in5 << 23); |
1347 | | e3 = (in5 >> 9) ^ (in5 << 19); |
1348 | | e4 = (in5 >> 13); |
1349 | | e6 = (in5 << 12); |
1350 | | e7 = (in5 >> 20); |
1351 | | |
1352 | | f1 = (in6 << 17); |
1353 | | f2 = (in6 >> 15) ^ (in6 << 23); |
1354 | | f3 = (in6 >> 9) ^ (in6 << 19); |
1355 | | f4 = (in6 >> 13); |
1356 | | f6 = (in6 << 12); |
1357 | | f7 = (in6 >> 20); |
1358 | | |
1359 | | g1 = (in7 << 17); |
1360 | | g2 = (in7 >> 15) ^ (in7 << 23); |
1361 | | g3 = (in7 >> 9) ^ (in7 << 19); |
1362 | | g4 = (in7 >> 13); |
1363 | | g6 = (in7 << 12); |
1364 | | g7 = (in7 >> 20); |
1365 | | |
1366 | | h1 = (in8 << 17); |
1367 | | h2 = (in8 >> 15) ^ (in8 << 23); |
1368 | | h3 = (in8 >> 9) ^ (in8 << 19); |
1369 | | h4 = (in8 >> 13); |
1370 | | h6 = (in8 << 12); |
1371 | | h7 = (in8 >> 20); |
1372 | | |
1373 | | in9 = input[i/sizeof(uint32_t) + 8]; |
1374 | | in10 = input[i/sizeof(uint32_t) + 9]; |
1375 | | #if BYTE_ORDER == BIG_ENDIAN |
1376 | | in9 = ZSWAP32(in9); |
1377 | | in10 = ZSWAP32(in10); |
1378 | | #endif |
1379 | | in9 ^= next9 ^ b4 ^ c3 ^ d2 ^ e1; |
1380 | | in10 ^= next10 ^ a6 ^ c4 ^ d3 ^ e2 ^ f1; |
1381 | | |
1382 | | i1 = (in9 << 17); |
1383 | | i2 = (in9 >> 15) ^ (in9 << 23); |
1384 | | i3 = (in9 >> 9) ^ (in9 << 19); |
1385 | | i4 = (in9 >> 13); |
1386 | | i6 = (in9 << 12); |
1387 | | i7 = (in9 >> 20); |
1388 | | |
1389 | | j1 = (in10 << 17); |
1390 | | j2 = (in10 >> 15) ^ (in10 << 23); |
1391 | | j3 = (in10 >> 9) ^ (in10 << 19); |
1392 | | j4 = (in10 >> 13); |
1393 | | j6 = (in10 << 12); |
1394 | | j7 = (in10 >> 20); |
1395 | | |
1396 | | out1 = a7 ^ b6 ^ d4 ^ e3 ^ f2 ^ g1; |
1397 | | out2 = b7 ^ c6 ^ e4 ^ f3 ^ g2 ^ h1; |
1398 | | out3 = c7 ^ d6 ^ f4 ^ g3 ^ h2 ^ i1; |
1399 | | out4 = d7 ^ e6 ^ g4 ^ h3 ^ i2 ^ j1; |
1400 | | out5 = e7 ^ f6 ^ h4 ^ i3 ^ j2; |
1401 | | out6 = f7 ^ g6 ^ i4 ^ j3; |
1402 | | out7 = g7 ^ h6 ^ j4; |
1403 | | out8 = h7 ^ i6; |
1404 | | out9 = i7 ^ j6; |
1405 | | out10 = j7; |
1406 | | |
1407 | | next1 = out1; |
1408 | | next2 = out2; |
1409 | | next3 = out3; |
1410 | | next4 = out4; |
1411 | | next5 = out5; |
1412 | | next6 = out6; |
1413 | | next7 = out7; |
1414 | | next8 = out8; |
1415 | | next9 = out9; |
1416 | | next10 = out10; |
1417 | | |
1418 | | } |
1419 | | #if BYTE_ORDER == BIG_ENDIAN |
1420 | | next1 = ZSWAP32(next1); |
1421 | | next2 = ZSWAP32(next2); |
1422 | | next3 = ZSWAP32(next3); |
1423 | | next4 = ZSWAP32(next4); |
1424 | | next5 = ZSWAP32(next5); |
1425 | | next6 = ZSWAP32(next6); |
1426 | | next7 = ZSWAP32(next7); |
1427 | | next8 = ZSWAP32(next8); |
1428 | | next9 = ZSWAP32(next9); |
1429 | | next10 = ZSWAP32(next10); |
1430 | | #endif |
1431 | | |
1432 | | memcpy(final, input+(i/sizeof(uint32_t)), len-i); |
1433 | | final[0] ^= next1; |
1434 | | final[1] ^= next2; |
1435 | | final[2] ^= next3; |
1436 | | final[3] ^= next4; |
1437 | | final[4] ^= next5; |
1438 | | final[5] ^= next6; |
1439 | | final[6] ^= next7; |
1440 | | final[7] ^= next8; |
1441 | | final[8] ^= next9; |
1442 | | final[9] ^= next10; |
1443 | | |
1444 | | crc = crc32_braid_internal(crc, (uint8_t*) final, len-i); |
1445 | | |
1446 | | return crc; |
1447 | | } |
1448 | | #endif // OPTIMAL_CMP == 64 |
1449 | | |
1450 | 0 | Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) { |
1451 | 0 | uint64_t* aligned_buf; |
1452 | 0 | uint32_t c = (~crc) & 0xffffffff; |
1453 | 0 | uintptr_t algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 7)) & 7; |
1454 | |
|
1455 | 0 | if (len > algn_diff + CHORBA_SMALL_THRESHOLD) { |
1456 | 0 | if (algn_diff) { |
1457 | 0 | c = crc32_braid_internal(c, buf, algn_diff); |
1458 | 0 | len -= algn_diff; |
1459 | 0 | } |
1460 | 0 | aligned_buf = (uint64_t*) (buf + algn_diff); |
1461 | 0 | if(len > CHORBA_LARGE_THRESHOLD) { |
1462 | 0 | c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len); |
1463 | 0 | # if OPTIMAL_CMP == 64 |
1464 | 0 | } else if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) { |
1465 | 0 | c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, len); |
1466 | 0 | # endif |
1467 | 0 | } else { |
1468 | 0 | # if OPTIMAL_CMP == 64 |
1469 | 0 | c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, len); |
1470 | | # else |
1471 | | c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, len); |
1472 | | # endif |
1473 | 0 | } |
1474 | 0 | } else { |
1475 | | // Process too short lengths using crc32_braid |
1476 | 0 | c = crc32_braid_internal(c, buf, len); |
1477 | 0 | } |
1478 | | |
1479 | | /* Return the CRC, post-conditioned. */ |
1480 | 0 | return c ^ 0xffffffff; |
1481 | 0 | } |