/src/openssl/crypto/sha/keccak1600.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.  | 
3  |  |  *  | 
4  |  |  * Licensed under the Apache License 2.0 (the "License").  You may not use  | 
5  |  |  * this file except in compliance with the License.  You can obtain a copy  | 
6  |  |  * in the file LICENSE in the source distribution or at  | 
7  |  |  * https://www.openssl.org/source/license.html  | 
8  |  |  */  | 
9  |  |  | 
10  |  | #include <openssl/e_os2.h>  | 
11  |  | #include <string.h>  | 
12  |  | #include <assert.h>  | 
13  |  |  | 
14  |  | #include "internal/nelem.h"  | 
15  |  |  | 
/*
 * Forward declarations of the two externally visible routines that operate
 * on the 5x5 array of 64-bit lanes A. Their definitions are not shown in
 * this part of the file.
 * NOTE(review): inp/len and out/len look like the input and output byte
 * buffers, and r presumably is the block size ("rate") in bytes - confirm
 * the meaning of r, of the SHA3_absorb return value and of the |next| flag
 * against the definitions.
 */
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
                   size_t r);
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, int next);
19  |  |  | 
20  |  | #if !defined(KECCAK1600_ASM) || !defined(SELFTEST)  | 
21  |  |  | 
/*
 * Choose some sensible defaults.
 *
 * Exactly one implementation variant of the permutation is compiled. If
 * the build did not pick one of KECCAK_REF, KECCAK_1X, KECCAK_1X_ALT,
 * KECCAK_2X or KECCAK_INPLACE, fall back to KECCAK_2X.
 */
#if !defined(KECCAK_REF) && !defined(KECCAK_1X) && !defined(KECCAK_1X_ALT) && \
    !defined(KECCAK_2X) && !defined(KECCAK_INPLACE)
# define KECCAK_2X      /* default to KECCAK_2X variant */
#endif

#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    (defined(__x86_64) && !defined(__BMI__)) || defined(_M_X64) || \
    defined(__mips) || defined(__riscv) || defined(__s390__) || \
    defined(__EMSCRIPTEN__)
/*
 * These targets don't have an "and with complement" instruction, so
 * minimize the number of "not" operations. This is implemented only in
 * the [default] KECCAK_2X variant; the other variants ignore the macro.
 */
# define KECCAK_COMPLEMENTING_TRANSFORM
#endif
40  |  |  | 
#if defined(__x86_64__) || defined(__aarch64__) || \
    defined(__mips64) || defined(__ia64) || \
    (defined(__VMS) && !defined(__vax))
/*
 * These are available even in ILP32 flavours, but even then they are
 * capable of performing 64-bit operations as efficiently as in *P64.
 * Since it's not given that we can use sizeof(void *), just shunt it.
 */
# define BIT_INTERLEAVE (0)
#else
/* Everywhere else use the bit-interleaved form when pointers are < 64 bits */
# define BIT_INTERLEAVE (sizeof(void *) < 8)
#endif

/*
 * Rotate the 32-bit unsigned value |a| left by |offset| bits.
 *
 * Both shift counts are reduced modulo 32, so an offset that is a multiple
 * of 32 (0 and 32 in particular) yields |a| unchanged instead of shifting
 * a 32-bit operand by its full width, which is undefined behaviour.
 * |a| must have a 32-bit unsigned type; both arguments are evaluated twice.
 */
#define ROL32(a, offset) \
    (((a) << ((offset) & 31)) | ((a) >> ((32 - (offset)) & 31)))

/*
 * Rotate the 64-bit lane |val| left by |offset| bits, 0 <= offset <= 63.
 *
 * When BIT_INTERLEAVE is zero this is an ordinary 64-bit rotate. Otherwise
 * the lane is kept bit-interleaved: the low 32-bit half holds the
 * even-numbered bits and the high half the odd-numbered ones, and the
 * rotation is carried out as two 32-bit rotates. An even offset rotates
 * each half by offset/2. An odd offset additionally swaps the halves, the
 * former odd half being rotated one position further.
 */
static uint64_t ROL64(uint64_t val, int offset)
{
    if (offset == 0) {
        /* Handled separately: (val >> 64) below would be undefined */
        return val;
    } else if (!BIT_INTERLEAVE) {
        return (val << offset) | (val >> (64-offset));
    } else {
        uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;

        if (offset & 1) {
            uint32_t tmp = hi;

            offset >>= 1;
            hi = ROL32(lo, offset);
            /*
             * offset + 1 reaches 32 for a rotation by 63; ROL32 reduces
             * the count modulo 32, so that case is a well-defined identity.
             */
            lo = ROL32(tmp, offset + 1);
        } else {
            offset >>= 1;
            lo = ROL32(lo, offset);
            hi = ROL32(hi, offset);
        }

        return ((uint64_t)hi << 32) | lo;
    }
}
80  |  |  | 
/*
 * Left-rotation amounts used by the rho step, one per lane. The table is
 * indexed like the state itself: rhotates[y][x] is the rotation applied to
 * lane A[y][x]. Entry [0][0] is zero (that lane is not rotated) and every
 * entry is below 64.
 */
static const unsigned char rhotates[5][5] = {
    {  0,  1, 62, 28, 27 },
    { 36, 44,  6, 55, 20 },
    {  3, 10, 43, 25, 39 },
    { 41, 45, 15, 21,  8 },
    { 18,  2, 61, 56, 14 }
};
88  |  |  | 
/*
 * Round constants, one per round (24 entries); entry i is XORed into lane
 * A[0][0] at the end of round i.
 *
 * Each entry is given in two encodings and BIT_INTERLEAVE picks one. The
 * right-hand value is the plain 64-bit constant. The left-hand value is
 * the same constant in bit-interleaved form, with the even-numbered bits
 * packed into the low 32 bits and the odd-numbered bits into the high 32
 * bits (e.g. 0x8082, bits 1, 7 and 15, becomes 0x89 in the high half).
 */
static const uint64_t iotas[] = {
    BIT_INTERLEAVE ? 0x0000000000000001ULL : 0x0000000000000001ULL,
    BIT_INTERLEAVE ? 0x0000008900000000ULL : 0x0000000000008082ULL,
    BIT_INTERLEAVE ? 0x8000008b00000000ULL : 0x800000000000808aULL,
    BIT_INTERLEAVE ? 0x8000808000000000ULL : 0x8000000080008000ULL,
    BIT_INTERLEAVE ? 0x0000008b00000001ULL : 0x000000000000808bULL,
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
    BIT_INTERLEAVE ? 0x8000008200000001ULL : 0x8000000000008009ULL,
    BIT_INTERLEAVE ? 0x0000000b00000000ULL : 0x000000000000008aULL,
    BIT_INTERLEAVE ? 0x0000000a00000000ULL : 0x0000000000000088ULL,
    BIT_INTERLEAVE ? 0x0000808200000001ULL : 0x0000000080008009ULL,
    BIT_INTERLEAVE ? 0x0000800300000000ULL : 0x000000008000000aULL,
    BIT_INTERLEAVE ? 0x0000808b00000001ULL : 0x000000008000808bULL,
    BIT_INTERLEAVE ? 0x8000000b00000001ULL : 0x800000000000008bULL,
    BIT_INTERLEAVE ? 0x8000008a00000001ULL : 0x8000000000008089ULL,
    BIT_INTERLEAVE ? 0x8000008100000001ULL : 0x8000000000008003ULL,
    BIT_INTERLEAVE ? 0x8000008100000000ULL : 0x8000000000008002ULL,
    BIT_INTERLEAVE ? 0x8000000800000000ULL : 0x8000000000000080ULL,
    BIT_INTERLEAVE ? 0x0000008300000000ULL : 0x000000000000800aULL,
    BIT_INTERLEAVE ? 0x8000800300000000ULL : 0x800000008000000aULL,
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
    BIT_INTERLEAVE ? 0x8000008800000000ULL : 0x8000000000008080ULL,
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
    BIT_INTERLEAVE ? 0x8000808200000000ULL : 0x8000000080008008ULL
};
115  |  |  | 
116  |  | #if defined(KECCAK_REF)  | 
/*
 * This is a straightforward, "maximum clarity" implementation that aims
 * to resemble section 3.2 of FIPS PUB 202, "SHA-3 Standard:
 * Permutation-Based Hash and Extendable-Output Functions", as closely as
 * possible, with one caveat: because of the way C stores matrices,
 * references to A[x,y] in the specification are written as A[y][x].
 * The implementation unrolls the inner x-loops so that the modulo 5
 * operations are explicitly pre-computed.
 */
/*
 * Theta step: XOR into every lane of column x a value derived from the
 * parities of the two neighbouring columns, namely
 * parity[x-1] ^ ROL64(parity[x+1], 1), indices taken modulo 5.
 */
static void Theta(uint64_t A[5][5])
{
    uint64_t parity[5], fix[5];
    size_t row;

    /* Column parities: XOR of the five lanes sharing the same x */
    parity[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
    parity[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
    parity[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
    parity[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
    parity[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];

    /* Per-column correction term */
    fix[0] = ROL64(parity[1], 1) ^ parity[4];
    fix[1] = ROL64(parity[2], 1) ^ parity[0];
    fix[2] = ROL64(parity[3], 1) ^ parity[1];
    fix[3] = ROL64(parity[4], 1) ^ parity[2];
    fix[4] = ROL64(parity[0], 1) ^ parity[3];

    /* Apply the correction to each row in turn */
    for (row = 0; row < 5; row++) {
        uint64_t *lane = A[row];

        lane[0] ^= fix[0];
        lane[1] ^= fix[1];
        lane[2] ^= fix[2];
        lane[3] ^= fix[3];
        lane[4] ^= fix[4];
    }
}
159  |  |  | 
160  |  | static void Rho(uint64_t A[5][5])  | 
161  |  | { | 
162  |  |     size_t y;  | 
163  |  |  | 
164  |  |     for (y = 0; y < 5; y++) { | 
165  |  |         A[y][0] = ROL64(A[y][0], rhotates[y][0]);  | 
166  |  |         A[y][1] = ROL64(A[y][1], rhotates[y][1]);  | 
167  |  |         A[y][2] = ROL64(A[y][2], rhotates[y][2]);  | 
168  |  |         A[y][3] = ROL64(A[y][3], rhotates[y][3]);  | 
169  |  |         A[y][4] = ROL64(A[y][4], rhotates[y][4]);  | 
170  |  |     }  | 
171  |  | }  | 
172  |  |  | 
/*
 * Pi step: permute the lanes. With S a snapshot of the state on entry,
 * every lane ends up as A[y][x] = S[x][(3*y+x)%5]. The assignments below
 * are grouped by source row of S, with the destination indices
 * pre-computed.
 */
static void Pi(uint64_t A[5][5])
{
    uint64_t S[5][5];

    memcpy(S, A, sizeof(S));

    /* lanes taken from S[0][*] land in column 0 */
    A[0][0] = S[0][0];
    A[2][0] = S[0][1];
    A[4][0] = S[0][2];
    A[1][0] = S[0][3];
    A[3][0] = S[0][4];

    /* lanes taken from S[1][*] land in column 1 */
    A[3][1] = S[1][0];
    A[0][1] = S[1][1];
    A[2][1] = S[1][2];
    A[4][1] = S[1][3];
    A[1][1] = S[1][4];

    /* lanes taken from S[2][*] land in column 2 */
    A[1][2] = S[2][0];
    A[3][2] = S[2][1];
    A[0][2] = S[2][2];
    A[2][2] = S[2][3];
    A[4][2] = S[2][4];

    /* lanes taken from S[3][*] land in column 3 */
    A[4][3] = S[3][0];
    A[1][3] = S[3][1];
    A[3][3] = S[3][2];
    A[0][3] = S[3][3];
    A[2][3] = S[3][4];

    /* lanes taken from S[4][*] land in column 4 */
    A[2][4] = S[4][0];
    A[4][4] = S[4][1];
    A[1][4] = S[4][2];
    A[3][4] = S[4][3];
    A[0][4] = S[4][4];
}
213  |  |  | 
/*
 * Chi step: within each row, every lane is XORed with the AND of the
 * complement of its right-hand neighbour and the lane after that, with
 * the x index wrapping around modulo 5. All five results of a row are
 * computed from the row's values on entry.
 */
static void Chi(uint64_t A[5][5])
{
    size_t row;

    for (row = 0; row < 5; row++) {
        uint64_t *lane = A[row];
        /* snapshot of the row, so that stores don't disturb later reads */
        const uint64_t a0 = lane[0];
        const uint64_t a1 = lane[1];
        const uint64_t a2 = lane[2];
        const uint64_t a3 = lane[3];
        const uint64_t a4 = lane[4];

        lane[0] = a0 ^ (~a1 & a2);
        lane[1] = a1 ^ (~a2 & a3);
        lane[2] = a2 ^ (~a3 & a4);
        lane[3] = a3 ^ (~a4 & a0);
        lane[4] = a4 ^ (~a0 & a1);
    }
}
233  |  |  | 
234  |  | static void Iota(uint64_t A[5][5], size_t i)  | 
235  |  | { | 
236  |  |     assert(i < OSSL_NELEM(iotas));  | 
237  |  |     A[0][0] ^= iotas[i];  | 
238  |  | }  | 
239  |  |  | 
/*
 * The full permutation: 24 rounds applied to A in place, each round being
 * the five step mappings in the order theta, rho, pi, chi, iota.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t rnd = 0;

    while (rnd != 24) {
        Theta(A);
        Rho(A);
        Pi(A);
        Chi(A);
        Iota(A, rnd);
        ++rnd;
    }
}
252  |  |  | 
253  |  | #elif defined(KECCAK_1X)  | 
/*
 * This implementation is an optimization of the above code featuring
 * unrolling of even the y-loops, their fusion and code motion. It also
 * minimizes temporary storage. A compiler would normally do all these
 * things for you; the purpose of the manual optimization is to provide
 * an "unobscured" reference for an assembly implementation [in case this
 * approach is chosen for implementation on some platform]. In a nutshell
 * it's the equivalent of the "plane-per-plane processing" approach
 * discussed in section 2.4 of "Keccak implementation overview".
 */
/*
 * One round of the permutation, applied to A in place, with iotas[i] as
 * the round constant. |i| must be a valid index into iotas[].
 *
 * The output is produced one row at a time: for each output row the five
 * source lanes are combined with the theta correction D[], rotated by
 * their rhotates[] amount, gathered into C[0..4] and then run through the
 * chi formula. Since the result is stored back into A, source lanes that
 * a later row still needs are saved in T[][] (already XORed with their D
 * term) before the row holding them is overwritten.
 */
static void Round(uint64_t A[5][5], size_t i)
{
    uint64_t C[5], E[2];        /* registers */
    uint64_t D[5], T[2][5];     /* memory    */

    assert(i < OSSL_NELEM(iotas));

    /* Column parities: C[x] is the XOR of the five lanes A[*][x] */
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];

#if defined(__arm__)
    /*
     * Same values as in the #else branch, but each D[x] is also kept in a
     * C[]/E[] "register" (see trailing comments) and the statements are
     * ordered so that every C[] entry is read before it is overwritten.
     */
    D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
    D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
    D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
    D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
    D[3] = C[2] = ROL64(C[4], 1) ^ C[2];

    T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
    T[0][1] = A[0][1] ^ E[0]; /* D[1] */
    T[0][2] = A[0][2] ^ C[1]; /* D[2] */
    T[0][3] = A[0][3] ^ C[2]; /* D[3] */
    T[0][4] = A[0][4] ^ E[1]; /* D[4] */

    C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
    C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
    C[0] =       A[0][0] ^ C[0]; /* rotate by 0 */  /* D[0] */
    C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);   /* D[2] */
    C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);   /* D[1] */
#else
    /* Theta correction for column x */
    D[0] = ROL64(C[1], 1) ^ C[4];
    D[1] = ROL64(C[2], 1) ^ C[0];
    D[2] = ROL64(C[3], 1) ^ C[1];
    D[3] = ROL64(C[4], 1) ^ C[2];
    D[4] = ROL64(C[0], 1) ^ C[3];

    /*
     * Save the row 0 lanes needed by later output rows, because A[0][*]
     * is overwritten just below. T[0][0] is free (A[0][0] is consumed
     * right away), so it carries A[3][0], which is needed for output
     * row 4 after A[3][*] has been overwritten.
     */
    T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
    T[0][1] = A[0][1] ^ D[1];
    T[0][2] = A[0][2] ^ D[2];
    T[0][3] = A[0][3] ^ D[3];
    T[0][4] = A[0][4] ^ D[4];

    /* Sources of output row 0: the diagonal lanes A[x][x] */
    C[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
#endif
    /* Output row 0; the round constant goes into lane [0][0] only */
    A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
    A[0][1] = C[1] ^ (~C[2] & C[3]);
    A[0][2] = C[2] ^ (~C[3] & C[4]);
    A[0][3] = C[3] ^ (~C[4] & C[0]);
    A[0][4] = C[4] ^ (~C[0] & C[1]);

    /*
     * Save the row 1 lanes needed later (A[1][*] is overwritten below);
     * slots [1][1] and [1][4] instead carry A[2][1] and A[2][4], which
     * are needed after A[2][*] has been overwritten. The embedded
     * assignments copy D[] values into C[]/E[] for use in the next group.
     */
    T[1][0] = A[1][0] ^ (C[3] = D[0]);
    T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
    T[1][2] = A[1][2] ^ (E[0] = D[2]);
    T[1][3] = A[1][3] ^ (E[1] = D[3]);
    T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */

    /* Sources of output row 1: lanes [0][3], [1][4], [2][0], [3][1], [4][2] */
    C[0] = ROL64(T[0][3],        rhotates[0][3]);
    C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]);   /* D[4] */
    C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]);   /* D[0] */
    C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]);   /* D[1] */
    C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]);   /* D[2] */

    A[1][0] = C[0] ^ (~C[1] & C[2]);
    A[1][1] = C[1] ^ (~C[2] & C[3]);
    A[1][2] = C[2] ^ (~C[3] & C[4]);
    A[1][3] = C[3] ^ (~C[4] & C[0]);
    A[1][4] = C[4] ^ (~C[0] & C[1]);

    /* Sources of output row 2: lanes [0][1], [1][2], [2][3], [3][4], [4][0] */
    C[0] = ROL64(T[0][1],        rhotates[0][1]);
    C[1] = ROL64(T[1][2],        rhotates[1][2]);
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);

    A[2][0] = C[0] ^ (~C[1] & C[2]);
    A[2][1] = C[1] ^ (~C[2] & C[3]);
    A[2][2] = C[2] ^ (~C[3] & C[4]);
    A[2][3] = C[3] ^ (~C[4] & C[0]);
    A[2][4] = C[4] ^ (~C[0] & C[1]);

    /* Sources of output row 3: lanes [0][4], [1][0], [2][1], [3][2], [4][3] */
    C[0] = ROL64(T[0][4],        rhotates[0][4]);
    C[1] = ROL64(T[1][0],        rhotates[1][0]);
    C[2] = ROL64(T[1][1],        rhotates[2][1]); /* originally A[2][1] */
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);

    A[3][0] = C[0] ^ (~C[1] & C[2]);
    A[3][1] = C[1] ^ (~C[2] & C[3]);
    A[3][2] = C[2] ^ (~C[3] & C[4]);
    A[3][3] = C[3] ^ (~C[4] & C[0]);
    A[3][4] = C[4] ^ (~C[0] & C[1]);

    /* Sources of output row 4: lanes [0][2], [1][3], [2][4], [3][0], [4][1] */
    C[0] = ROL64(T[0][2],        rhotates[0][2]);
    C[1] = ROL64(T[1][3],        rhotates[1][3]);
    C[2] = ROL64(T[1][4],        rhotates[2][4]); /* originally A[2][4] */
    C[3] = ROL64(T[0][0],        rhotates[3][0]); /* originally A[3][0] */
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);

    A[4][0] = C[0] ^ (~C[1] & C[2]);
    A[4][1] = C[1] ^ (~C[2] & C[3]);
    A[4][2] = C[2] ^ (~C[3] & C[4]);
    A[4][3] = C[3] ^ (~C[4] & C[0]);
    A[4][4] = C[4] ^ (~C[0] & C[1]);
}
374  |  |  | 
/*
 * The full permutation: runs Round() on A for round indices 0 to 23, in
 * ascending order.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t rnd = 0;

    while (rnd != 24) {
        Round(A, rnd);
        ++rnd;
    }
}
383  |  |  | 
384  |  | #elif defined(KECCAK_1X_ALT)  | 
/*
 * This is a variant of the above KECCAK_1X that reduces the requirement
 * for temporary storage even further, but at the cost of more updates to
 * A[][]. It's less suitable if A[][] is memory bound, but better if it's
 * register bound.
 */
391  |  |  | 
/*
 * One round of the permutation, applied to A in place, with iotas[i] as
 * the round constant. |i| must be a valid index into iotas[].
 *
 * The steps are carried out one after another directly on A: theta
 * (XOR a per-column correction into all lanes), rho and pi fused (each
 * lane is rotated and stored at its new position), chi (row-wise
 * non-linear mix) and finally iota. Only C[] and D[] are used as
 * temporaries, so the order of the statements matters throughout.
 */
static void Round(uint64_t A[5][5], size_t i)
{
    uint64_t C[5], D[5];

    assert(i < OSSL_NELEM(iotas));

    /* Column parities: C[x] is the XOR of the five lanes A[*][x] */
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];

    /*
     * Theta corrections. The last three are accumulated in place, so
     * that afterwards C[2], C[3] and C[4] hold D[3], D[4] and D[0]
     * respectively; each line reads only parities not yet modified.
     */
    D[1] = C[0] ^  ROL64(C[2], 1);
    D[2] = C[1] ^  ROL64(C[3], 1);
    D[3] = C[2] ^= ROL64(C[4], 1);
    D[4] = C[3] ^= ROL64(C[0], 1);
    D[0] = C[4] ^= ROL64(C[1], 1);

    /* Apply the correction column by column */
    A[0][1] ^= D[1];
    A[1][1] ^= D[1];
    A[2][1] ^= D[1];
    A[3][1] ^= D[1];
    A[4][1] ^= D[1];

    A[0][2] ^= D[2];
    A[1][2] ^= D[2];
    A[2][2] ^= D[2];
    A[3][2] ^= D[2];
    A[4][2] ^= D[2];

    /* C[2] == D[3] */
    A[0][3] ^= C[2];
    A[1][3] ^= C[2];
    A[2][3] ^= C[2];
    A[3][3] ^= C[2];
    A[4][3] ^= C[2];

    /* C[3] == D[4] */
    A[0][4] ^= C[3];
    A[1][4] ^= C[3];
    A[2][4] ^= C[3];
    A[3][4] ^= C[3];
    A[4][4] ^= C[3];

    /* C[4] == D[0] */
    A[0][0] ^= C[4];
    A[1][0] ^= C[4];
    A[2][0] ^= C[4];
    A[3][0] ^= C[4];
    A[4][0] ^= C[4];

    /*
     * Rho and pi in place. Lanes A[0][1..4] are set aside in C[1..4];
     * after that every statement rotates a source lane into a position
     * whose previous content has already been moved on (or saved), so no
     * value is lost. A[0][0] is rotated by 0 and stays where it is.
     */
    C[1] = A[0][1];
    C[2] = A[0][2];
    C[3] = A[0][3];
    C[4] = A[0][4];

    A[0][1] = ROL64(A[1][1], rhotates[1][1]);
    A[0][2] = ROL64(A[2][2], rhotates[2][2]);
    A[0][3] = ROL64(A[3][3], rhotates[3][3]);
    A[0][4] = ROL64(A[4][4], rhotates[4][4]);

    A[1][1] = ROL64(A[1][4], rhotates[1][4]);
    A[2][2] = ROL64(A[2][3], rhotates[2][3]);
    A[3][3] = ROL64(A[3][2], rhotates[3][2]);
    A[4][4] = ROL64(A[4][1], rhotates[4][1]);

    A[1][4] = ROL64(A[4][2], rhotates[4][2]);
    A[2][3] = ROL64(A[3][4], rhotates[3][4]);
    A[3][2] = ROL64(A[2][1], rhotates[2][1]);
    A[4][1] = ROL64(A[1][3], rhotates[1][3]);

    A[4][2] = ROL64(A[2][4], rhotates[2][4]);
    A[3][4] = ROL64(A[4][3], rhotates[4][3]);
    A[2][1] = ROL64(A[1][2], rhotates[1][2]);
    A[1][3] = ROL64(A[3][1], rhotates[3][1]);

    A[2][4] = ROL64(A[4][0], rhotates[4][0]);
    A[4][3] = ROL64(A[3][0], rhotates[3][0]);
    A[1][2] = ROL64(A[2][0], rhotates[2][0]);
    A[3][1] = ROL64(A[1][0], rhotates[1][0]);

    /* The saved row 0 lanes close the chain */
    A[1][0] = ROL64(C[3],    rhotates[0][3]);
    A[2][0] = ROL64(C[1],    rhotates[0][1]);
    A[3][0] = ROL64(C[4],    rhotates[0][4]);
    A[4][0] = ROL64(C[2],    rhotates[0][2]);

    /*
     * Chi, rows 0 and 1 interleaved. Lanes 0 and 1 of each row are
     * updated first but are still needed, with their old values, for
     * lanes 3 and 4, hence the copies in C[] and D[].
     */
    C[0] = A[0][0];
    C[1] = A[1][0];
    D[0] = A[0][1];
    D[1] = A[1][1];

    A[0][0] ^= (~A[0][1] & A[0][2]);
    A[1][0] ^= (~A[1][1] & A[1][2]);
    A[0][1] ^= (~A[0][2] & A[0][3]);
    A[1][1] ^= (~A[1][2] & A[1][3]);
    A[0][2] ^= (~A[0][3] & A[0][4]);
    A[1][2] ^= (~A[1][3] & A[1][4]);
    A[0][3] ^= (~A[0][4] & C[0]);
    A[1][3] ^= (~A[1][4] & C[1]);
    A[0][4] ^= (~C[0]    & D[0]);
    A[1][4] ^= (~C[1]    & D[1]);

    /* Chi, rows 2 and 3 interleaved, same scheme */
    C[2] = A[2][0];
    C[3] = A[3][0];
    D[2] = A[2][1];
    D[3] = A[3][1];

    A[2][0] ^= (~A[2][1] & A[2][2]);
    A[3][0] ^= (~A[3][1] & A[3][2]);
    A[2][1] ^= (~A[2][2] & A[2][3]);
    A[3][1] ^= (~A[3][2] & A[3][3]);
    A[2][2] ^= (~A[2][3] & A[2][4]);
    A[3][2] ^= (~A[3][3] & A[3][4]);
    A[2][3] ^= (~A[2][4] & C[2]);
    A[3][3] ^= (~A[3][4] & C[3]);
    A[2][4] ^= (~C[2]    & D[2]);
    A[3][4] ^= (~C[3]    & D[3]);

    /* Chi, row 4 */
    C[4] = A[4][0];
    D[4] = A[4][1];

    A[4][0] ^= (~A[4][1] & A[4][2]);
    A[4][1] ^= (~A[4][2] & A[4][3]);
    A[4][2] ^= (~A[4][3] & A[4][4]);
    A[4][3] ^= (~A[4][4] & C[4]);
    A[4][4] ^= (~C[4]    & D[4]);
    /* Iota: round constant into lane [0][0] */
    A[0][0] ^= iotas[i];
}
517  |  |  | 
/*
 * The full permutation: runs Round() on A for round indices 0 to 23, in
 * ascending order.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t rnd = 0;

    while (rnd != 24) {
        Round(A, rnd);
        ++rnd;
    }
}
526  |  |  | 
527  |  | #elif defined(KECCAK_2X)  | 
/*
 * This implementation is a variant of KECCAK_1X above with the outer-most
 * round loop unrolled twice. This allows taking the temporary storage
 * out of the round procedure and simplifying references to it by
 * alternating it with the actual data (see the round loop below).
 * Originally it was meant rather as a reference for an assembly
 * implementation, but it seems to play best with compilers [as well as
 * provide the best instructions per processed byte ratio at the minimal
 * round unroll factor]...
 */
/*
 * One round of the permutation, reading the state from A and writing the
 * result to R, with iotas[i] as the round constant. |i| must be a valid
 * index into iotas[]. A is only read; every lane of R is written.
 *
 * The output is produced one row at a time: the five source lanes of a
 * row are combined with the theta correction D[], rotated by their
 * rhotates[] amount, gathered into C[0..4] and mixed by the chi formula.
 *
 * With KECCAK_COMPLEMENTING_TRANSFORM the chi formulas are replaced by
 * and/or forms containing fewer complement operations. These go together
 * with the six lanes that KeccakF1600() complements before the first and
 * after the last round; the two pieces must be kept in sync.
 */
static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
{
    uint64_t C[5], D[5];

    assert(i < OSSL_NELEM(iotas));

    /* Column parities: C[x] is the XOR of the five lanes A[*][x] */
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];

    /* Theta correction for column x */
    D[0] = ROL64(C[1], 1) ^ C[4];
    D[1] = ROL64(C[2], 1) ^ C[0];
    D[2] = ROL64(C[3], 1) ^ C[1];
    D[3] = ROL64(C[4], 1) ^ C[2];
    D[4] = ROL64(C[0], 1) ^ C[3];

    /* Sources of output row 0: the diagonal lanes A[x][x] */
    C[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);

    /* Output row 0; the round constant goes into lane [0][0] only */
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i];
    R[0][1] = C[1] ^ (~C[2] | C[3]);
    R[0][2] = C[2] ^ ( C[3] & C[4]);
    R[0][3] = C[3] ^ ( C[4] | C[0]);
    R[0][4] = C[4] ^ ( C[0] & C[1]);
#else
    R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
    R[0][1] = C[1] ^ (~C[2] & C[3]);
    R[0][2] = C[2] ^ (~C[3] & C[4]);
    R[0][3] = C[3] ^ (~C[4] & C[0]);
    R[0][4] = C[4] ^ (~C[0] & C[1]);
#endif

    /* Sources of output row 1: lanes [0][3], [1][4], [2][0], [3][1], [4][2] */
    C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
    C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
    C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
    C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
    C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    R[1][0] = C[0] ^ (C[1] |  C[2]);
    R[1][1] = C[1] ^ (C[2] &  C[3]);
    R[1][2] = C[2] ^ (C[3] | ~C[4]);
    R[1][3] = C[3] ^ (C[4] |  C[0]);
    R[1][4] = C[4] ^ (C[0] &  C[1]);
#else
    R[1][0] = C[0] ^ (~C[1] & C[2]);
    R[1][1] = C[1] ^ (~C[2] & C[3]);
    R[1][2] = C[2] ^ (~C[3] & C[4]);
    R[1][3] = C[3] ^ (~C[4] & C[0]);
    R[1][4] = C[4] ^ (~C[0] & C[1]);
#endif

    /* Sources of output row 2: lanes [0][1], [1][2], [2][3], [3][4], [4][0] */
    C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
    C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    R[2][0] =  C[0] ^ ( C[1] | C[2]);
    R[2][1] =  C[1] ^ ( C[2] & C[3]);
    R[2][2] =  C[2] ^ (~C[3] & C[4]);
    R[2][3] = ~C[3] ^ ( C[4] | C[0]);
    R[2][4] =  C[4] ^ ( C[0] & C[1]);
#else
    R[2][0] = C[0] ^ (~C[1] & C[2]);
    R[2][1] = C[1] ^ (~C[2] & C[3]);
    R[2][2] = C[2] ^ (~C[3] & C[4]);
    R[2][3] = C[3] ^ (~C[4] & C[0]);
    R[2][4] = C[4] ^ (~C[0] & C[1]);
#endif

    /* Sources of output row 3: lanes [0][4], [1][0], [2][1], [3][2], [4][3] */
    C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
    C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
    C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    R[3][0] =  C[0] ^ ( C[1] & C[2]);
    R[3][1] =  C[1] ^ ( C[2] | C[3]);
    R[3][2] =  C[2] ^ (~C[3] | C[4]);
    R[3][3] = ~C[3] ^ ( C[4] & C[0]);
    R[3][4] =  C[4] ^ ( C[0] | C[1]);
#else
    R[3][0] = C[0] ^ (~C[1] & C[2]);
    R[3][1] = C[1] ^ (~C[2] & C[3]);
    R[3][2] = C[2] ^ (~C[3] & C[4]);
    R[3][3] = C[3] ^ (~C[4] & C[0]);
    R[3][4] = C[4] ^ (~C[0] & C[1]);
#endif

    /* Sources of output row 4: lanes [0][2], [1][3], [2][4], [3][0], [4][1] */
    C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
    C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
    C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
    C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    R[4][0] =  C[0] ^ (~C[1] & C[2]);
    R[4][1] = ~C[1] ^ ( C[2] | C[3]);
    R[4][2] =  C[2] ^ ( C[3] & C[4]);
    R[4][3] =  C[3] ^ ( C[4] | C[0]);
    R[4][4] =  C[4] ^ ( C[0] & C[1]);
#else
    R[4][0] = C[0] ^ (~C[1] & C[2]);
    R[4][1] = C[1] ^ (~C[2] & C[3]);
    R[4][2] = C[2] ^ (~C[3] & C[4]);
    R[4][3] = C[3] ^ (~C[4] & C[0]);
    R[4][4] = C[4] ^ (~C[0] & C[1]);
#endif
}
655  |  |  | 
/*
 * The full permutation, applied to A in place: 24 rounds, executed in
 * pairs that ping-pong between A and a scratch copy, so that the final
 * result lands in A again.
 *
 * With KECCAK_COMPLEMENTING_TRANSFORM six lanes are complemented before
 * the first round and once more after the last one, as required by the
 * alternative chi formulas in Round().
 */
static void KeccakF1600(uint64_t A[5][5])
{
    uint64_t scratch[5][5];
    size_t rnd;
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    /* { y, x } coordinates of the lanes that are kept complemented */
    static const unsigned char flipped[6][2] = {
        { 0, 1 }, { 0, 2 }, { 1, 3 }, { 2, 2 }, { 3, 2 }, { 4, 0 }
    };
    size_t k;

    for (k = 0; k < 6; k++) {
        A[flipped[k][0]][flipped[k][1]] = ~A[flipped[k][0]][flipped[k][1]];
    }
#endif

    for (rnd = 0; rnd < 24; rnd += 2) {
        Round(scratch, A, rnd);
        Round(A, scratch, rnd + 1);
    }

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    for (k = 0; k < 6; k++) {
        A[flipped[k][0]][flipped[k][1]] = ~A[flipped[k][0]][flipped[k][1]];
    }
#endif
}
684  |  |  | 
685  |  | #else   /* define KECCAK_INPLACE to compile this code path */  | 
686  |  | /*  | 
687  |  |  * FourRounds() applies rounds |i|..|i|+3 to |A| in place, using  | 
688  |  |  * iotas[i]..iotas[i + 3]. It is KECCAK_1X from above combined 4 times  | 
689  |  |  * with a twist that makes temporary storage unnecessary: the slots a  | 
690  |  |  * round reads and writes are permuted from round to round. This is  | 
691  |  |  * discussed in section 2.5 of "Keccak implementation overview". It is  | 
692  |  |  * likely best suited for processors with a large register bank, though  | 
693  |  |  * those can equally use KECCAK_1X_ALT, which is as fast but more compact.  | 
694  |  |  */  | 
695  |  | static void FourRounds(uint64_t A[5][5], size_t i)  | 
696  |  | { | 
697  |  |     uint64_t B[5], C[5], D[5];  | 
698  |  |     /* B: the five lanes being mixed; C: column parities; D: per-column masks */  | 
699  |  |     assert(i <= OSSL_NELEM(iotas) - 4);  | 
700  |  |     /* Each group of five lanes below is stored back to the slots it was read from */  | 
701  |  |     /* Round 4*n: C[x] = XOR of the five lanes A[0..4][x] */  | 
702  |  |     C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];  | 
703  |  |     C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];  | 
704  |  |     C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];  | 
705  |  |     C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];  | 
706  |  |     C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];  | 
707  |  |     /* D[x] = ROL64(C[x+1], 1) ^ C[x-1], indices taken mod 5 */  | 
708  |  |     D[0] = ROL64(C[1], 1) ^ C[4];  | 
709  |  |     D[1] = ROL64(C[2], 1) ^ C[0];  | 
710  |  |     D[2] = ROL64(C[3], 1) ^ C[1];  | 
711  |  |     D[3] = ROL64(C[4], 1) ^ C[2];  | 
712  |  |     D[4] = ROL64(C[0], 1) ^ C[3];  | 
713  |  |     /* fetch five lanes, XOR in D[], rotate by the matching rhotates[][] entry */  | 
714  |  |     B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */  | 
715  |  |     B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);  | 
716  |  |     B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);  | 
717  |  |     B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);  | 
718  |  |     B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);  | 
719  |  |     /* mix B[], store in place, seed next C[]; only A[0][0] takes iotas[] */  | 
720  |  |     C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];  | 
721  |  |     C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);  | 
722  |  |     C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);  | 
723  |  |     C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);  | 
724  |  |     C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);  | 
725  |  |  | 
726  |  |     B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);  | 
727  |  |     B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);  | 
728  |  |     B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);  | 
729  |  |     B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);  | 
730  |  |     B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);  | 
731  |  |     /* the remaining groups of this round accumulate into C[] with ^= */  | 
732  |  |     C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);  | 
733  |  |     C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);  | 
734  |  |     C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);  | 
735  |  |     C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);  | 
736  |  |     C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);  | 
737  |  |  | 
738  |  |     B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);  | 
739  |  |     B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);  | 
740  |  |     B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);  | 
741  |  |     B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);  | 
742  |  |     B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);  | 
743  |  |  | 
744  |  |     C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);  | 
745  |  |     C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);  | 
746  |  |     C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);  | 
747  |  |     C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);  | 
748  |  |     C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);  | 
749  |  |  | 
750  |  |     B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);  | 
751  |  |     B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);  | 
752  |  |     B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);  | 
753  |  |     B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);  | 
754  |  |     B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);  | 
755  |  |  | 
756  |  |     C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);  | 
757  |  |     C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);  | 
758  |  |     C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);  | 
759  |  |     C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);  | 
760  |  |     C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);  | 
761  |  |  | 
762  |  |     B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);  | 
763  |  |     B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);  | 
764  |  |     B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);  | 
765  |  |     B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);  | 
766  |  |     B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);  | 
767  |  |  | 
768  |  |     C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);  | 
769  |  |     C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);  | 
770  |  |     C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);  | 
771  |  |     C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);  | 
772  |  |     C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);  | 
773  |  |  | 
774  |  |     /* Round 4*n+1: C[] was accumulated above; same steps over permuted slots */  | 
775  |  |     D[0] = ROL64(C[1], 1) ^ C[4];  | 
776  |  |     D[1] = ROL64(C[2], 1) ^ C[0];  | 
777  |  |     D[2] = ROL64(C[3], 1) ^ C[1];  | 
778  |  |     D[3] = ROL64(C[4], 1) ^ C[2];  | 
779  |  |     D[4] = ROL64(C[0], 1) ^ C[3];  | 
780  |  |  | 
781  |  |     B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */  | 
782  |  |     B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);  | 
783  |  |     B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);  | 
784  |  |     B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);  | 
785  |  |     B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);  | 
786  |  |  | 
787  |  |     C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];  | 
788  |  |     C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);  | 
789  |  |     C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);  | 
790  |  |     C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);  | 
791  |  |     C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);  | 
792  |  |  | 
793  |  |     B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);  | 
794  |  |     B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);  | 
795  |  |     B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);  | 
796  |  |     B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);  | 
797  |  |     B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);  | 
798  |  |  | 
799  |  |     C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);  | 
800  |  |     C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);  | 
801  |  |     C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);  | 
802  |  |     C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);  | 
803  |  |     C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);  | 
804  |  |  | 
805  |  |     B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);  | 
806  |  |     B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);  | 
807  |  |     B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);  | 
808  |  |     B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);  | 
809  |  |     B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);  | 
810  |  |  | 
811  |  |     C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);  | 
812  |  |     C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);  | 
813  |  |     C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);  | 
814  |  |     C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);  | 
815  |  |     C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);  | 
816  |  |  | 
817  |  |     B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);  | 
818  |  |     B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);  | 
819  |  |     B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);  | 
820  |  |     B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);  | 
821  |  |     B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);  | 
822  |  |  | 
823  |  |     C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);  | 
824  |  |     C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);  | 
825  |  |     C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);  | 
826  |  |     C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);  | 
827  |  |     C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);  | 
828  |  |  | 
829  |  |     B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);  | 
830  |  |     B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);  | 
831  |  |     B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);  | 
832  |  |     B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);  | 
833  |  |     B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);  | 
834  |  |  | 
835  |  |     C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);  | 
836  |  |     C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);  | 
837  |  |     C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);  | 
838  |  |     C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);  | 
839  |  |     C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);  | 
840  |  |  | 
841  |  |     /* Round 4*n+2: again the same steps, over a further permuted slot order */  | 
842  |  |     D[0] = ROL64(C[1], 1) ^ C[4];  | 
843  |  |     D[1] = ROL64(C[2], 1) ^ C[0];  | 
844  |  |     D[2] = ROL64(C[3], 1) ^ C[1];  | 
845  |  |     D[3] = ROL64(C[4], 1) ^ C[2];  | 
846  |  |     D[4] = ROL64(C[0], 1) ^ C[3];  | 
847  |  |  | 
848  |  |     B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */  | 
849  |  |     B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);  | 
850  |  |     B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);  | 
851  |  |     B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);  | 
852  |  |     B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);  | 
853  |  |  | 
854  |  |     C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];  | 
855  |  |     C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);  | 
856  |  |     C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);  | 
857  |  |     C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);  | 
858  |  |     C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);  | 
859  |  |  | 
860  |  |     B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);  | 
861  |  |     B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);  | 
862  |  |     B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);  | 
863  |  |     B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);  | 
864  |  |     B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);  | 
865  |  |  | 
866  |  |     C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);  | 
867  |  |     C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);  | 
868  |  |     C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);  | 
869  |  |     C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);  | 
870  |  |     C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);  | 
871  |  |  | 
872  |  |     B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);  | 
873  |  |     B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);  | 
874  |  |     B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);  | 
875  |  |     B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);  | 
876  |  |     B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);  | 
877  |  |  | 
878  |  |     C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);  | 
879  |  |     C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);  | 
880  |  |     C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);  | 
881  |  |     C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);  | 
882  |  |     C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);  | 
883  |  |  | 
884  |  |     B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);  | 
885  |  |     B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);  | 
886  |  |     B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);  | 
887  |  |     B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);  | 
888  |  |     B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);  | 
889  |  |  | 
890  |  |     C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);  | 
891  |  |     C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);  | 
892  |  |     C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);  | 
893  |  |     C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);  | 
894  |  |     C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);  | 
895  |  |  | 
896  |  |     B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);  | 
897  |  |     B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);  | 
898  |  |     B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);  | 
899  |  |     B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);  | 
900  |  |     B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);  | 
901  |  |  | 
902  |  |     C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);  | 
903  |  |     C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);  | 
904  |  |     C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);  | 
905  |  |     C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);  | 
906  |  |     C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);  | 
907  |  |  | 
908  |  |     /* Round 4*n+3: reads and writes row by row; C[] is no longer accumulated */  | 
909  |  |     D[0] = ROL64(C[1], 1) ^ C[4];  | 
910  |  |     D[1] = ROL64(C[2], 1) ^ C[0];  | 
911  |  |     D[2] = ROL64(C[3], 1) ^ C[1];  | 
912  |  |     D[3] = ROL64(C[4], 1) ^ C[2];  | 
913  |  |     D[4] = ROL64(C[0], 1) ^ C[3];  | 
914  |  |  | 
915  |  |     B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */  | 
916  |  |     B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);  | 
917  |  |     B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);  | 
918  |  |     B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);  | 
919  |  |     B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);  | 
920  |  |  | 
921  |  |     /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];  | 
922  |  |     /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);  | 
923  |  |     /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);  | 
924  |  |     /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);  | 
925  |  |     /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);  | 
926  |  |  | 
927  |  |     B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);  | 
928  |  |     B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);  | 
929  |  |     B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);  | 
930  |  |     B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);  | 
931  |  |     B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);  | 
932  |  |  | 
933  |  |     /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);  | 
934  |  |     /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);  | 
935  |  |     /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);  | 
936  |  |     /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);  | 
937  |  |     /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);  | 
938  |  |  | 
939  |  |     B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);  | 
940  |  |     B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);  | 
941  |  |     B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);  | 
942  |  |     B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);  | 
943  |  |     B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);  | 
944  |  |  | 
945  |  |     /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);  | 
946  |  |     /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);  | 
947  |  |     /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);  | 
948  |  |     /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);  | 
949  |  |     /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);  | 
950  |  |  | 
951  |  |     B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);  | 
952  |  |     B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);  | 
953  |  |     B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);  | 
954  |  |     B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);  | 
955  |  |     B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);  | 
956  |  |  | 
957  |  |     /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);  | 
958  |  |     /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);  | 
959  |  |     /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);  | 
960  |  |     /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);  | 
961  |  |     /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);  | 
962  |  |  | 
963  |  |     B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);  | 
964  |  |     B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);  | 
965  |  |     B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);  | 
966  |  |     B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);  | 
967  |  |     B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);  | 
968  |  |  | 
969  |  |     /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);  | 
970  |  |     /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);  | 
971  |  |     /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);  | 
972  |  |     /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);  | 
973  |  |     /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);  | 
974  |  | }  | 
975  |  |  | 
/*
 * In-place flavour of the permutation: the 24 rounds are applied to |A|
 * as six back-to-back FourRounds() batches, each batch starting at the
 * round index (and hence round constant) where the previous one stopped.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t first_round = 0;

    while (first_round < 24) {
        FourRounds(A, first_round);
        first_round += 4;
    }
}
984  |  |  | 
985  |  | #endif  | 
986  |  |  | 
987  |  | static uint64_t BitInterleave(uint64_t Ai)  | 
988  | 18.5M  | { | 
989  | 18.5M  |     if (BIT_INTERLEAVE) { | 
990  | 0  |         uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;  | 
991  | 0  |         uint32_t t0, t1;  | 
992  |  | 
  | 
993  | 0  |         t0 = lo & 0x55555555;  | 
994  | 0  |         t0 |= t0 >> 1;  t0 &= 0x33333333;  | 
995  | 0  |         t0 |= t0 >> 2;  t0 &= 0x0f0f0f0f;  | 
996  | 0  |         t0 |= t0 >> 4;  t0 &= 0x00ff00ff;  | 
997  | 0  |         t0 |= t0 >> 8;  t0 &= 0x0000ffff;  | 
998  |  | 
  | 
999  | 0  |         t1 = hi & 0x55555555;  | 
1000  | 0  |         t1 |= t1 >> 1;  t1 &= 0x33333333;  | 
1001  | 0  |         t1 |= t1 >> 2;  t1 &= 0x0f0f0f0f;  | 
1002  | 0  |         t1 |= t1 >> 4;  t1 &= 0x00ff00ff;  | 
1003  | 0  |         t1 |= t1 >> 8;  t1 <<= 16;  | 
1004  |  | 
  | 
1005  | 0  |         lo &= 0xaaaaaaaa;  | 
1006  | 0  |         lo |= lo << 1;  lo &= 0xcccccccc;  | 
1007  | 0  |         lo |= lo << 2;  lo &= 0xf0f0f0f0;  | 
1008  | 0  |         lo |= lo << 4;  lo &= 0xff00ff00;  | 
1009  | 0  |         lo |= lo << 8;  lo >>= 16;  | 
1010  |  | 
  | 
1011  | 0  |         hi &= 0xaaaaaaaa;  | 
1012  | 0  |         hi |= hi << 1;  hi &= 0xcccccccc;  | 
1013  | 0  |         hi |= hi << 2;  hi &= 0xf0f0f0f0;  | 
1014  | 0  |         hi |= hi << 4;  hi &= 0xff00ff00;  | 
1015  | 0  |         hi |= hi << 8;  hi &= 0xffff0000;  | 
1016  |  | 
  | 
1017  | 0  |         Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);  | 
1018  | 0  |     }  | 
1019  |  |  | 
1020  | 18.5M  |     return Ai;  | 
1021  | 18.5M  | }  | 
1022  |  |  | 
1023  |  | static uint64_t BitDeinterleave(uint64_t Ai)  | 
1024  | 374  | { | 
1025  | 374  |     if (BIT_INTERLEAVE) { | 
1026  | 0  |         uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;  | 
1027  | 0  |         uint32_t t0, t1;  | 
1028  |  | 
  | 
1029  | 0  |         t0 = lo & 0x0000ffff;  | 
1030  | 0  |         t0 |= t0 << 8;  t0 &= 0x00ff00ff;  | 
1031  | 0  |         t0 |= t0 << 4;  t0 &= 0x0f0f0f0f;  | 
1032  | 0  |         t0 |= t0 << 2;  t0 &= 0x33333333;  | 
1033  | 0  |         t0 |= t0 << 1;  t0 &= 0x55555555;  | 
1034  |  | 
  | 
1035  | 0  |         t1 = hi << 16;  | 
1036  | 0  |         t1 |= t1 >> 8;  t1 &= 0xff00ff00;  | 
1037  | 0  |         t1 |= t1 >> 4;  t1 &= 0xf0f0f0f0;  | 
1038  | 0  |         t1 |= t1 >> 2;  t1 &= 0xcccccccc;  | 
1039  | 0  |         t1 |= t1 >> 1;  t1 &= 0xaaaaaaaa;  | 
1040  |  | 
  | 
1041  | 0  |         lo >>= 16;  | 
1042  | 0  |         lo |= lo << 8;  lo &= 0x00ff00ff;  | 
1043  | 0  |         lo |= lo << 4;  lo &= 0x0f0f0f0f;  | 
1044  | 0  |         lo |= lo << 2;  lo &= 0x33333333;  | 
1045  | 0  |         lo |= lo << 1;  lo &= 0x55555555;  | 
1046  |  | 
  | 
1047  | 0  |         hi &= 0xffff0000;  | 
1048  | 0  |         hi |= hi >> 8;  hi &= 0xff00ff00;  | 
1049  | 0  |         hi |= hi >> 4;  hi &= 0xf0f0f0f0;  | 
1050  | 0  |         hi |= hi >> 2;  hi &= 0xcccccccc;  | 
1051  | 0  |         hi |= hi >> 1;  hi &= 0xaaaaaaaa;  | 
1052  |  | 
  | 
1053  | 0  |         Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);  | 
1054  | 0  |     }  | 
1055  |  |  | 
1056  | 374  |     return Ai;  | 
1057  | 374  | }  | 
1058  |  |  | 
/*
 * SHA3_absorb can be called multiple times, but at each invocation only
 * the largest multiple of |r| out of |len| bytes is processed: every
 * |r|-byte block is assembled into 64-bit lanes (least significant byte
 * first), passed through BitInterleave(), XORed into the leading |r|/8
 * lanes of |A| and followed by one KeccakF1600() call. The number of
 * unprocessed trailing bytes is returned, which spares the caller the
 * trouble of calculating the largest multiple of |r|. |r| can be viewed
 * as blocksize. It is commonly (1600 - 256*n)/8, e.g. 168, 136, 104,
 * 72, but can also be (1600 - 448)/8 = 144. All this means that message
 * padding and intermediate sub-block buffering, byte- or bitwise, is
 * caller's responsibility.
 *
 * |r| must be a non-zero multiple of 8 smaller than the state size. A
 * zero |r| absorbs nothing and returns |len| unchanged.
 */
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
                   size_t r)
{
    uint64_t *A_flat = (uint64_t *)A;
    size_t i, w = r / 8;

    assert(r != 0 && r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    /*
     * With r == 0 the loop condition below could never become false, as
     * |len| would not shrink. Report every byte as unprocessed instead.
     */
    if (r == 0)
        return len;

    while (len >= r) {
        for (i = 0; i < w; i++) {
            uint64_t Ai = (uint64_t)inp[0]       | (uint64_t)inp[1] << 8  |
                          (uint64_t)inp[2] << 16 | (uint64_t)inp[3] << 24 |
                          (uint64_t)inp[4] << 32 | (uint64_t)inp[5] << 40 |
                          (uint64_t)inp[6] << 48 | (uint64_t)inp[7] << 56;
            inp += 8;

            A_flat[i] ^= BitInterleave(Ai);
        }
        KeccakF1600(A);
        len -= r;
    }

    return len;
}
1093  |  |  | 
/*
 * SHA3_squeeze may be called after SHA3_absorb to write |len| bytes of
 * output to |out|. Up to |r| bytes are taken from the leading lanes of
 * |A| (each lane passed through BitDeinterleave() and emitted least
 * significant byte first); KeccakF1600() is run before every block of
 * output except the very first one when |next| is 0.
 * If multiple SHA3_squeeze calls are required, the output length |len| must
 * be a multiple of the blocksize, with |next| being 0 on the first call and
 * 1 on subsequent calls. It is the caller's responsibility to buffer the
 * results. When only a single call to SHA3_squeeze is required, |len| can
 * be any size and |next| must be 0.
 */
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r,
                  int next)
{
    uint64_t *lanes = (uint64_t *)A;
    size_t lanes_per_block = r / 8;
    size_t lane, k;

    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    /* every pass after the first one must permute the state first */
    for (; len != 0; next = 1) {
        if (next)
            KeccakF1600(A);

        for (lane = 0; lane < lanes_per_block && len != 0; lane++) {
            uint64_t Ai = BitDeinterleave(lanes[lane]);
            /* a short tail takes only the low |len| bytes of the lane */
            size_t take = len < 8 ? len : 8;

            for (k = 0; k < take; k++) {
                *out++ = (unsigned char)Ai;
                Ai >>= 8;
            }
            len -= take;
        }
    }
}
1139  |  | #endif  | 
1140  |  |  | 
1141  |  | #ifdef SELFTEST  | 
/*
 * Post-padding one-shot implementations would look as follows:
 *
 * SHA3_224     SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
 * SHA3_256     SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
 * SHA3_384     SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
 * SHA3_512     SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
 * SHAKE_128    SHA3_sponge(inp, len, out, d, (1600-256)/8);
 * SHAKE_256    SHA3_sponge(inp, len, out, d, (1600-512)/8);
 *
 * |inp| must already be padded to a multiple of |r| bytes: SHA3_absorb
 * only consumes whole blocks and its return value is not examined here.
 * |d| bytes of output are written to |out|.
 */

void SHA3_sponge(const unsigned char *inp, size_t len,
                 unsigned char *out, size_t d, size_t r)
{
    uint64_t A[5][5];

    memset(A, 0, sizeof(A));
    SHA3_absorb(A, inp, len, r);
    /* the whole output is squeezed in a single call, hence |next| is 0 */
    SHA3_squeeze(A, out, d, r, 0);
}
1162  |  |  | 
1163  |  | # include <stdio.h>  | 
1164  |  |  | 
/*
 * Self-test driver: runs SHA3_sponge() over one pre-padded 168-byte block,
 * prints the 512 output bytes as hex (16 per line) on stdout, and reports
 * "success" or "failure" on stderr. Returns 0 on a match, 1 otherwise.
 */
int main(void)
{
    /*
     * This is 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
     */
    unsigned char msg[168] = { 0xf3, 0x03 };
    unsigned char digest[512];
    size_t pos;
    static const unsigned char expected[512] = {
        0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
        0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
        0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
        0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
        0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
        0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
        0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
        0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
        0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
        0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
        0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
        0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
        0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
        0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
        0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
        0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
        0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
        0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
        0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
        0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
        0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
        0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
        0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
        0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
        0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
        0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
        0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
        0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
        0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
        0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
        0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
        0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
        0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
        0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
        0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
        0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
        0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
        0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
        0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
        0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
        0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
        0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
        0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
        0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
        0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
        0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
        0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
        0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
        0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
        0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
        0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
        0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
        0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
        0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
        0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
        0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
        0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
        0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
        0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
        0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
        0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
        0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
        0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
        0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
    };
    int mismatch;

    /* last byte of the block carries the final padding bit */
    msg[167] = 0x80;
    SHA3_sponge(msg, sizeof(msg), digest, sizeof(digest), sizeof(msg));

    /*
     * The output is deliberately kept in this format so that it can be
     * redirected to a file and compared with diff(1) against the final
     * "output val" copied from the official example.
     */
    for (pos = 0; pos < sizeof(digest); pos++) {
        size_t printed = pos + 1;

        printf("%02X", digest[pos]);
        if (printed % 16 != 0 && printed != sizeof(digest))
            printf(" ");
        else
            printf("\n");
    }

    mismatch = memcmp(digest, expected, sizeof(digest)) != 0;
    if (mismatch)
        fprintf(stderr, "failure\n");
    else
        fprintf(stderr, "success\n");

    return mismatch;
}
1262  |  | #endif  |