Coverage Report

Created: 2025-12-10 06:24

/src/openssl/crypto/sha/keccak1600.c
Line
Count
Source
1
/*
2
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <openssl/e_os2.h>
11
#include <string.h>
12
#include <assert.h>
13
14
#include "internal/nelem.h"
15
16
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
17
    size_t r);
18
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, int next);
19
20
#if !defined(KECCAK1600_ASM) || defined(SELFTEST)
21
22
/*
23
 * Choose some sensible defaults
24
 */
25
#if !defined(KECCAK_REF) && !defined(KECCAK_1X) && !defined(KECCAK_1X_ALT) && !defined(KECCAK_2X) && !defined(KECCAK_INPLACE)
26
#define KECCAK_2X /* default to KECCAK_2X variant */
27
#endif
28
29
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || (defined(__x86_64) && !defined(__BMI__)) || defined(_M_X64) || defined(__mips) || defined(__riscv) || defined(__s390__) || defined(__EMSCRIPTEN__)
30
/*
31
 * These don't have an "and with complement" instruction, so we minimize the
32
 * number of "not"s. This is implemented only in the [default] KECCAK_2X variant.
33
 */
34
#define KECCAK_COMPLEMENTING_TRANSFORM
35
#endif
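
A minimal sketch (not part of the original source) of the bitwise identities this transform rests on: with ~c stored in place of c, the chi expression a ^ (~b & c) needs no free-standing NOT, and the leftover complement can be folded into a complemented result, which is what the KECCAK_2X Round() below does for a fixed set of lanes.

/* Illustration only: De Morgan identities behind the complementing transform. */
static void complementing_transform_identities(uint64_t a, uint64_t b, uint64_t c)
{
    assert((~b & c) == ~(b | ~c));              /* NOT moved onto c / the result */
    assert((a ^ (~b & c)) == ~(a ^ (b | ~c)));  /* outer NOT folds into the output */
}
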
36
37
#if defined(__x86_64__) || defined(__aarch64__) || defined(__mips64) || defined(__ia64) || (defined(__VMS) && !defined(__vax))
38
/*
39
 * These are available even in ILP32 flavours, but even then they are
40
 * capable of performing 64-bit operations as efficiently as in *P64.
41
 * Since sizeof(void *) is therefore not a reliable indicator here, it is simply bypassed.
42
 */
43
472M
#define BIT_INTERLEAVE (0)
44
#else
45
#define BIT_INTERLEAVE (sizeof(void *) < 8)
46
#endif
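
For orientation, a minimal sketch (not part of the original source) of the layout that BIT_INTERLEAVE refers to: each 64-bit lane is stored with its even-numbered bits packed into the low 32-bit half and its odd-numbered bits in the high half, so a 64-bit rotation splits into two 32-bit rotations, as the interleaved branch of ROL64() below exploits.

/* Naive reference for the layout computed by BitInterleave() further down:
 * bit 2*i of the input becomes bit i of the low word, bit 2*i+1 becomes
 * bit i of the high word. Illustration only. */
static uint64_t bit_interleave_naive(uint64_t x)
{
    uint32_t lo = 0, hi = 0;
    int i;

    for (i = 0; i < 32; i++) {
        lo |= (uint32_t)((x >> (2 * i)) & 1) << i;      /* even-numbered bits */
        hi |= (uint32_t)((x >> (2 * i + 1)) & 1) << i;  /* odd-numbered bits */
    }
    return ((uint64_t)hi << 32) | lo;
}
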
47
48
0
#define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))
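/* Note: the "& 31" keeps the right-shift count below 32, so ROL32(a, 0) does
 * not perform an undefined shift of a 32-bit value by 32. */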
49
50
static uint64_t ROL64(uint64_t val, int offset)
51
461M
{
52
461M
    if (offset == 0) {
53
0
        return val;
54
461M
    } else if (!BIT_INTERLEAVE) {
55
461M
        return (val << offset) | (val >> (64 - offset));
56
461M
    } else {
57
0
        uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;
58
59
0
        if (offset & 1) {
60
0
            uint32_t tmp = hi;
61
62
0
            offset >>= 1;
63
0
            hi = ROL32(lo, offset);
64
0
            lo = ROL32(tmp, offset + 1);
65
0
        } else {
66
0
            offset >>= 1;
67
0
            lo = ROL32(lo, offset);
68
0
            hi = ROL32(hi, offset);
69
0
        }
70
71
0
        return ((uint64_t)hi << 32) | lo;
72
0
    }
73
461M
}
74
75
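/* Rotation offsets for the rho step, stored with the same transposed
 * convention as the state: rhotates[y][x] is the offset for lane (x, y). */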
static const unsigned char rhotates[5][5] = {
76
    { 0, 1, 62, 28, 27 },
77
    { 36, 44, 6, 55, 20 },
78
    { 3, 10, 43, 25, 39 },
79
    { 41, 45, 15, 21, 8 },
80
    { 18, 2, 61, 56, 14 }
81
};
82
83
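/* Iota round constants. When BIT_INTERLEAVE is in effect, each constant is
 * stored pre-interleaved, i.e. BitInterleave() applied to the plain value:
 * even-numbered bits in the low 32-bit half, odd-numbered bits in the high half. */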
static const uint64_t iotas[] = {
84
    BIT_INTERLEAVE ? 0x0000000000000001ULL : 0x0000000000000001ULL,
85
    BIT_INTERLEAVE ? 0x0000008900000000ULL : 0x0000000000008082ULL,
86
    BIT_INTERLEAVE ? 0x8000008b00000000ULL : 0x800000000000808aULL,
87
    BIT_INTERLEAVE ? 0x8000808000000000ULL : 0x8000000080008000ULL,
88
    BIT_INTERLEAVE ? 0x0000008b00000001ULL : 0x000000000000808bULL,
89
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
90
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
91
    BIT_INTERLEAVE ? 0x8000008200000001ULL : 0x8000000000008009ULL,
92
    BIT_INTERLEAVE ? 0x0000000b00000000ULL : 0x000000000000008aULL,
93
    BIT_INTERLEAVE ? 0x0000000a00000000ULL : 0x0000000000000088ULL,
94
    BIT_INTERLEAVE ? 0x0000808200000001ULL : 0x0000000080008009ULL,
95
    BIT_INTERLEAVE ? 0x0000800300000000ULL : 0x000000008000000aULL,
96
    BIT_INTERLEAVE ? 0x0000808b00000001ULL : 0x000000008000808bULL,
97
    BIT_INTERLEAVE ? 0x8000000b00000001ULL : 0x800000000000008bULL,
98
    BIT_INTERLEAVE ? 0x8000008a00000001ULL : 0x8000000000008089ULL,
99
    BIT_INTERLEAVE ? 0x8000008100000001ULL : 0x8000000000008003ULL,
100
    BIT_INTERLEAVE ? 0x8000008100000000ULL : 0x8000000000008002ULL,
101
    BIT_INTERLEAVE ? 0x8000000800000000ULL : 0x8000000000000080ULL,
102
    BIT_INTERLEAVE ? 0x0000008300000000ULL : 0x000000000000800aULL,
103
    BIT_INTERLEAVE ? 0x8000800300000000ULL : 0x800000008000000aULL,
104
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
105
    BIT_INTERLEAVE ? 0x8000008800000000ULL : 0x8000000000008080ULL,
106
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
107
    BIT_INTERLEAVE ? 0x8000808200000000ULL : 0x8000000080008008ULL
108
};
109
110
#if defined(KECCAK_REF)
111
/*
112
 * This is a straightforward, "maximum clarity" implementation aiming
113
 * to resemble section 3.2 of FIPS PUB 202 "SHA-3 Standard:
114
 * Permutation-Based Hash and Extendable-Output Functions" as closely as
115
 * possible, with one caveat: because of the way C stores matrices,
116
 * references to A[x,y] in the specification appear here as A[y][x].
117
 * The implementation unrolls the inner x-loops so that the modulo-5
118
 * operations are explicitly pre-computed.
119
 */
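/* For example, the specification's theta step C[x] = A[x,0] ^ A[x,1] ^ ... ^ A[x,4]
 * reads C[x] = A[0][x] ^ A[1][x] ^ ... ^ A[4][x] with this storage order, which is
 * exactly what Theta() below computes. */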
120
static void Theta(uint64_t A[5][5])
121
{
122
    uint64_t C[5], D[5];
123
    size_t y;
124
125
    C[0] = A[0][0];
126
    C[1] = A[0][1];
127
    C[2] = A[0][2];
128
    C[3] = A[0][3];
129
    C[4] = A[0][4];
130
131
    for (y = 1; y < 5; y++) {
132
        C[0] ^= A[y][0];
133
        C[1] ^= A[y][1];
134
        C[2] ^= A[y][2];
135
        C[3] ^= A[y][3];
136
        C[4] ^= A[y][4];
137
    }
138
139
    D[0] = ROL64(C[1], 1) ^ C[4];
140
    D[1] = ROL64(C[2], 1) ^ C[0];
141
    D[2] = ROL64(C[3], 1) ^ C[1];
142
    D[3] = ROL64(C[4], 1) ^ C[2];
143
    D[4] = ROL64(C[0], 1) ^ C[3];
144
145
    for (y = 0; y < 5; y++) {
146
        A[y][0] ^= D[0];
147
        A[y][1] ^= D[1];
148
        A[y][2] ^= D[2];
149
        A[y][3] ^= D[3];
150
        A[y][4] ^= D[4];
151
    }
152
}
153
154
static void Rho(uint64_t A[5][5])
155
{
156
    size_t y;
157
158
    for (y = 0; y < 5; y++) {
159
        A[y][0] = ROL64(A[y][0], rhotates[y][0]);
160
        A[y][1] = ROL64(A[y][1], rhotates[y][1]);
161
        A[y][2] = ROL64(A[y][2], rhotates[y][2]);
162
        A[y][3] = ROL64(A[y][3], rhotates[y][3]);
163
        A[y][4] = ROL64(A[y][4], rhotates[y][4]);
164
    }
165
}
166
167
static void Pi(uint64_t A[5][5])
168
{
169
    uint64_t T[5][5];
170
171
    /*
172
     * T = A
173
     * A[y][x] = T[x][(3*y+x)%5]
174
     */
175
    memcpy(T, A, sizeof(T));
176
177
    A[0][0] = T[0][0];
178
    A[0][1] = T[1][1];
179
    A[0][2] = T[2][2];
180
    A[0][3] = T[3][3];
181
    A[0][4] = T[4][4];
182
183
    A[1][0] = T[0][3];
184
    A[1][1] = T[1][4];
185
    A[1][2] = T[2][0];
186
    A[1][3] = T[3][1];
187
    A[1][4] = T[4][2];
188
189
    A[2][0] = T[0][1];
190
    A[2][1] = T[1][2];
191
    A[2][2] = T[2][3];
192
    A[2][3] = T[3][4];
193
    A[2][4] = T[4][0];
194
195
    A[3][0] = T[0][4];
196
    A[3][1] = T[1][0];
197
    A[3][2] = T[2][1];
198
    A[3][3] = T[3][2];
199
    A[3][4] = T[4][3];
200
201
    A[4][0] = T[0][2];
202
    A[4][1] = T[1][3];
203
    A[4][2] = T[2][4];
204
    A[4][3] = T[3][0];
205
    A[4][4] = T[4][1];
206
}
207
208
static void Chi(uint64_t A[5][5])
209
{
210
    uint64_t C[5];
211
    size_t y;
212
213
    for (y = 0; y < 5; y++) {
214
        C[0] = A[y][0] ^ (~A[y][1] & A[y][2]);
215
        C[1] = A[y][1] ^ (~A[y][2] & A[y][3]);
216
        C[2] = A[y][2] ^ (~A[y][3] & A[y][4]);
217
        C[3] = A[y][3] ^ (~A[y][4] & A[y][0]);
218
        C[4] = A[y][4] ^ (~A[y][0] & A[y][1]);
219
220
        A[y][0] = C[0];
221
        A[y][1] = C[1];
222
        A[y][2] = C[2];
223
        A[y][3] = C[3];
224
        A[y][4] = C[4];
225
    }
226
}
227
228
static void Iota(uint64_t A[5][5], size_t i)
229
{
230
    assert(i < OSSL_NELEM(iotas));
231
    A[0][0] ^= iotas[i];
232
}
233
234
static void KeccakF1600(uint64_t A[5][5])
235
{
236
    size_t i;
237
238
    for (i = 0; i < 24; i++) {
239
        Theta(A);
240
        Rho(A);
241
        Pi(A);
242
        Chi(A);
243
        Iota(A, i);
244
    }
245
}
246
247
#elif defined(KECCAK_1X)
248
/*
249
 * This implementation is an optimization of the code above, featuring
250
 * unrolling of even the y-loops, their fusion and code motion. It also
251
 * minimizes temporary storage. A compiler would normally do all these
252
 * things for you; the purpose of the manual optimization is to provide an
253
 * "unobscured" reference for an assembly implementation [in case this
254
 * approach is chosen for some platform]. In a nutshell it is equivalent
255
 * to the "plane-per-plane processing" approach discussed in section 2.4
256
 * of the "Keccak implementation overview".
257
 */
258
static void Round(uint64_t A[5][5], size_t i)
259
{
260
    uint64_t C[5], E[2]; /* registers */
261
    uint64_t D[5], T[2][5]; /* memory    */
262
263
    assert(i < OSSL_NELEM(iotas));
264
265
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
266
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
267
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
268
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
269
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
270
271
#if defined(__arm__)
272
    D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
273
    D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
274
    D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
275
    D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
276
    D[3] = C[2] = ROL64(C[4], 1) ^ C[2];
277
278
    T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
279
    T[0][1] = A[0][1] ^ E[0]; /* D[1] */
280
    T[0][2] = A[0][2] ^ C[1]; /* D[2] */
281
    T[0][3] = A[0][3] ^ C[2]; /* D[3] */
282
    T[0][4] = A[0][4] ^ E[1]; /* D[4] */
283
284
    C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
285
    C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
286
    C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
287
    C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
288
    C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
289
#else
290
    D[0] = ROL64(C[1], 1) ^ C[4];
291
    D[1] = ROL64(C[2], 1) ^ C[0];
292
    D[2] = ROL64(C[3], 1) ^ C[1];
293
    D[3] = ROL64(C[4], 1) ^ C[2];
294
    D[4] = ROL64(C[0], 1) ^ C[3];
295
296
    T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
297
    T[0][1] = A[0][1] ^ D[1];
298
    T[0][2] = A[0][2] ^ D[2];
299
    T[0][3] = A[0][3] ^ D[3];
300
    T[0][4] = A[0][4] ^ D[4];
301
302
    C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
303
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
304
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
305
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
306
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
307
#endif
308
    A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
309
    A[0][1] = C[1] ^ (~C[2] & C[3]);
310
    A[0][2] = C[2] ^ (~C[3] & C[4]);
311
    A[0][3] = C[3] ^ (~C[4] & C[0]);
312
    A[0][4] = C[4] ^ (~C[0] & C[1]);
313
314
    T[1][0] = A[1][0] ^ (C[3] = D[0]);
315
    T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
316
    T[1][2] = A[1][2] ^ (E[0] = D[2]);
317
    T[1][3] = A[1][3] ^ (E[1] = D[3]);
318
    T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
319
320
    C[0] = ROL64(T[0][3], rhotates[0][3]);
321
    C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]); /* D[4] */
322
    C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]); /* D[0] */
323
    C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]); /* D[1] */
324
    C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]); /* D[2] */
325
326
    A[1][0] = C[0] ^ (~C[1] & C[2]);
327
    A[1][1] = C[1] ^ (~C[2] & C[3]);
328
    A[1][2] = C[2] ^ (~C[3] & C[4]);
329
    A[1][3] = C[3] ^ (~C[4] & C[0]);
330
    A[1][4] = C[4] ^ (~C[0] & C[1]);
331
332
    C[0] = ROL64(T[0][1], rhotates[0][1]);
333
    C[1] = ROL64(T[1][2], rhotates[1][2]);
334
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
335
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
336
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
337
338
    A[2][0] = C[0] ^ (~C[1] & C[2]);
339
    A[2][1] = C[1] ^ (~C[2] & C[3]);
340
    A[2][2] = C[2] ^ (~C[3] & C[4]);
341
    A[2][3] = C[3] ^ (~C[4] & C[0]);
342
    A[2][4] = C[4] ^ (~C[0] & C[1]);
343
344
    C[0] = ROL64(T[0][4], rhotates[0][4]);
345
    C[1] = ROL64(T[1][0], rhotates[1][0]);
346
    C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */
347
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
348
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
349
350
    A[3][0] = C[0] ^ (~C[1] & C[2]);
351
    A[3][1] = C[1] ^ (~C[2] & C[3]);
352
    A[3][2] = C[2] ^ (~C[3] & C[4]);
353
    A[3][3] = C[3] ^ (~C[4] & C[0]);
354
    A[3][4] = C[4] ^ (~C[0] & C[1]);
355
356
    C[0] = ROL64(T[0][2], rhotates[0][2]);
357
    C[1] = ROL64(T[1][3], rhotates[1][3]);
358
    C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */
359
    C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */
360
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
361
362
    A[4][0] = C[0] ^ (~C[1] & C[2]);
363
    A[4][1] = C[1] ^ (~C[2] & C[3]);
364
    A[4][2] = C[2] ^ (~C[3] & C[4]);
365
    A[4][3] = C[3] ^ (~C[4] & C[0]);
366
    A[4][4] = C[4] ^ (~C[0] & C[1]);
367
}
368
369
static void KeccakF1600(uint64_t A[5][5])
370
{
371
    size_t i;
372
373
    for (i = 0; i < 24; i++) {
374
        Round(A, i);
375
    }
376
}
377
378
#elif defined(KECCAK_1X_ALT)
379
/*
380
 * This is a variant of KECCAK_1X above that reduces the requirement for
381
 * temporary storage even further, but at the cost of more updates to A[][].
382
 * It's less suitable if A[][] is memory bound, but better if it's
383
 * register bound.
384
 */
385
386
static void Round(uint64_t A[5][5], size_t i)
387
{
388
    uint64_t C[5], D[5];
389
390
    assert(i < OSSL_NELEM(iotas));
391
392
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
393
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
394
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
395
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
396
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
397
398
    D[1] = C[0] ^ ROL64(C[2], 1);
399
    D[2] = C[1] ^ ROL64(C[3], 1);
400
    D[3] = C[2] ^= ROL64(C[4], 1);
401
    D[4] = C[3] ^= ROL64(C[0], 1);
402
    D[0] = C[4] ^= ROL64(C[1], 1);
403
404
    A[0][1] ^= D[1];
405
    A[1][1] ^= D[1];
406
    A[2][1] ^= D[1];
407
    A[3][1] ^= D[1];
408
    A[4][1] ^= D[1];
409
410
    A[0][2] ^= D[2];
411
    A[1][2] ^= D[2];
412
    A[2][2] ^= D[2];
413
    A[3][2] ^= D[2];
414
    A[4][2] ^= D[2];
415
416
    A[0][3] ^= C[2];
417
    A[1][3] ^= C[2];
418
    A[2][3] ^= C[2];
419
    A[3][3] ^= C[2];
420
    A[4][3] ^= C[2];
421
422
    A[0][4] ^= C[3];
423
    A[1][4] ^= C[3];
424
    A[2][4] ^= C[3];
425
    A[3][4] ^= C[3];
426
    A[4][4] ^= C[3];
427
428
    A[0][0] ^= C[4];
429
    A[1][0] ^= C[4];
430
    A[2][0] ^= C[4];
431
    A[3][0] ^= C[4];
432
    A[4][0] ^= C[4];
433
434
    C[1] = A[0][1];
435
    C[2] = A[0][2];
436
    C[3] = A[0][3];
437
    C[4] = A[0][4];
438
439
    A[0][1] = ROL64(A[1][1], rhotates[1][1]);
440
    A[0][2] = ROL64(A[2][2], rhotates[2][2]);
441
    A[0][3] = ROL64(A[3][3], rhotates[3][3]);
442
    A[0][4] = ROL64(A[4][4], rhotates[4][4]);
443
444
    A[1][1] = ROL64(A[1][4], rhotates[1][4]);
445
    A[2][2] = ROL64(A[2][3], rhotates[2][3]);
446
    A[3][3] = ROL64(A[3][2], rhotates[3][2]);
447
    A[4][4] = ROL64(A[4][1], rhotates[4][1]);
448
449
    A[1][4] = ROL64(A[4][2], rhotates[4][2]);
450
    A[2][3] = ROL64(A[3][4], rhotates[3][4]);
451
    A[3][2] = ROL64(A[2][1], rhotates[2][1]);
452
    A[4][1] = ROL64(A[1][3], rhotates[1][3]);
453
454
    A[4][2] = ROL64(A[2][4], rhotates[2][4]);
455
    A[3][4] = ROL64(A[4][3], rhotates[4][3]);
456
    A[2][1] = ROL64(A[1][2], rhotates[1][2]);
457
    A[1][3] = ROL64(A[3][1], rhotates[3][1]);
458
459
    A[2][4] = ROL64(A[4][0], rhotates[4][0]);
460
    A[4][3] = ROL64(A[3][0], rhotates[3][0]);
461
    A[1][2] = ROL64(A[2][0], rhotates[2][0]);
462
    A[3][1] = ROL64(A[1][0], rhotates[1][0]);
463
464
    A[1][0] = ROL64(C[3], rhotates[0][3]);
465
    A[2][0] = ROL64(C[1], rhotates[0][1]);
466
    A[3][0] = ROL64(C[4], rhotates[0][4]);
467
    A[4][0] = ROL64(C[2], rhotates[0][2]);
468
469
    C[0] = A[0][0];
470
    C[1] = A[1][0];
471
    D[0] = A[0][1];
472
    D[1] = A[1][1];
473
474
    A[0][0] ^= (~A[0][1] & A[0][2]);
475
    A[1][0] ^= (~A[1][1] & A[1][2]);
476
    A[0][1] ^= (~A[0][2] & A[0][3]);
477
    A[1][1] ^= (~A[1][2] & A[1][3]);
478
    A[0][2] ^= (~A[0][3] & A[0][4]);
479
    A[1][2] ^= (~A[1][3] & A[1][4]);
480
    A[0][3] ^= (~A[0][4] & C[0]);
481
    A[1][3] ^= (~A[1][4] & C[1]);
482
    A[0][4] ^= (~C[0] & D[0]);
483
    A[1][4] ^= (~C[1] & D[1]);
484
485
    C[2] = A[2][0];
486
    C[3] = A[3][0];
487
    D[2] = A[2][1];
488
    D[3] = A[3][1];
489
490
    A[2][0] ^= (~A[2][1] & A[2][2]);
491
    A[3][0] ^= (~A[3][1] & A[3][2]);
492
    A[2][1] ^= (~A[2][2] & A[2][3]);
493
    A[3][1] ^= (~A[3][2] & A[3][3]);
494
    A[2][2] ^= (~A[2][3] & A[2][4]);
495
    A[3][2] ^= (~A[3][3] & A[3][4]);
496
    A[2][3] ^= (~A[2][4] & C[2]);
497
    A[3][3] ^= (~A[3][4] & C[3]);
498
    A[2][4] ^= (~C[2] & D[2]);
499
    A[3][4] ^= (~C[3] & D[3]);
500
501
    C[4] = A[4][0];
502
    D[4] = A[4][1];
503
504
    A[4][0] ^= (~A[4][1] & A[4][2]);
505
    A[4][1] ^= (~A[4][2] & A[4][3]);
506
    A[4][2] ^= (~A[4][3] & A[4][4]);
507
    A[4][3] ^= (~A[4][4] & C[4]);
508
    A[4][4] ^= (~C[4] & D[4]);
509
    A[0][0] ^= iotas[i];
510
}
511
512
static void KeccakF1600(uint64_t A[5][5])
513
{
514
    size_t i;
515
516
    for (i = 0; i < 24; i++) {
517
        Round(A, i);
518
    }
519
}
520
521
#elif defined(KECCAK_2X)
522
/*
523
 * This implementation is a variant of KECCAK_1X above with the outermost
524
 * round loop unrolled twice. This allows the temporary storage to be taken
525
 * out of the round procedure, and references to it simplified, by alternating
526
 * it with the actual data (see the round loop below). Originally it was meant
527
 * rather as a reference for an assembly implementation, but it seems to
528
 * play best with compilers [as well as provide the best instruction-per-
529
 * processed-byte ratio at the minimal round unroll factor]...
530
 */
531
static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
532
15.9M
{
533
15.9M
    uint64_t C[5], D[5];
534
535
15.9M
    assert(i < OSSL_NELEM(iotas));
536
537
15.9M
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
538
15.9M
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
539
15.9M
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
540
15.9M
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
541
15.9M
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
542
543
15.9M
    D[0] = ROL64(C[1], 1) ^ C[4];
544
15.9M
    D[1] = ROL64(C[2], 1) ^ C[0];
545
15.9M
    D[2] = ROL64(C[3], 1) ^ C[1];
546
15.9M
    D[3] = ROL64(C[4], 1) ^ C[2];
547
15.9M
    D[4] = ROL64(C[0], 1) ^ C[3];
548
549
15.9M
    C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
550
15.9M
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
551
15.9M
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
552
15.9M
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
553
15.9M
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
554
555
15.9M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
556
15.9M
    R[0][0] = C[0] ^ (C[1] | C[2]) ^ iotas[i];
557
15.9M
    R[0][1] = C[1] ^ (~C[2] | C[3]);
558
15.9M
    R[0][2] = C[2] ^ (C[3] & C[4]);
559
15.9M
    R[0][3] = C[3] ^ (C[4] | C[0]);
560
15.9M
    R[0][4] = C[4] ^ (C[0] & C[1]);
561
#else
562
    R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
563
    R[0][1] = C[1] ^ (~C[2] & C[3]);
564
    R[0][2] = C[2] ^ (~C[3] & C[4]);
565
    R[0][3] = C[3] ^ (~C[4] & C[0]);
566
    R[0][4] = C[4] ^ (~C[0] & C[1]);
567
#endif
568
569
15.9M
    C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
570
15.9M
    C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
571
15.9M
    C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
572
15.9M
    C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
573
15.9M
    C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
574
575
15.9M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
576
15.9M
    R[1][0] = C[0] ^ (C[1] | C[2]);
577
15.9M
    R[1][1] = C[1] ^ (C[2] & C[3]);
578
15.9M
    R[1][2] = C[2] ^ (C[3] | ~C[4]);
579
15.9M
    R[1][3] = C[3] ^ (C[4] | C[0]);
580
15.9M
    R[1][4] = C[4] ^ (C[0] & C[1]);
581
#else
582
    R[1][0] = C[0] ^ (~C[1] & C[2]);
583
    R[1][1] = C[1] ^ (~C[2] & C[3]);
584
    R[1][2] = C[2] ^ (~C[3] & C[4]);
585
    R[1][3] = C[3] ^ (~C[4] & C[0]);
586
    R[1][4] = C[4] ^ (~C[0] & C[1]);
587
#endif
588
589
15.9M
    C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
590
15.9M
    C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
591
15.9M
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
592
15.9M
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
593
15.9M
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
594
595
15.9M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
596
15.9M
    R[2][0] = C[0] ^ (C[1] | C[2]);
597
15.9M
    R[2][1] = C[1] ^ (C[2] & C[3]);
598
15.9M
    R[2][2] = C[2] ^ (~C[3] & C[4]);
599
15.9M
    R[2][3] = ~C[3] ^ (C[4] | C[0]);
600
15.9M
    R[2][4] = C[4] ^ (C[0] & C[1]);
601
#else
602
    R[2][0] = C[0] ^ (~C[1] & C[2]);
603
    R[2][1] = C[1] ^ (~C[2] & C[3]);
604
    R[2][2] = C[2] ^ (~C[3] & C[4]);
605
    R[2][3] = C[3] ^ (~C[4] & C[0]);
606
    R[2][4] = C[4] ^ (~C[0] & C[1]);
607
#endif
608
609
15.9M
    C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
610
15.9M
    C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
611
15.9M
    C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
612
15.9M
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
613
15.9M
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
614
615
15.9M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
616
15.9M
    R[3][0] = C[0] ^ (C[1] & C[2]);
617
15.9M
    R[3][1] = C[1] ^ (C[2] | C[3]);
618
15.9M
    R[3][2] = C[2] ^ (~C[3] | C[4]);
619
15.9M
    R[3][3] = ~C[3] ^ (C[4] & C[0]);
620
15.9M
    R[3][4] = C[4] ^ (C[0] | C[1]);
621
#else
622
    R[3][0] = C[0] ^ (~C[1] & C[2]);
623
    R[3][1] = C[1] ^ (~C[2] & C[3]);
624
    R[3][2] = C[2] ^ (~C[3] & C[4]);
625
    R[3][3] = C[3] ^ (~C[4] & C[0]);
626
    R[3][4] = C[4] ^ (~C[0] & C[1]);
627
#endif
628
629
15.9M
    C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
630
15.9M
    C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
631
15.9M
    C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
632
15.9M
    C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
633
15.9M
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
634
635
15.9M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
636
15.9M
    R[4][0] = C[0] ^ (~C[1] & C[2]);
637
15.9M
    R[4][1] = ~C[1] ^ (C[2] | C[3]);
638
15.9M
    R[4][2] = C[2] ^ (C[3] & C[4]);
639
15.9M
    R[4][3] = C[3] ^ (C[4] | C[0]);
640
15.9M
    R[4][4] = C[4] ^ (C[0] & C[1]);
641
#else
642
    R[4][0] = C[0] ^ (~C[1] & C[2]);
643
    R[4][1] = C[1] ^ (~C[2] & C[3]);
644
    R[4][2] = C[2] ^ (~C[3] & C[4]);
645
    R[4][3] = C[3] ^ (~C[4] & C[0]);
646
    R[4][4] = C[4] ^ (~C[0] & C[1]);
647
#endif
648
15.9M
}
649
650
static void KeccakF1600(uint64_t A[5][5])
651
663k
{
652
663k
    uint64_t T[5][5];
653
663k
    size_t i;
654
655
663k
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
656
663k
    A[0][1] = ~A[0][1];
657
663k
    A[0][2] = ~A[0][2];
658
663k
    A[1][3] = ~A[1][3];
659
663k
    A[2][2] = ~A[2][2];
660
663k
    A[3][2] = ~A[3][2];
661
663k
    A[4][0] = ~A[4][0];
662
663k
#endif
663
664
8.62M
    for (i = 0; i < 24; i += 2) {
665
7.96M
        Round(T, A, i);
666
7.96M
        Round(A, T, i + 1);
667
7.96M
    }
668
669
663k
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
670
663k
    A[0][1] = ~A[0][1];
671
663k
    A[0][2] = ~A[0][2];
672
663k
    A[1][3] = ~A[1][3];
673
663k
    A[2][2] = ~A[2][2];
674
663k
    A[3][2] = ~A[3][2];
675
663k
    A[4][0] = ~A[4][0];
676
663k
#endif
677
663k
}
678
679
#else /* define KECCAK_INPLACE to compile this code path */
680
/*
681
 * This implementation is KECCAK_1X from above combined 4 times, with
682
 * a twist that allows temporary storage to be omitted and the processing
683
 * done in place. It's discussed in section 2.5 of the "Keccak implementation
684
 * overview". It's likely to be best suited for processors with a large
685
 * register bank... On the other hand, a processor with a large register
686
 * bank can just as well use KECCAK_1X_ALT; it would be as fast but much
687
 * more compact...
688
 */
689
static void FourRounds(uint64_t A[5][5], size_t i)
690
{
691
    uint64_t B[5], C[5], D[5];
692
693
    assert(i <= OSSL_NELEM(iotas) - 4);
694
695
    /* Round 4*n */
696
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
697
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
698
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
699
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
700
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
701
702
    D[0] = ROL64(C[1], 1) ^ C[4];
703
    D[1] = ROL64(C[2], 1) ^ C[0];
704
    D[2] = ROL64(C[3], 1) ^ C[1];
705
    D[3] = ROL64(C[4], 1) ^ C[2];
706
    D[4] = ROL64(C[0], 1) ^ C[3];
707
708
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
709
    B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
710
    B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
711
    B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
712
    B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
713
714
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
715
    C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
716
    C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
717
    C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
718
    C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);
719
720
    B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
721
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
722
    B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
723
    B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
724
    B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
725
726
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
727
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
728
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
729
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
730
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
731
732
    B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
733
    B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
734
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
735
    B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
736
    B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
737
738
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
739
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
740
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
741
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
742
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
743
744
    B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
745
    B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
746
    B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
747
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
748
    B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
749
750
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
751
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
752
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
753
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
754
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
755
756
    B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
757
    B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
758
    B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
759
    B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
760
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
761
762
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
763
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
764
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
765
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
766
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
767
768
    /* Round 4*n+1 */
769
    D[0] = ROL64(C[1], 1) ^ C[4];
770
    D[1] = ROL64(C[2], 1) ^ C[0];
771
    D[2] = ROL64(C[3], 1) ^ C[1];
772
    D[3] = ROL64(C[4], 1) ^ C[2];
773
    D[4] = ROL64(C[0], 1) ^ C[3];
774
775
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
776
    B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
777
    B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
778
    B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
779
    B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);
780
781
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
782
    C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
783
    C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
784
    C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
785
    C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);
786
787
    B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
788
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
789
    B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
790
    B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
791
    B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);
792
793
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
794
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
795
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
796
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
797
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
798
799
    B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
800
    B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
801
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
802
    B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
803
    B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);
804
805
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
806
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
807
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
808
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
809
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
810
811
    B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
812
    B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
813
    B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
814
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
815
    B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);
816
817
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
818
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
819
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
820
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
821
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
822
823
    B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
824
    B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
825
    B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
826
    B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
827
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
828
829
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
830
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
831
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
832
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
833
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
834
835
    /* Round 4*n+2 */
836
    D[0] = ROL64(C[1], 1) ^ C[4];
837
    D[1] = ROL64(C[2], 1) ^ C[0];
838
    D[2] = ROL64(C[3], 1) ^ C[1];
839
    D[3] = ROL64(C[4], 1) ^ C[2];
840
    D[4] = ROL64(C[0], 1) ^ C[3];
841
842
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
843
    B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
844
    B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
845
    B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
846
    B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);
847
848
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
849
    C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
850
    C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
851
    C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
852
    C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);
853
854
    B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
855
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
856
    B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
857
    B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
858
    B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);
859
860
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
861
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
862
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
863
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
864
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
865
866
    B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
867
    B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
868
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
869
    B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
870
    B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);
871
872
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
873
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
874
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
875
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
876
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
877
878
    B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
879
    B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
880
    B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
881
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
882
    B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);
883
884
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
885
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
886
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
887
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
888
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
889
890
    B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
891
    B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
892
    B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
893
    B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
894
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
895
896
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
897
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
898
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
899
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
900
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
901
902
    /* Round 4*n+3 */
903
    D[0] = ROL64(C[1], 1) ^ C[4];
904
    D[1] = ROL64(C[2], 1) ^ C[0];
905
    D[2] = ROL64(C[3], 1) ^ C[1];
906
    D[3] = ROL64(C[4], 1) ^ C[2];
907
    D[4] = ROL64(C[0], 1) ^ C[3];
908
909
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
910
    B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
911
    B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
912
    B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
913
    B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);
914
915
    /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
916
    /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
917
    /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
918
    /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
919
    /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);
920
921
    B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
922
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
923
    B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
924
    B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
925
    B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);
926
927
    /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
928
    /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
929
    /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
930
    /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
931
    /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);
932
933
    B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
934
    B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
935
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
936
    B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
937
    B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);
938
939
    /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
940
    /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
941
    /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
942
    /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
943
    /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);
944
945
    B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
946
    B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
947
    B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
948
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
949
    B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);
950
951
    /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
952
    /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
953
    /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
954
    /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
955
    /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);
956
957
    B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
958
    B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
959
    B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
960
    B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
961
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
962
963
    /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
964
    /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
965
    /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
966
    /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
967
    /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
968
}
969
970
static void KeccakF1600(uint64_t A[5][5])
971
{
972
    size_t i;
973
974
    for (i = 0; i < 24; i += 4) {
975
        FourRounds(A, i);
976
    }
977
}
978
979
#endif
980
981
static uint64_t BitInterleave(uint64_t Ai)
982
10.6M
{
983
10.6M
    if (BIT_INTERLEAVE) {
984
0
        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
985
0
        uint32_t t0, t1;
986
987
0
        t0 = lo & 0x55555555;
988
0
        t0 |= t0 >> 1;
989
0
        t0 &= 0x33333333;
990
0
        t0 |= t0 >> 2;
991
0
        t0 &= 0x0f0f0f0f;
992
0
        t0 |= t0 >> 4;
993
0
        t0 &= 0x00ff00ff;
994
0
        t0 |= t0 >> 8;
995
0
        t0 &= 0x0000ffff;
996
997
0
        t1 = hi & 0x55555555;
998
0
        t1 |= t1 >> 1;
999
0
        t1 &= 0x33333333;
1000
0
        t1 |= t1 >> 2;
1001
0
        t1 &= 0x0f0f0f0f;
1002
0
        t1 |= t1 >> 4;
1003
0
        t1 &= 0x00ff00ff;
1004
0
        t1 |= t1 >> 8;
1005
0
        t1 <<= 16;
1006
1007
0
        lo &= 0xaaaaaaaa;
1008
0
        lo |= lo << 1;
1009
0
        lo &= 0xcccccccc;
1010
0
        lo |= lo << 2;
1011
0
        lo &= 0xf0f0f0f0;
1012
0
        lo |= lo << 4;
1013
0
        lo &= 0xff00ff00;
1014
0
        lo |= lo << 8;
1015
0
        lo >>= 16;
1016
1017
0
        hi &= 0xaaaaaaaa;
1018
0
        hi |= hi << 1;
1019
0
        hi &= 0xcccccccc;
1020
0
        hi |= hi << 2;
1021
0
        hi &= 0xf0f0f0f0;
1022
0
        hi |= hi << 4;
1023
0
        hi &= 0xff00ff00;
1024
0
        hi |= hi << 8;
1025
0
        hi &= 0xffff0000;
1026
1027
0
        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
1028
0
    }
1029
1030
10.6M
    return Ai;
1031
10.6M
}
1032
1033
static uint64_t BitDeinterleave(uint64_t Ai)
1034
254
{
1035
254
    if (BIT_INTERLEAVE) {
1036
0
        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
1037
0
        uint32_t t0, t1;
1038
1039
0
        t0 = lo & 0x0000ffff;
1040
0
        t0 |= t0 << 8;
1041
0
        t0 &= 0x00ff00ff;
1042
0
        t0 |= t0 << 4;
1043
0
        t0 &= 0x0f0f0f0f;
1044
0
        t0 |= t0 << 2;
1045
0
        t0 &= 0x33333333;
1046
0
        t0 |= t0 << 1;
1047
0
        t0 &= 0x55555555;
1048
1049
0
        t1 = hi << 16;
1050
0
        t1 |= t1 >> 8;
1051
0
        t1 &= 0xff00ff00;
1052
0
        t1 |= t1 >> 4;
1053
0
        t1 &= 0xf0f0f0f0;
1054
0
        t1 |= t1 >> 2;
1055
0
        t1 &= 0xcccccccc;
1056
0
        t1 |= t1 >> 1;
1057
0
        t1 &= 0xaaaaaaaa;
1058
1059
0
        lo >>= 16;
1060
0
        lo |= lo << 8;
1061
0
        lo &= 0x00ff00ff;
1062
0
        lo |= lo << 4;
1063
0
        lo &= 0x0f0f0f0f;
1064
0
        lo |= lo << 2;
1065
0
        lo &= 0x33333333;
1066
0
        lo |= lo << 1;
1067
0
        lo &= 0x55555555;
1068
1069
0
        hi &= 0xffff0000;
1070
0
        hi |= hi >> 8;
1071
0
        hi &= 0xff00ff00;
1072
0
        hi |= hi >> 4;
1073
0
        hi &= 0xf0f0f0f0;
1074
0
        hi |= hi >> 2;
1075
0
        hi &= 0xcccccccc;
1076
0
        hi |= hi >> 1;
1077
0
        hi &= 0xaaaaaaaa;
1078
1079
0
        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
1080
0
    }
1081
1082
254
    return Ai;
1083
254
}
1084
1085
/*
1086
 * SHA3_absorb can be called multiple times; at each invocation the
1087
 * largest multiple of |r| out of |len| bytes is processed, and the
1088
 * number of remaining bytes is returned. This spares the caller the
1089
 * trouble of calculating the largest multiple of |r|. |r| can be viewed
1090
 * as the block size. It is commonly (1600 - 256*n)/8, e.g. 168, 136, 104,
1091
 * 72, but can also be (1600 - 448)/8 = 144. All this means that message
1092
 * padding and intermediate sub-block buffering, byte- or bit-wise, is the
1093
 * caller's responsibility.
1094
 */
1095
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
1096
    size_t r)
1097
220
{
1098
220
    uint64_t *A_flat = (uint64_t *)A;
1099
220
    size_t i, w = r / 8;
1100
1101
220
    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
1102
1103
663k
    while (len >= r) {
1104
11.3M
        for (i = 0; i < w; i++) {
1105
10.6M
            uint64_t Ai = (uint64_t)inp[0] | (uint64_t)inp[1] << 8 | (uint64_t)inp[2] << 16 | (uint64_t)inp[3] << 24 | (uint64_t)inp[4] << 32 | (uint64_t)inp[5] << 40 | (uint64_t)inp[6] << 48 | (uint64_t)inp[7] << 56;
1106
10.6M
            inp += 8;
1107
1108
10.6M
            A_flat[i] ^= BitInterleave(Ai);
1109
10.6M
        }
1110
663k
        KeccakF1600(A);
1111
663k
        len -= r;
1112
663k
    }
1113
1114
220
    return len;
1115
220
}
1116
1117
/*
1118
 * SHA3_squeeze may be called after SHA3_absorb to generate the hash value
1119
 * |out| of |len| bytes.
1120
 * If multiple SHA3_squeeze calls are required, the output length |len| must be
1121
 * a multiple of the block size, with |next| being 0 on the first call and 1 on
1122
 * subsequent calls. It is the caller's responsibility to buffer the results.
1123
 * When only a single call to SHA3_squeeze is required, |len| can be any size
1124
 * and |next| must be 0.
1125
 */
1126
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r,
1127
    int next)
1128
55
{
1129
55
    uint64_t *A_flat = (uint64_t *)A;
1130
55
    size_t i, w = r / 8;
1131
1132
55
    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
1133
1134
78
    while (len != 0) {
1135
55
        if (next)
1136
0
            KeccakF1600(A);
1137
55
        next = 1;
1138
277
        for (i = 0; i < w && len != 0; i++) {
1139
254
            uint64_t Ai = BitDeinterleave(A_flat[i]);
1140
1141
254
            if (len < 8) {
1142
160
                for (i = 0; i < len; i++) {
1143
128
                    *out++ = (unsigned char)Ai;
1144
128
                    Ai >>= 8;
1145
128
                }
1146
32
                return;
1147
32
            }
1148
1149
222
            out[0] = (unsigned char)(Ai);
1150
222
            out[1] = (unsigned char)(Ai >> 8);
1151
222
            out[2] = (unsigned char)(Ai >> 16);
1152
222
            out[3] = (unsigned char)(Ai >> 24);
1153
222
            out[4] = (unsigned char)(Ai >> 32);
1154
222
            out[5] = (unsigned char)(Ai >> 40);
1155
222
            out[6] = (unsigned char)(Ai >> 48);
1156
222
            out[7] = (unsigned char)(Ai >> 56);
1157
222
            out += 8;
1158
222
            len -= 8;
1159
222
        }
1160
55
    }
1161
55
}
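
To make the calling convention above concrete, here is a minimal one-shot SHA3-256 sketch built on the two entry points (an illustration, not part of the original file; the helper name and its buffering are assumptions of the sketch, and 0x06/0x80 are the standard SHA-3 pad10*1 bytes):

/* Sketch only: one-shot SHA3-256 on top of SHA3_absorb()/SHA3_squeeze(). */
static void sha3_256_oneshot(const unsigned char *msg, size_t len,
                             unsigned char out[32])
{
    uint64_t A[5][5];
    unsigned char block[(1600 - 512) / 8];      /* r = 136 bytes for SHA3-256 */
    size_t r = sizeof(block), rem;

    memset(A, 0, sizeof(A));
    rem = SHA3_absorb(A, msg, len, r);          /* whole blocks are absorbed */

    /* Padding and sub-block buffering are the caller's job (see above). */
    memset(block, 0, r);
    memcpy(block, msg + (len - rem), rem);
    block[rem] = 0x06;                          /* SHA-3 domain bits + first pad bit */
    block[r - 1] |= 0x80;                       /* final pad bit */
    (void)SHA3_absorb(A, block, r, r);

    SHA3_squeeze(A, out, 32, r, 0);             /* single squeeze: |next| is 0 */
}

For extendable output, the caller would instead pass a |len| that is a multiple of r and set |next| to 0 on the first SHA3_squeeze call and 1 on subsequent calls, as documented above.
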
1162
#endif
1163
1164
#ifdef SELFTEST
1165
/*
1166
 * Post-padding one-shot implementations would look as follows:
1167
 *
1168
 * SHA3_224     SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
1169
 * SHA3_256     SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
1170
 * SHA3_384     SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
1171
 * SHA3_512     SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
1172
 * SHAKE_128    SHA3_sponge(inp, len, out, d, (1600-256)/8);
1173
 * SHAKE_256    SHA3_sponge(inp, len, out, d, (1600-512)/8);
1174
 */
1175
1176
void SHA3_sponge(const unsigned char *inp, size_t len,
1177
    unsigned char *out, size_t d, size_t r)
1178
{
1179
    uint64_t A[5][5];
1180
1181
    memset(A, 0, sizeof(A));
1182
    SHA3_absorb(A, inp, len, r);
1183
    SHA3_squeeze(A, out, d, r, 0);
1184
}
1185
1186
#include <stdio.h>
1187
1188
int main(void)
1189
{
1190
    /*
1191
     * This is the 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
1192
     */
1193
    unsigned char test[168] = { '\xf3', '\x3' };
1194
    unsigned char out[512];
1195
    size_t i;
1196
    static const unsigned char result[512] = {
1197
        0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
1198
        0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
1199
        0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
1200
        0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
1201
        0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
1202
        0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
1203
        0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
1204
        0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
1205
        0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
1206
        0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
1207
        0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
1208
        0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
1209
        0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
1210
        0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
1211
        0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
1212
        0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
1213
        0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
1214
        0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
1215
        0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
1216
        0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
1217
        0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
1218
        0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
1219
        0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
1220
        0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
1221
        0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
1222
        0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
1223
        0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
1224
        0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
1225
        0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
1226
        0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
1227
        0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
1228
        0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
1229
        0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
1230
        0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
1231
        0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
1232
        0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
1233
        0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
1234
        0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
1235
        0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
1236
        0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
1237
        0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
1238
        0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
1239
        0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
1240
        0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
1241
        0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
1242
        0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
1243
        0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
1244
        0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
1245
        0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
1246
        0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
1247
        0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
1248
        0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
1249
        0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
1250
        0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
1251
        0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
1252
        0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
1253
        0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
1254
        0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
1255
        0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
1256
        0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
1257
        0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
1258
        0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
1259
        0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
1260
        0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
1261
    };
1262
1263
    test[167] = '\x80';
1264
    SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));
1265
1266
    /*
1267
 * The rationale for keeping the output [formatted as below] is that
1268
 * one should be able to redirect it to a file, then copy-and-paste the
1269
 * final "output val" from the official example into another file, and
1270
 * compare the two with diff(1).
1271
     */
1272
    for (i = 0; i < sizeof(out);) {
1273
        printf("%02X", out[i]);
1274
        printf(++i % 16 && i != sizeof(out) ? " " : "\n");
1275
    }
1276
1277
    if (memcmp(out, result, sizeof(out))) {
1278
        fprintf(stderr, "failure\n");
1279
        return 1;
1280
    } else {
1281
        fprintf(stderr, "success\n");
1282
        return 0;
1283
    }
1284
}
1285
#endif