Coverage Report

Created: 2025-12-14 06:35

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl/crypto/sha/keccak1600.c
Line
Count
Source
1
/*
2
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <openssl/e_os2.h>
11
#include <string.h>
12
#include <assert.h>
13
14
#include "internal/nelem.h"
15
16
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
17
    size_t r);
18
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, int next);
19
20
#if !defined(KECCAK1600_ASM) || !defined(SELFTEST)
21
22
/*
23
 * Choose some sensible defaults
24
 */
25
#if !defined(KECCAK_REF) && !defined(KECCAK_1X) && !defined(KECCAK_1X_ALT) && !defined(KECCAK_2X) && !defined(KECCAK_INPLACE)
26
#define KECCAK_2X /* default to KECCAK_2X variant */
27
#endif
28
29
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || (defined(__x86_64) && !defined(__BMI__)) || defined(_M_X64) || defined(__mips) || defined(__riscv) || defined(__s390__) || defined(__EMSCRIPTEN__)
30
/*
31
 * These don't have "and with complement" instruction, so minimize amount
32
 * of "not"-s. Implemented only in the [default] KECCAK_2X variant.
33
 */
34
#define KECCAK_COMPLEMENTING_TRANSFORM
35
#endif
36
37
#if defined(__x86_64__) || defined(__aarch64__) || defined(__mips64) || defined(__ia64) || (defined(__VMS) && !defined(__vax))
38
/*
39
 * These are available even in ILP32 flavours, but even then they are
40
 * capable of performing 64-bit operations as efficiently as in *P64.
41
 * Since it's not given that we can use sizeof(void *), just shunt it.
42
 */
43
570M
#define BIT_INTERLEAVE (0)
44
#else
45
#define BIT_INTERLEAVE (sizeof(void *) < 8)
46
#endif
47
48
0
#define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))
49
50
static uint64_t ROL64(uint64_t val, int offset)
51
557M
{
52
557M
    if (offset == 0) {
53
0
        return val;
54
557M
    } else if (!BIT_INTERLEAVE) {
55
557M
        return (val << offset) | (val >> (64 - offset));
56
557M
    } else {
57
0
        uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;
58
59
0
        if (offset & 1) {
60
0
            uint32_t tmp = hi;
61
62
0
            offset >>= 1;
63
0
            hi = ROL32(lo, offset);
64
0
            lo = ROL32(tmp, offset + 1);
65
0
        } else {
66
0
            offset >>= 1;
67
0
            lo = ROL32(lo, offset);
68
0
            hi = ROL32(hi, offset);
69
0
        }
70
71
0
        return ((uint64_t)hi << 32) | lo;
72
0
    }
73
557M
}
74
75
static const unsigned char rhotates[5][5] = {
76
    { 0, 1, 62, 28, 27 },
77
    { 36, 44, 6, 55, 20 },
78
    { 3, 10, 43, 25, 39 },
79
    { 41, 45, 15, 21, 8 },
80
    { 18, 2, 61, 56, 14 }
81
};
82
83
static const uint64_t iotas[] = {
84
    BIT_INTERLEAVE ? 0x0000000000000001ULL : 0x0000000000000001ULL,
85
    BIT_INTERLEAVE ? 0x0000008900000000ULL : 0x0000000000008082ULL,
86
    BIT_INTERLEAVE ? 0x8000008b00000000ULL : 0x800000000000808aULL,
87
    BIT_INTERLEAVE ? 0x8000808000000000ULL : 0x8000000080008000ULL,
88
    BIT_INTERLEAVE ? 0x0000008b00000001ULL : 0x000000000000808bULL,
89
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
90
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
91
    BIT_INTERLEAVE ? 0x8000008200000001ULL : 0x8000000000008009ULL,
92
    BIT_INTERLEAVE ? 0x0000000b00000000ULL : 0x000000000000008aULL,
93
    BIT_INTERLEAVE ? 0x0000000a00000000ULL : 0x0000000000000088ULL,
94
    BIT_INTERLEAVE ? 0x0000808200000001ULL : 0x0000000080008009ULL,
95
    BIT_INTERLEAVE ? 0x0000800300000000ULL : 0x000000008000000aULL,
96
    BIT_INTERLEAVE ? 0x0000808b00000001ULL : 0x000000008000808bULL,
97
    BIT_INTERLEAVE ? 0x8000000b00000001ULL : 0x800000000000008bULL,
98
    BIT_INTERLEAVE ? 0x8000008a00000001ULL : 0x8000000000008089ULL,
99
    BIT_INTERLEAVE ? 0x8000008100000001ULL : 0x8000000000008003ULL,
100
    BIT_INTERLEAVE ? 0x8000008100000000ULL : 0x8000000000008002ULL,
101
    BIT_INTERLEAVE ? 0x8000000800000000ULL : 0x8000000000000080ULL,
102
    BIT_INTERLEAVE ? 0x0000008300000000ULL : 0x000000000000800aULL,
103
    BIT_INTERLEAVE ? 0x8000800300000000ULL : 0x800000008000000aULL,
104
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
105
    BIT_INTERLEAVE ? 0x8000008800000000ULL : 0x8000000000008080ULL,
106
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
107
    BIT_INTERLEAVE ? 0x8000808200000000ULL : 0x8000000080008008ULL
108
};
109
110
#if defined(KECCAK_REF)
111
/*
112
 * This is straightforward or "maximum clarity" implementation aiming
113
 * to resemble section 3.2 of the FIPS PUB 202 "SHA-3 Standard:
114
 * Permutation-Based Hash and Extendible-Output Functions" as much as
115
 * possible. With one caveat. Because of the way C stores matrices,
116
 * references to A[x,y] in the specification are presented as A[y][x].
117
 * Implementation unrolls inner x-loops so that modulo 5 operations are
118
 * explicitly pre-computed.
119
 */
120
static void Theta(uint64_t A[5][5])
121
{
122
    uint64_t C[5], D[5];
123
    size_t y;
124
125
    C[0] = A[0][0];
126
    C[1] = A[0][1];
127
    C[2] = A[0][2];
128
    C[3] = A[0][3];
129
    C[4] = A[0][4];
130
131
    for (y = 1; y < 5; y++) {
132
        C[0] ^= A[y][0];
133
        C[1] ^= A[y][1];
134
        C[2] ^= A[y][2];
135
        C[3] ^= A[y][3];
136
        C[4] ^= A[y][4];
137
    }
138
139
    D[0] = ROL64(C[1], 1) ^ C[4];
140
    D[1] = ROL64(C[2], 1) ^ C[0];
141
    D[2] = ROL64(C[3], 1) ^ C[1];
142
    D[3] = ROL64(C[4], 1) ^ C[2];
143
    D[4] = ROL64(C[0], 1) ^ C[3];
144
145
    for (y = 0; y < 5; y++) {
146
        A[y][0] ^= D[0];
147
        A[y][1] ^= D[1];
148
        A[y][2] ^= D[2];
149
        A[y][3] ^= D[3];
150
        A[y][4] ^= D[4];
151
    }
152
}
153
154
static void Rho(uint64_t A[5][5])
155
{
156
    size_t y;
157
158
    for (y = 0; y < 5; y++) {
159
        A[y][0] = ROL64(A[y][0], rhotates[y][0]);
160
        A[y][1] = ROL64(A[y][1], rhotates[y][1]);
161
        A[y][2] = ROL64(A[y][2], rhotates[y][2]);
162
        A[y][3] = ROL64(A[y][3], rhotates[y][3]);
163
        A[y][4] = ROL64(A[y][4], rhotates[y][4]);
164
    }
165
}
166
167
static void Pi(uint64_t A[5][5])
168
{
169
    uint64_t T[5][5];
170
171
    /*
172
     * T = A
173
     * A[y][x] = T[x][(3*y+x)%5]
174
     */
175
    memcpy(T, A, sizeof(T));
176
177
    A[0][0] = T[0][0];
178
    A[0][1] = T[1][1];
179
    A[0][2] = T[2][2];
180
    A[0][3] = T[3][3];
181
    A[0][4] = T[4][4];
182
183
    A[1][0] = T[0][3];
184
    A[1][1] = T[1][4];
185
    A[1][2] = T[2][0];
186
    A[1][3] = T[3][1];
187
    A[1][4] = T[4][2];
188
189
    A[2][0] = T[0][1];
190
    A[2][1] = T[1][2];
191
    A[2][2] = T[2][3];
192
    A[2][3] = T[3][4];
193
    A[2][4] = T[4][0];
194
195
    A[3][0] = T[0][4];
196
    A[3][1] = T[1][0];
197
    A[3][2] = T[2][1];
198
    A[3][3] = T[3][2];
199
    A[3][4] = T[4][3];
200
201
    A[4][0] = T[0][2];
202
    A[4][1] = T[1][3];
203
    A[4][2] = T[2][4];
204
    A[4][3] = T[3][0];
205
    A[4][4] = T[4][1];
206
}
207
208
static void Chi(uint64_t A[5][5])
209
{
210
    uint64_t C[5];
211
    size_t y;
212
213
    for (y = 0; y < 5; y++) {
214
        C[0] = A[y][0] ^ (~A[y][1] & A[y][2]);
215
        C[1] = A[y][1] ^ (~A[y][2] & A[y][3]);
216
        C[2] = A[y][2] ^ (~A[y][3] & A[y][4]);
217
        C[3] = A[y][3] ^ (~A[y][4] & A[y][0]);
218
        C[4] = A[y][4] ^ (~A[y][0] & A[y][1]);
219
220
        A[y][0] = C[0];
221
        A[y][1] = C[1];
222
        A[y][2] = C[2];
223
        A[y][3] = C[3];
224
        A[y][4] = C[4];
225
    }
226
}
227
228
static void Iota(uint64_t A[5][5], size_t i)
229
{
230
    assert(i < OSSL_NELEM(iotas));
231
    A[0][0] ^= iotas[i];
232
}
233
234
static void KeccakF1600(uint64_t A[5][5])
235
{
236
    size_t i;
237
238
    for (i = 0; i < 24; i++) {
239
        Theta(A);
240
        Rho(A);
241
        Pi(A);
242
        Chi(A);
243
        Iota(A, i);
244
    }
245
}
246
247
#elif defined(KECCAK_1X)
248
/*
249
 * This implementation is optimization of above code featuring unroll
250
 * of even y-loops, their fusion and code motion. It also minimizes
251
 * temporary storage. Compiler would normally do all these things for
252
 * you, purpose of manual optimization is to provide "unobscured"
253
 * reference for assembly implementation [in case this approach is
254
 * chosen for implementation on some platform]. In the nutshell it's
255
 * equivalent of "plane-per-plane processing" approach discussed in
256
 * section 2.4 of "Keccak implementation overview".
257
 */
258
static void Round(uint64_t A[5][5], size_t i)
259
{
260
    uint64_t C[5], E[2]; /* registers */
261
    uint64_t D[5], T[2][5]; /* memory    */
262
263
    assert(i < OSSL_NELEM(iotas));
264
265
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
266
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
267
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
268
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
269
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
270
271
#if defined(__arm__)
272
    D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
273
    D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
274
    D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
275
    D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
276
    D[3] = C[2] = ROL64(C[4], 1) ^ C[2];
277
278
    T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
279
    T[0][1] = A[0][1] ^ E[0]; /* D[1] */
280
    T[0][2] = A[0][2] ^ C[1]; /* D[2] */
281
    T[0][3] = A[0][3] ^ C[2]; /* D[3] */
282
    T[0][4] = A[0][4] ^ E[1]; /* D[4] */
283
284
    C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
285
    C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
286
    C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
287
    C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
288
    C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
289
#else
290
    D[0] = ROL64(C[1], 1) ^ C[4];
291
    D[1] = ROL64(C[2], 1) ^ C[0];
292
    D[2] = ROL64(C[3], 1) ^ C[1];
293
    D[3] = ROL64(C[4], 1) ^ C[2];
294
    D[4] = ROL64(C[0], 1) ^ C[3];
295
296
    T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
297
    T[0][1] = A[0][1] ^ D[1];
298
    T[0][2] = A[0][2] ^ D[2];
299
    T[0][3] = A[0][3] ^ D[3];
300
    T[0][4] = A[0][4] ^ D[4];
301
302
    C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
303
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
304
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
305
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
306
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
307
#endif
308
    A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
309
    A[0][1] = C[1] ^ (~C[2] & C[3]);
310
    A[0][2] = C[2] ^ (~C[3] & C[4]);
311
    A[0][3] = C[3] ^ (~C[4] & C[0]);
312
    A[0][4] = C[4] ^ (~C[0] & C[1]);
313
314
    T[1][0] = A[1][0] ^ (C[3] = D[0]);
315
    T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
316
    T[1][2] = A[1][2] ^ (E[0] = D[2]);
317
    T[1][3] = A[1][3] ^ (E[1] = D[3]);
318
    T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
319
320
    C[0] = ROL64(T[0][3], rhotates[0][3]);
321
    C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]); /* D[4] */
322
    C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]); /* D[0] */
323
    C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]); /* D[1] */
324
    C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]); /* D[2] */
325
326
    A[1][0] = C[0] ^ (~C[1] & C[2]);
327
    A[1][1] = C[1] ^ (~C[2] & C[3]);
328
    A[1][2] = C[2] ^ (~C[3] & C[4]);
329
    A[1][3] = C[3] ^ (~C[4] & C[0]);
330
    A[1][4] = C[4] ^ (~C[0] & C[1]);
331
332
    C[0] = ROL64(T[0][1], rhotates[0][1]);
333
    C[1] = ROL64(T[1][2], rhotates[1][2]);
334
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
335
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
336
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
337
338
    A[2][0] = C[0] ^ (~C[1] & C[2]);
339
    A[2][1] = C[1] ^ (~C[2] & C[3]);
340
    A[2][2] = C[2] ^ (~C[3] & C[4]);
341
    A[2][3] = C[3] ^ (~C[4] & C[0]);
342
    A[2][4] = C[4] ^ (~C[0] & C[1]);
343
344
    C[0] = ROL64(T[0][4], rhotates[0][4]);
345
    C[1] = ROL64(T[1][0], rhotates[1][0]);
346
    C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */
347
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
348
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
349
350
    A[3][0] = C[0] ^ (~C[1] & C[2]);
351
    A[3][1] = C[1] ^ (~C[2] & C[3]);
352
    A[3][2] = C[2] ^ (~C[3] & C[4]);
353
    A[3][3] = C[3] ^ (~C[4] & C[0]);
354
    A[3][4] = C[4] ^ (~C[0] & C[1]);
355
356
    C[0] = ROL64(T[0][2], rhotates[0][2]);
357
    C[1] = ROL64(T[1][3], rhotates[1][3]);
358
    C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */
359
    C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */
360
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
361
362
    A[4][0] = C[0] ^ (~C[1] & C[2]);
363
    A[4][1] = C[1] ^ (~C[2] & C[3]);
364
    A[4][2] = C[2] ^ (~C[3] & C[4]);
365
    A[4][3] = C[3] ^ (~C[4] & C[0]);
366
    A[4][4] = C[4] ^ (~C[0] & C[1]);
367
}
368
369
static void KeccakF1600(uint64_t A[5][5])
370
{
371
    size_t i;
372
373
    for (i = 0; i < 24; i++) {
374
        Round(A, i);
375
    }
376
}
377
378
#elif defined(KECCAK_1X_ALT)
379
/*
380
 * This is variant of above KECCAK_1X that reduces requirement for
381
 * temporary storage even further, but at cost of more updates to A[][].
382
 * It's less suitable if A[][] is memory bound, but better if it's
383
 * register bound.
384
 */
385
386
static void Round(uint64_t A[5][5], size_t i)
387
{
388
    uint64_t C[5], D[5];
389
390
    assert(i < OSSL_NELEM(iotas));
391
392
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
393
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
394
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
395
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
396
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
397
398
    D[1] = C[0] ^ ROL64(C[2], 1);
399
    D[2] = C[1] ^ ROL64(C[3], 1);
400
    D[3] = C[2] ^= ROL64(C[4], 1);
401
    D[4] = C[3] ^= ROL64(C[0], 1);
402
    D[0] = C[4] ^= ROL64(C[1], 1);
403
404
    A[0][1] ^= D[1];
405
    A[1][1] ^= D[1];
406
    A[2][1] ^= D[1];
407
    A[3][1] ^= D[1];
408
    A[4][1] ^= D[1];
409
410
    A[0][2] ^= D[2];
411
    A[1][2] ^= D[2];
412
    A[2][2] ^= D[2];
413
    A[3][2] ^= D[2];
414
    A[4][2] ^= D[2];
415
416
    A[0][3] ^= C[2];
417
    A[1][3] ^= C[2];
418
    A[2][3] ^= C[2];
419
    A[3][3] ^= C[2];
420
    A[4][3] ^= C[2];
421
422
    A[0][4] ^= C[3];
423
    A[1][4] ^= C[3];
424
    A[2][4] ^= C[3];
425
    A[3][4] ^= C[3];
426
    A[4][4] ^= C[3];
427
428
    A[0][0] ^= C[4];
429
    A[1][0] ^= C[4];
430
    A[2][0] ^= C[4];
431
    A[3][0] ^= C[4];
432
    A[4][0] ^= C[4];
433
434
    C[1] = A[0][1];
435
    C[2] = A[0][2];
436
    C[3] = A[0][3];
437
    C[4] = A[0][4];
438
439
    A[0][1] = ROL64(A[1][1], rhotates[1][1]);
440
    A[0][2] = ROL64(A[2][2], rhotates[2][2]);
441
    A[0][3] = ROL64(A[3][3], rhotates[3][3]);
442
    A[0][4] = ROL64(A[4][4], rhotates[4][4]);
443
444
    A[1][1] = ROL64(A[1][4], rhotates[1][4]);
445
    A[2][2] = ROL64(A[2][3], rhotates[2][3]);
446
    A[3][3] = ROL64(A[3][2], rhotates[3][2]);
447
    A[4][4] = ROL64(A[4][1], rhotates[4][1]);
448
449
    A[1][4] = ROL64(A[4][2], rhotates[4][2]);
450
    A[2][3] = ROL64(A[3][4], rhotates[3][4]);
451
    A[3][2] = ROL64(A[2][1], rhotates[2][1]);
452
    A[4][1] = ROL64(A[1][3], rhotates[1][3]);
453
454
    A[4][2] = ROL64(A[2][4], rhotates[2][4]);
455
    A[3][4] = ROL64(A[4][3], rhotates[4][3]);
456
    A[2][1] = ROL64(A[1][2], rhotates[1][2]);
457
    A[1][3] = ROL64(A[3][1], rhotates[3][1]);
458
459
    A[2][4] = ROL64(A[4][0], rhotates[4][0]);
460
    A[4][3] = ROL64(A[3][0], rhotates[3][0]);
461
    A[1][2] = ROL64(A[2][0], rhotates[2][0]);
462
    A[3][1] = ROL64(A[1][0], rhotates[1][0]);
463
464
    A[1][0] = ROL64(C[3], rhotates[0][3]);
465
    A[2][0] = ROL64(C[1], rhotates[0][1]);
466
    A[3][0] = ROL64(C[4], rhotates[0][4]);
467
    A[4][0] = ROL64(C[2], rhotates[0][2]);
468
469
    C[0] = A[0][0];
470
    C[1] = A[1][0];
471
    D[0] = A[0][1];
472
    D[1] = A[1][1];
473
474
    A[0][0] ^= (~A[0][1] & A[0][2]);
475
    A[1][0] ^= (~A[1][1] & A[1][2]);
476
    A[0][1] ^= (~A[0][2] & A[0][3]);
477
    A[1][1] ^= (~A[1][2] & A[1][3]);
478
    A[0][2] ^= (~A[0][3] & A[0][4]);
479
    A[1][2] ^= (~A[1][3] & A[1][4]);
480
    A[0][3] ^= (~A[0][4] & C[0]);
481
    A[1][3] ^= (~A[1][4] & C[1]);
482
    A[0][4] ^= (~C[0] & D[0]);
483
    A[1][4] ^= (~C[1] & D[1]);
484
485
    C[2] = A[2][0];
486
    C[3] = A[3][0];
487
    D[2] = A[2][1];
488
    D[3] = A[3][1];
489
490
    A[2][0] ^= (~A[2][1] & A[2][2]);
491
    A[3][0] ^= (~A[3][1] & A[3][2]);
492
    A[2][1] ^= (~A[2][2] & A[2][3]);
493
    A[3][1] ^= (~A[3][2] & A[3][3]);
494
    A[2][2] ^= (~A[2][3] & A[2][4]);
495
    A[3][2] ^= (~A[3][3] & A[3][4]);
496
    A[2][3] ^= (~A[2][4] & C[2]);
497
    A[3][3] ^= (~A[3][4] & C[3]);
498
    A[2][4] ^= (~C[2] & D[2]);
499
    A[3][4] ^= (~C[3] & D[3]);
500
501
    C[4] = A[4][0];
502
    D[4] = A[4][1];
503
504
    A[4][0] ^= (~A[4][1] & A[4][2]);
505
    A[4][1] ^= (~A[4][2] & A[4][3]);
506
    A[4][2] ^= (~A[4][3] & A[4][4]);
507
    A[4][3] ^= (~A[4][4] & C[4]);
508
    A[4][4] ^= (~C[4] & D[4]);
509
    A[0][0] ^= iotas[i];
510
}
511
512
static void KeccakF1600(uint64_t A[5][5])
513
{
514
    size_t i;
515
516
    for (i = 0; i < 24; i++) {
517
        Round(A, i);
518
    }
519
}
520
521
#elif defined(KECCAK_2X)
522
/*
523
 * This implementation is variant of KECCAK_1X above with outer-most
524
 * round loop unrolled twice. This allows to take temporary storage
525
 * out of round procedure and simplify references to it by alternating
526
 * it with actual data (see round loop below). Originally it was meant
527
 * rather as reference for an assembly implementation, but it seems to
528
 * play best with compilers [as well as provide best instruction per
529
 * processed byte ratio at minimal round unroll factor]...
530
 */
531
static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
532
19.2M
{
533
19.2M
    uint64_t C[5], D[5];
534
535
19.2M
    assert(i < OSSL_NELEM(iotas));
536
537
19.2M
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
538
19.2M
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
539
19.2M
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
540
19.2M
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
541
19.2M
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
542
543
19.2M
    D[0] = ROL64(C[1], 1) ^ C[4];
544
19.2M
    D[1] = ROL64(C[2], 1) ^ C[0];
545
19.2M
    D[2] = ROL64(C[3], 1) ^ C[1];
546
19.2M
    D[3] = ROL64(C[4], 1) ^ C[2];
547
19.2M
    D[4] = ROL64(C[0], 1) ^ C[3];
548
549
19.2M
    C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
550
19.2M
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
551
19.2M
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
552
19.2M
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
553
19.2M
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
554
555
19.2M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
556
19.2M
    R[0][0] = C[0] ^ (C[1] | C[2]) ^ iotas[i];
557
19.2M
    R[0][1] = C[1] ^ (~C[2] | C[3]);
558
19.2M
    R[0][2] = C[2] ^ (C[3] & C[4]);
559
19.2M
    R[0][3] = C[3] ^ (C[4] | C[0]);
560
19.2M
    R[0][4] = C[4] ^ (C[0] & C[1]);
561
#else
562
    R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
563
    R[0][1] = C[1] ^ (~C[2] & C[3]);
564
    R[0][2] = C[2] ^ (~C[3] & C[4]);
565
    R[0][3] = C[3] ^ (~C[4] & C[0]);
566
    R[0][4] = C[4] ^ (~C[0] & C[1]);
567
#endif
568
569
19.2M
    C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
570
19.2M
    C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
571
19.2M
    C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
572
19.2M
    C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
573
19.2M
    C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
574
575
19.2M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
576
19.2M
    R[1][0] = C[0] ^ (C[1] | C[2]);
577
19.2M
    R[1][1] = C[1] ^ (C[2] & C[3]);
578
19.2M
    R[1][2] = C[2] ^ (C[3] | ~C[4]);
579
19.2M
    R[1][3] = C[3] ^ (C[4] | C[0]);
580
19.2M
    R[1][4] = C[4] ^ (C[0] & C[1]);
581
#else
582
    R[1][0] = C[0] ^ (~C[1] & C[2]);
583
    R[1][1] = C[1] ^ (~C[2] & C[3]);
584
    R[1][2] = C[2] ^ (~C[3] & C[4]);
585
    R[1][3] = C[3] ^ (~C[4] & C[0]);
586
    R[1][4] = C[4] ^ (~C[0] & C[1]);
587
#endif
588
589
19.2M
    C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
590
19.2M
    C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
591
19.2M
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
592
19.2M
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
593
19.2M
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
594
595
19.2M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
596
19.2M
    R[2][0] = C[0] ^ (C[1] | C[2]);
597
19.2M
    R[2][1] = C[1] ^ (C[2] & C[3]);
598
19.2M
    R[2][2] = C[2] ^ (~C[3] & C[4]);
599
19.2M
    R[2][3] = ~C[3] ^ (C[4] | C[0]);
600
19.2M
    R[2][4] = C[4] ^ (C[0] & C[1]);
601
#else
602
    R[2][0] = C[0] ^ (~C[1] & C[2]);
603
    R[2][1] = C[1] ^ (~C[2] & C[3]);
604
    R[2][2] = C[2] ^ (~C[3] & C[4]);
605
    R[2][3] = C[3] ^ (~C[4] & C[0]);
606
    R[2][4] = C[4] ^ (~C[0] & C[1]);
607
#endif
608
609
19.2M
    C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
610
19.2M
    C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
611
19.2M
    C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
612
19.2M
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
613
19.2M
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
614
615
19.2M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
616
19.2M
    R[3][0] = C[0] ^ (C[1] & C[2]);
617
19.2M
    R[3][1] = C[1] ^ (C[2] | C[3]);
618
19.2M
    R[3][2] = C[2] ^ (~C[3] | C[4]);
619
19.2M
    R[3][3] = ~C[3] ^ (C[4] & C[0]);
620
19.2M
    R[3][4] = C[4] ^ (C[0] | C[1]);
621
#else
622
    R[3][0] = C[0] ^ (~C[1] & C[2]);
623
    R[3][1] = C[1] ^ (~C[2] & C[3]);
624
    R[3][2] = C[2] ^ (~C[3] & C[4]);
625
    R[3][3] = C[3] ^ (~C[4] & C[0]);
626
    R[3][4] = C[4] ^ (~C[0] & C[1]);
627
#endif
628
629
19.2M
    C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
630
19.2M
    C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
631
19.2M
    C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
632
19.2M
    C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
633
19.2M
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
634
635
19.2M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
636
19.2M
    R[4][0] = C[0] ^ (~C[1] & C[2]);
637
19.2M
    R[4][1] = ~C[1] ^ (C[2] | C[3]);
638
19.2M
    R[4][2] = C[2] ^ (C[3] & C[4]);
639
19.2M
    R[4][3] = C[3] ^ (C[4] | C[0]);
640
19.2M
    R[4][4] = C[4] ^ (C[0] & C[1]);
641
#else
642
    R[4][0] = C[0] ^ (~C[1] & C[2]);
643
    R[4][1] = C[1] ^ (~C[2] & C[3]);
644
    R[4][2] = C[2] ^ (~C[3] & C[4]);
645
    R[4][3] = C[3] ^ (~C[4] & C[0]);
646
    R[4][4] = C[4] ^ (~C[0] & C[1]);
647
#endif
648
19.2M
}
649
650
static void KeccakF1600(uint64_t A[5][5])
651
801k
{
652
801k
    uint64_t T[5][5];
653
801k
    size_t i;
654
655
801k
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
656
801k
    A[0][1] = ~A[0][1];
657
801k
    A[0][2] = ~A[0][2];
658
801k
    A[1][3] = ~A[1][3];
659
801k
    A[2][2] = ~A[2][2];
660
801k
    A[3][2] = ~A[3][2];
661
801k
    A[4][0] = ~A[4][0];
662
801k
#endif
663
664
10.4M
    for (i = 0; i < 24; i += 2) {
665
9.62M
        Round(T, A, i);
666
9.62M
        Round(A, T, i + 1);
667
9.62M
    }
668
669
801k
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
670
801k
    A[0][1] = ~A[0][1];
671
801k
    A[0][2] = ~A[0][2];
672
801k
    A[1][3] = ~A[1][3];
673
801k
    A[2][2] = ~A[2][2];
674
801k
    A[3][2] = ~A[3][2];
675
801k
    A[4][0] = ~A[4][0];
676
801k
#endif
677
801k
}
678
679
#else /* define KECCAK_INPLACE to compile this code path */
680
/*
681
 * This implementation is KECCAK_1X from above combined 4 times with
682
 * a twist that allows to omit temporary storage and perform in-place
683
 * processing. It's discussed in section 2.5 of "Keccak implementation
684
 * overview". It's likely to be best suited for processors with large
685
 * register bank... On the other hand processor with large register
686
 * bank can as well use KECCAK_1X_ALT, it would be as fast but much
687
 * more compact...
688
 */
689
static void FourRounds(uint64_t A[5][5], size_t i)
690
{
691
    uint64_t B[5], C[5], D[5];
692
693
    assert(i <= OSSL_NELEM(iotas) - 4);
694
695
    /* Round 4*n */
696
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
697
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
698
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
699
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
700
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
701
702
    D[0] = ROL64(C[1], 1) ^ C[4];
703
    D[1] = ROL64(C[2], 1) ^ C[0];
704
    D[2] = ROL64(C[3], 1) ^ C[1];
705
    D[3] = ROL64(C[4], 1) ^ C[2];
706
    D[4] = ROL64(C[0], 1) ^ C[3];
707
708
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
709
    B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
710
    B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
711
    B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
712
    B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
713
714
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
715
    C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
716
    C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
717
    C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
718
    C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);
719
720
    B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
721
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
722
    B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
723
    B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
724
    B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
725
726
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
727
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
728
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
729
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
730
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
731
732
    B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
733
    B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
734
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
735
    B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
736
    B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
737
738
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
739
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
740
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
741
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
742
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
743
744
    B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
745
    B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
746
    B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
747
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
748
    B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
749
750
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
751
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
752
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
753
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
754
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
755
756
    B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
757
    B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
758
    B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
759
    B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
760
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
761
762
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
763
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
764
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
765
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
766
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
767
768
    /* Round 4*n+1 */
769
    D[0] = ROL64(C[1], 1) ^ C[4];
770
    D[1] = ROL64(C[2], 1) ^ C[0];
771
    D[2] = ROL64(C[3], 1) ^ C[1];
772
    D[3] = ROL64(C[4], 1) ^ C[2];
773
    D[4] = ROL64(C[0], 1) ^ C[3];
774
775
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
776
    B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
777
    B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
778
    B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
779
    B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);
780
781
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
782
    C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
783
    C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
784
    C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
785
    C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);
786
787
    B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
788
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
789
    B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
790
    B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
791
    B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);
792
793
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
794
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
795
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
796
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
797
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
798
799
    B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
800
    B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
801
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
802
    B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
803
    B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);
804
805
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
806
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
807
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
808
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
809
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
810
811
    B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
812
    B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
813
    B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
814
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
815
    B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);
816
817
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
818
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
819
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
820
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
821
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
822
823
    B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
824
    B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
825
    B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
826
    B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
827
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
828
829
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
830
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
831
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
832
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
833
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
834
835
    /* Round 4*n+2 */
836
    D[0] = ROL64(C[1], 1) ^ C[4];
837
    D[1] = ROL64(C[2], 1) ^ C[0];
838
    D[2] = ROL64(C[3], 1) ^ C[1];
839
    D[3] = ROL64(C[4], 1) ^ C[2];
840
    D[4] = ROL64(C[0], 1) ^ C[3];
841
842
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
843
    B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
844
    B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
845
    B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
846
    B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);
847
848
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
849
    C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
850
    C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
851
    C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
852
    C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);
853
854
    B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
855
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
856
    B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
857
    B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
858
    B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);
859
860
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
861
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
862
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
863
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
864
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
865
866
    B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
867
    B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
868
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
869
    B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
870
    B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);
871
872
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
873
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
874
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
875
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
876
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
877
878
    B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
879
    B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
880
    B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
881
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
882
    B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);
883
884
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
885
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
886
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
887
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
888
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
889
890
    B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
891
    B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
892
    B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
893
    B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
894
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
895
896
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
897
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
898
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
899
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
900
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
901
902
    /* Round 4*n+3 */
903
    D[0] = ROL64(C[1], 1) ^ C[4];
904
    D[1] = ROL64(C[2], 1) ^ C[0];
905
    D[2] = ROL64(C[3], 1) ^ C[1];
906
    D[3] = ROL64(C[4], 1) ^ C[2];
907
    D[4] = ROL64(C[0], 1) ^ C[3];
908
909
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
910
    B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
911
    B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
912
    B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
913
    B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);
914
915
    /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
916
    /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
917
    /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
918
    /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
919
    /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);
920
921
    B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
922
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
923
    B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
924
    B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
925
    B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);
926
927
    /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
928
    /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
929
    /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
930
    /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
931
    /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);
932
933
    B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
934
    B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
935
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
936
    B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
937
    B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);
938
939
    /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
940
    /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
941
    /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
942
    /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
943
    /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);
944
945
    B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
946
    B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
947
    B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
948
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
949
    B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);
950
951
    /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
952
    /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
953
    /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
954
    /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
955
    /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);
956
957
    B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
958
    B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
959
    B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
960
    B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
961
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
962
963
    /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
964
    /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
965
    /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
966
    /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
967
    /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
968
}
969
970
/*
 * Apply the full Keccak-f[1600] permutation to the state: 24 rounds,
 * executed four at a time by FourRounds.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t round;

    for (round = 0; round < 24; round += 4)
        FourRounds(A, round);
}
978
979
#endif
980
981
/*
 * Convert a 64-bit lane into bit-interleaved form: the lane's
 * even-numbered bits are compacted into the low 32-bit half and its
 * odd-numbered bits into the high half (the classic Keccak
 * "bit interleaving" technique for 32-bit targets, which allows the
 * 64-bit rotations in the permutation to be expressed as pairs of
 * 32-bit rotations).  When BIT_INTERLEAVE is 0 — native 64-bit
 * platforms, per the macro definition at the top of the file — this
 * function is the identity and the body is dead code.
 */
static uint64_t BitInterleave(uint64_t Ai)
{
    if (BIT_INTERLEAVE) {
        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
        uint32_t t0, t1;

        /* Compact the even-numbered bits of |lo| into bits 0..15 of t0. */
        t0 = lo & 0x55555555;
        t0 |= t0 >> 1;
        t0 &= 0x33333333;
        t0 |= t0 >> 2;
        t0 &= 0x0f0f0f0f;
        t0 |= t0 >> 4;
        t0 &= 0x00ff00ff;
        t0 |= t0 >> 8;
        t0 &= 0x0000ffff;

        /* Compact the even-numbered bits of |hi| into bits 16..31 of t1. */
        t1 = hi & 0x55555555;
        t1 |= t1 >> 1;
        t1 &= 0x33333333;
        t1 |= t1 >> 2;
        t1 &= 0x0f0f0f0f;
        t1 |= t1 >> 4;
        t1 &= 0x00ff00ff;
        t1 |= t1 >> 8;
        t1 <<= 16;

        /* Compact the odd-numbered bits of |lo| into bits 0..15. */
        lo &= 0xaaaaaaaa;
        lo |= lo << 1;
        lo &= 0xcccccccc;
        lo |= lo << 2;
        lo &= 0xf0f0f0f0;
        lo |= lo << 4;
        lo &= 0xff00ff00;
        lo |= lo << 8;
        lo >>= 16;

        /* Compact the odd-numbered bits of |hi| into bits 16..31. */
        hi &= 0xaaaaaaaa;
        hi |= hi << 1;
        hi &= 0xcccccccc;
        hi |= hi << 2;
        hi &= 0xf0f0f0f0;
        hi |= hi << 4;
        hi &= 0xff00ff00;
        hi |= hi << 8;
        hi &= 0xffff0000;

        /* High word = all odd bits, low word = all even bits. */
        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
    }

    return Ai;
}
1032
1033
/*
 * Inverse of BitInterleave: take a lane whose low 32-bit half holds the
 * even-numbered bits and whose high half holds the odd-numbered bits,
 * and spread them back into their natural positions.  Identity when
 * BIT_INTERLEAVE is 0 (native 64-bit platforms).
 */
static uint64_t BitDeinterleave(uint64_t Ai)
{
    if (BIT_INTERLEAVE) {
        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
        uint32_t t0, t1;

        /* Spread bits 0..15 of |lo| into the even positions of t0. */
        t0 = lo & 0x0000ffff;
        t0 |= t0 << 8;
        t0 &= 0x00ff00ff;
        t0 |= t0 << 4;
        t0 &= 0x0f0f0f0f;
        t0 |= t0 << 2;
        t0 &= 0x33333333;
        t0 |= t0 << 1;
        t0 &= 0x55555555;

        /* Spread bits 0..15 of |hi| into the odd positions of t1. */
        t1 = hi << 16;
        t1 |= t1 >> 8;
        t1 &= 0xff00ff00;
        t1 |= t1 >> 4;
        t1 &= 0xf0f0f0f0;
        t1 |= t1 >> 2;
        t1 &= 0xcccccccc;
        t1 |= t1 >> 1;
        t1 &= 0xaaaaaaaa;

        /* Spread bits 16..31 of |lo| into the even positions. */
        lo >>= 16;
        lo |= lo << 8;
        lo &= 0x00ff00ff;
        lo |= lo << 4;
        lo &= 0x0f0f0f0f;
        lo |= lo << 2;
        lo &= 0x33333333;
        lo |= lo << 1;
        lo &= 0x55555555;

        /* Spread bits 16..31 of |hi| into the odd positions. */
        hi &= 0xffff0000;
        hi |= hi >> 8;
        hi &= 0xff00ff00;
        hi |= hi >> 4;
        hi &= 0xf0f0f0f0;
        hi |= hi >> 2;
        hi &= 0xcccccccc;
        hi |= hi >> 1;
        hi &= 0xaaaaaaaa;

        /* Recombine: original bit order restored. */
        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
    }

    return Ai;
}
1084
1085
/*
1086
 * SHA3_absorb can be called multiple times, but at each invocation
1087
 * largest multiple of |r| out of |len| bytes are processed. Then
1088
 * remaining amount of bytes is returned. This is done to spare caller
1089
 * trouble of calculating the largest multiple of |r|. |r| can be viewed
1090
 * as blocksize. It is commonly (1600 - 256*n)/8, e.g. 168, 136, 104,
1091
 * 72, but can also be (1600 - 448)/8 = 144. All this means that message
1092
 * padding and intermediate sub-block buffering, byte- or bitwise, is
1093
 * caller's responsibility.
1094
 */
1095
/*
 * XOR whole r-byte blocks of |inp| into the state, permuting after each
 * block, and return the number of leftover bytes (see the contract
 * described in the comment above).  Input bytes are read little-endian
 * into 64-bit lanes and bit-interleaved before being mixed in.
 */
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
    size_t r)
{
    uint64_t *lanes = (uint64_t *)A;
    size_t nlanes = r / 8;

    /* |r| must be a multiple of 8 and strictly smaller than the state. */
    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    while (len >= r) {
        size_t i;

        for (i = 0; i < nlanes; i++) {
            uint64_t lane = 0;
            size_t b;

            /* Assemble one 64-bit lane, little-endian, from 8 bytes. */
            for (b = 0; b < 8; b++)
                lane |= (uint64_t)inp[b] << (8 * b);
            inp += 8;

            lanes[i] ^= BitInterleave(lane);
        }
        KeccakF1600(A);
        len -= r;
    }

    return len;
}
1116
1117
/*
1118
 * SHA3_squeeze may be called after SHA3_absorb to generate |out| hash value of
1119
 * |len| bytes.
1120
 * If multiple SHA3_squeeze calls are required the output length |len| must be a
1121
 * multiple of the blocksize, with |next| being 0 on the first call and 1 on
1122
 * subsequent calls. It is the callers responsibility to buffer the results.
1123
 * When only a single call to SHA3_squeeze is required, |len| can be any size
1124
 * and |next| must be 0.
1125
 */
1126
/*
 * Extract |len| output bytes from the sponge state, per the contract in
 * the comment above: |next| is 0 on the first call and 1 on subsequent
 * calls, and multi-call usage requires |len| to be a multiple of the
 * blocksize |r|.  Lanes are de-interleaved and serialized little-endian.
 */
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r,
    int next)
{
    uint64_t *lanes = (uint64_t *)A;
    size_t nlanes = r / 8;

    /* |r| must be a multiple of 8 and strictly smaller than the state. */
    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    while (len != 0) {
        size_t i;

        /* Permute before extracting, except on the very first call. */
        if (next)
            KeccakF1600(A);
        next = 1;

        for (i = 0; i < nlanes && len != 0; i++) {
            uint64_t lane = BitDeinterleave(lanes[i]);
            size_t b;

            if (len < 8) {
                /* Trailing partial lane: emit only the low |len| bytes. */
                for (b = 0; b < len; b++) {
                    *out++ = (unsigned char)lane;
                    lane >>= 8;
                }
                return;
            }

            /* Whole lane, serialized little-endian. */
            for (b = 0; b < 8; b++)
                out[b] = (unsigned char)(lane >> (8 * b));
            out += 8;
            len -= 8;
        }
    }
}
1162
#endif
1163
1164
#ifdef SELFTEST
1165
/*
1166
 * Post-padding one-shot implementations would look as following:
1167
 *
1168
 * SHA3_224     SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
1169
 * SHA3_256     SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
1170
 * SHA3_384     SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
1171
 * SHA3_512     SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
1172
 * SHAKE_128    SHA3_sponge(inp, len, out, d, (1600-256)/8);
1173
 * SHAKE_256    SHA3_sponge(inp, len, out, d, (1600-512)/8);
1174
 */
1175
1176
/*
 * One-shot sponge: absorb the (already padded) |len|-byte message |inp|
 * and squeeze |d| bytes of output into |out|, using bitrate |r|.
 * |len| must be a multiple of |r| (SHA3_absorb only consumes whole
 * blocks; any remainder would be silently dropped here).
 */
void SHA3_sponge(const unsigned char *inp, size_t len,
    unsigned char *out, size_t d, size_t r)
{
    uint64_t A[5][5];

    memset(A, 0, sizeof(A));
    SHA3_absorb(A, inp, len, r);
    /*
     * BUG FIX: SHA3_squeeze takes five parameters (see the prototype at
     * the top of the file); the original call omitted |next|, which is
     * a compile error.  |next| must be 0 for the first (and here only)
     * squeeze call.
     */
    SHA3_squeeze(A, out, d, r, 0);
}
1185
1186
#include <stdio.h>
1187
1188
/*
 * Self-test driver: known-answer test for SHAKE128 on a short message,
 * comparing 512 bytes of squeezed output against the expected vector.
 * Prints the output (for diffing against the official example) and
 * reports success/failure on stderr.
 */
int main(void)
{
    /*
     * This is 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
     */
    /*
     * First byte 0xf3 carries the 5 message bits plus — presumably —
     * the SHAKE domain-separation/padding bits per the NIST example;
     * the rest of the 168-byte (SHAKE128 rate) block is zero.
     */
    unsigned char test[168] = { '\xf3', '\x3' };
    unsigned char out[512];
    size_t i;
    /* Expected 512-byte SHAKE128 output from the NIST example. */
    static const unsigned char result[512] = {
        0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
        0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
        0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
        0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
        0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
        0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
        0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
        0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
        0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
        0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
        0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
        0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
        0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
        0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
        0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
        0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
        0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
        0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
        0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
        0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
        0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
        0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
        0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
        0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
        0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
        0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
        0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
        0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
        0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
        0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
        0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
        0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
        0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
        0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
        0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
        0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
        0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
        0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
        0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
        0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
        0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
        0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
        0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
        0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
        0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
        0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
        0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
        0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
        0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
        0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
        0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
        0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
        0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
        0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
        0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
        0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
        0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
        0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
        0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
        0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
        0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
        0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
        0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
        0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
    };

    /*
     * Close the pad10*1 padding in the last byte of the rate block —
     * NOTE(review): matches the usual Keccak padding; confirm against
     * the NIST example.
     */
    test[167] = '\x80';
    /* r = sizeof(test) = 168 bytes, the SHAKE128 bitrate. */
    SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));

    /*
     * Rationale behind keeping output [formatted as below] is that
     * one should be able to redirect it to a file, then copy-n-paste
     * final "output val" from official example to another file, and
     * compare the two with diff(1).
     */
    for (i = 0; i < sizeof(out);) {
        printf("%02X", out[i]);
        /* 16 bytes per line, space-separated, no trailing space. */
        printf(++i % 16 && i != sizeof(out) ? " " : "\n");
    }

    if (memcmp(out, result, sizeof(out))) {
        fprintf(stderr, "failure\n");
        return 1;
    } else {
        fprintf(stderr, "success\n");
        return 0;
    }
}
1285
#endif