Coverage Report

Created: 2024-11-21 07:03

/src/openssl/crypto/sha/keccak1600.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <openssl/e_os2.h>
11
#include <string.h>
12
#include <assert.h>
13
14
#include "internal/nelem.h"
15
16
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
17
                   size_t r);
18
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, int next);
19
20
#if !defined(KECCAK1600_ASM) || !defined(SELFTEST)
21
22
/*
23
 * Choose some sensible defaults
24
 */
25
#if !defined(KECCAK_REF) && !defined(KECCAK_1X) && !defined(KECCAK_1X_ALT) && \
26
    !defined(KECCAK_2X) && !defined(KECCAK_INPLACE)
27
# define KECCAK_2X      /* default to KECCAK_2X variant */
28
#endif
29
30
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
31
    (defined(__x86_64) && !defined(__BMI__)) || defined(_M_X64) || \
32
    defined(__mips) || defined(__riscv) || defined(__s390__) || \
33
    defined(__EMSCRIPTEN__)
34
/*
35
 * These don't have "and with complement" instruction, so minimize amount
36
 * of "not"-s. Implemented only in the [default] KECCAK_2X variant.
37
 */
38
# define KECCAK_COMPLEMENTING_TRANSFORM
39
#endif
40
41
#if defined(__x86_64__) || defined(__aarch64__) || \
    defined(__mips64) || defined(__ia64) || \
    (defined(__VMS) && !defined(__vax))
/*
 * These are available even in ILP32 flavours, but even then they are
 * capable of performing 64-bit operations as efficiently as in *P64.
 * Since it's not given that we can use sizeof(void *), just shunt it.
 */
# define BIT_INTERLEAVE (0)
#else
# define BIT_INTERLEAVE (sizeof(void *) < 8)
#endif

#define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))

/*
 * Rotate the 64-bit lane |val| left by |offset| bits.
 *
 * The rotation count is reduced modulo 64 before use, so no shift in this
 * function is ever performed with a count equal to or larger than the
 * width of its operand (which would be undefined behaviour in C).
 *
 * When BIT_INTERLEAVE is zero this is a plain 64-bit rotate. Otherwise
 * |val| is handled as two 32-bit halves that are rotated separately:
 * for an even count both halves are rotated by half the count; for an
 * odd count the halves additionally swap places and the old high half
 * is rotated by one more bit. That extra rotation is taken modulo 32 so
 * that a count of 63 rotates by 0 instead of shifting by 32.
 *
 * Returns the rotated value.
 */
static uint64_t ROL64(uint64_t val, int offset)
{
    unsigned int n = (unsigned int)offset & 63;

    if (n == 0) {
        return val;
    } else if (!BIT_INTERLEAVE) {
        /* n is in 1..63 here, so both shift counts are in range */
        return (val << n) | (val >> (64 - n));
    } else {
        uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;
        unsigned int half = n >> 1;

        if (n & 1) {
            uint32_t tmp = hi;

            hi = ROL32(lo, half);
            /* half + 1 can reach 32; a 32-bit rotate by 32 is a no-op */
            lo = ROL32(tmp, (half + 1) & 31);
        } else {
            lo = ROL32(lo, half);
            hi = ROL32(hi, half);
        }

        return ((uint64_t)hi << 32) | lo;
    }
}
80
81
/*
 * Left-rotation counts, in bits, handed to ROL64() for each lane.
 * Indexed the same way as the state: rhotates[y][x] belongs to A[y][x].
 */
static const unsigned char rhotates[5][5] = {
    [0] = {  0,  1, 62, 28, 27 },
    [1] = { 36, 44,  6, 55, 20 },
    [2] = {  3, 10, 43, 25, 39 },
    [3] = { 41, 45, 15, 21,  8 },
    [4] = { 18,  2, 61, 56, 14 }
};
88
89
static const uint64_t iotas[] = {
90
    BIT_INTERLEAVE ? 0x0000000000000001ULL : 0x0000000000000001ULL,
91
    BIT_INTERLEAVE ? 0x0000008900000000ULL : 0x0000000000008082ULL,
92
    BIT_INTERLEAVE ? 0x8000008b00000000ULL : 0x800000000000808aULL,
93
    BIT_INTERLEAVE ? 0x8000808000000000ULL : 0x8000000080008000ULL,
94
    BIT_INTERLEAVE ? 0x0000008b00000001ULL : 0x000000000000808bULL,
95
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
96
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
97
    BIT_INTERLEAVE ? 0x8000008200000001ULL : 0x8000000000008009ULL,
98
    BIT_INTERLEAVE ? 0x0000000b00000000ULL : 0x000000000000008aULL,
99
    BIT_INTERLEAVE ? 0x0000000a00000000ULL : 0x0000000000000088ULL,
100
    BIT_INTERLEAVE ? 0x0000808200000001ULL : 0x0000000080008009ULL,
101
    BIT_INTERLEAVE ? 0x0000800300000000ULL : 0x000000008000000aULL,
102
    BIT_INTERLEAVE ? 0x0000808b00000001ULL : 0x000000008000808bULL,
103
    BIT_INTERLEAVE ? 0x8000000b00000001ULL : 0x800000000000008bULL,
104
    BIT_INTERLEAVE ? 0x8000008a00000001ULL : 0x8000000000008089ULL,
105
    BIT_INTERLEAVE ? 0x8000008100000001ULL : 0x8000000000008003ULL,
106
    BIT_INTERLEAVE ? 0x8000008100000000ULL : 0x8000000000008002ULL,
107
    BIT_INTERLEAVE ? 0x8000000800000000ULL : 0x8000000000000080ULL,
108
    BIT_INTERLEAVE ? 0x0000008300000000ULL : 0x000000000000800aULL,
109
    BIT_INTERLEAVE ? 0x8000800300000000ULL : 0x800000008000000aULL,
110
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
111
    BIT_INTERLEAVE ? 0x8000008800000000ULL : 0x8000000000008080ULL,
112
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
113
    BIT_INTERLEAVE ? 0x8000808200000000ULL : 0x8000000080008008ULL
114
};
115
116
#if defined(KECCAK_REF)
117
/*
118
 * This is a straightforward or "maximum clarity" implementation aiming
119
 * to resemble section 3.2 of the FIPS PUB 202 "SHA-3 Standard:
120
 * Permutation-Based Hash and Extendible-Output Functions" as much as
121
 * possible. With one caveat. Because of the way C stores matrices,
122
 * references to A[x,y] in the specification are presented as A[y][x].
123
 * Implementation unrolls inner x-loops so that modulo 5 operations are
124
 * explicitly pre-computed.
125
 */
126
/*
 * Theta step: XOR into every lane a value derived from the parities of
 * two columns of the state. A is updated in place.
 */
static void Theta(uint64_t A[5][5])
{
    uint64_t parity[5], mix[5];
    uint64_t *lane;
    size_t y;

    /* Column parities: start from plane 0, then fold in planes 1..4. */
    lane = A[0];
    parity[0] = lane[0];
    parity[1] = lane[1];
    parity[2] = lane[2];
    parity[3] = lane[3];
    parity[4] = lane[4];

    for (y = 1; y < 5; y++) {
        lane = A[y];
        parity[0] ^= lane[0];
        parity[1] ^= lane[1];
        parity[2] ^= lane[2];
        parity[3] ^= lane[3];
        parity[4] ^= lane[4];
    }

    /* mix[x] = parity[x - 1] ^ ROL64(parity[x + 1], 1), indices mod 5. */
    mix[0] = parity[4] ^ ROL64(parity[1], 1);
    mix[1] = parity[0] ^ ROL64(parity[2], 1);
    mix[2] = parity[1] ^ ROL64(parity[3], 1);
    mix[3] = parity[2] ^ ROL64(parity[4], 1);
    mix[4] = parity[3] ^ ROL64(parity[0], 1);

    for (y = 0; y < 5; y++) {
        lane = A[y];
        lane[0] ^= mix[0];
        lane[1] ^= mix[1];
        lane[2] ^= mix[2];
        lane[3] ^= mix[3];
        lane[4] ^= mix[4];
    }
}
159
160
static void Rho(uint64_t A[5][5])
161
{
162
    size_t y;
163
164
    for (y = 0; y < 5; y++) {
165
        A[y][0] = ROL64(A[y][0], rhotates[y][0]);
166
        A[y][1] = ROL64(A[y][1], rhotates[y][1]);
167
        A[y][2] = ROL64(A[y][2], rhotates[y][2]);
168
        A[y][3] = ROL64(A[y][3], rhotates[y][3]);
169
        A[y][4] = ROL64(A[y][4], rhotates[y][4]);
170
    }
171
}
172
173
/*
 * Pi step: permute the 25 lanes of A in place according to
 *
 *     A[y][x] = old[x][(3*y+x)%5]
 *
 * where old is a snapshot of the state taken on entry.
 */
static void Pi(uint64_t A[5][5])
{
    uint64_t old[5][5];

    memcpy(old, A, sizeof(old));

    /* The assignments are listed by source plane; all are independent. */
    A[0][0] = old[0][0];
    A[2][0] = old[0][1];
    A[4][0] = old[0][2];
    A[1][0] = old[0][3];
    A[3][0] = old[0][4];

    A[3][1] = old[1][0];
    A[0][1] = old[1][1];
    A[2][1] = old[1][2];
    A[4][1] = old[1][3];
    A[1][1] = old[1][4];

    A[1][2] = old[2][0];
    A[3][2] = old[2][1];
    A[0][2] = old[2][2];
    A[2][2] = old[2][3];
    A[4][2] = old[2][4];

    A[4][3] = old[3][0];
    A[1][3] = old[3][1];
    A[3][3] = old[3][2];
    A[0][3] = old[3][3];
    A[2][3] = old[3][4];

    A[2][4] = old[4][0];
    A[4][4] = old[4][1];
    A[1][4] = old[4][2];
    A[3][4] = old[4][3];
    A[0][4] = old[4][4];
}
213
214
/*
 * Chi step: within each plane y, replace every lane with
 *
 *     A[y][x] ^ (~A[y][x+1] & A[y][x+2])        (x indices mod 5)
 *
 * computed from the values the plane held before the update.
 */
static void Chi(uint64_t A[5][5])
{
    size_t y;

    for (y = 0; y < 5; y++) {
        uint64_t *lane = A[y];
        /* Snapshot the plane so every output sees the pre-update lanes. */
        const uint64_t a0 = lane[0], a1 = lane[1], a2 = lane[2],
                       a3 = lane[3], a4 = lane[4];

        lane[0] = a0 ^ (~a1 & a2);
        lane[1] = a1 ^ (~a2 & a3);
        lane[2] = a2 ^ (~a3 & a4);
        lane[3] = a3 ^ (~a4 & a0);
        lane[4] = a4 ^ (~a0 & a1);
    }
}
233
234
static void Iota(uint64_t A[5][5], size_t i)
235
{
236
    assert(i < OSSL_NELEM(iotas));
237
    A[0][0] ^= iotas[i];
238
}
239
240
/*
 * Run the 24-round permutation on A in place. Each round applies the
 * five step functions in the order Theta, Rho, Pi, Chi, Iota.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t rnd = 0;

    while (rnd < 24) {
        Theta(A);
        Rho(A);
        Pi(A);
        Chi(A);
        Iota(A, rnd);
        rnd++;
    }
}
252
253
#elif defined(KECCAK_1X)
254
/*
255
 * This implementation is optimization of above code featuring unroll
256
 * of even y-loops, their fusion and code motion. It also minimizes
257
 * temporary storage. Compiler would normally do all these things for
258
 * you, purpose of manual optimization is to provide "unobscured"
259
 * reference for assembly implementation [in case this approach is
260
 * chosen for implementation on some platform]. In a nutshell it's
261
 * the equivalent of the "plane-per-plane processing" approach discussed in
262
 * section 2.4 of "Keccak implementation overview".
263
 */
264
/*
 * Apply one round to the state A in place, using iotas[i] as the round
 * constant (i is asserted to be a valid index into iotas[]).
 *
 * The round is computed one output plane A[y][*] at a time. Lanes that a
 * later plane still needs but that an earlier plane overwrites are saved,
 * already XORed with their D[] value, in T[][] beforehand.
 */
static void Round(uint64_t A[5][5], size_t i)
265
{
266
    uint64_t C[5], E[2];        /* registers */
267
    uint64_t D[5], T[2][5];     /* memory    */
268
269
    assert(i < OSSL_NELEM(iotas));
270
271
    /* C[x] = XOR of the five lanes in column x */
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
272
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
273
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
274
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
275
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
276
277
#if defined(__arm__)
278
    /*
     * Same values as the #else branch, but each D[x] is also kept in a
     * C[]/E[] slot (named in the trailing comments) and evaluated in an
     * order that lets C[0..2] be reused once they are no longer needed.
     */
    D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
279
    D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
280
    D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
281
    D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
282
    D[3] = C[2] = ROL64(C[4], 1) ^ C[2];
283
284
    T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
285
    T[0][1] = A[0][1] ^ E[0]; /* D[1] */
286
    T[0][2] = A[0][2] ^ C[1]; /* D[2] */
287
    T[0][3] = A[0][3] ^ C[2]; /* D[3] */
288
    T[0][4] = A[0][4] ^ E[1]; /* D[4] */
289
290
    C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
291
    C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
292
    C[0] =       A[0][0] ^ C[0]; /* rotate by 0 */  /* D[0] */
293
    C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);   /* D[2] */
294
    C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);   /* D[1] */
295
#else
296
    /* D[x] = ROL64(C[x + 1], 1) ^ C[x - 1], indices mod 5 */
    D[0] = ROL64(C[1], 1) ^ C[4];
297
    D[1] = ROL64(C[2], 1) ^ C[0];
298
    D[2] = ROL64(C[3], 1) ^ C[1];
299
    D[3] = ROL64(C[4], 1) ^ C[2];
300
    D[4] = ROL64(C[0], 1) ^ C[3];
301
302
    /*
     * Save plane 0 lanes 1..4 (XORed with D[]) before A[0][*] is
     * overwritten below; T[0][0] instead holds A[3][0] ^ D[0].
     */
    T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
303
    T[0][1] = A[0][1] ^ D[1];
304
    T[0][2] = A[0][2] ^ D[2];
305
    T[0][3] = A[0][3] ^ D[3];
306
    T[0][4] = A[0][4] ^ D[4];
307
308
    /* Inputs of output plane 0: the diagonal lanes A[x][x], rotated */
    C[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
309
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
310
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
311
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
312
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
313
#endif
314
    /* Output plane 0; the round constant goes into lane [0][0] only */
    A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
315
    A[0][1] = C[1] ^ (~C[2] & C[3]);
316
    A[0][2] = C[2] ^ (~C[3] & C[4]);
317
    A[0][3] = C[3] ^ (~C[4] & C[0]);
318
    A[0][4] = C[4] ^ (~C[0] & C[1]);
319
320
    /*
     * Save further lanes (XORed with D[]) that later planes read after
     * they have been overwritten, and copy D[] values into C[]/E[]
     * slots; C[2], C[3], C[4] and E[0] are consumed just below.
     */
    T[1][0] = A[1][0] ^ (C[3] = D[0]);
321
    T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
322
    T[1][2] = A[1][2] ^ (E[0] = D[2]);
323
    T[1][3] = A[1][3] ^ (E[1] = D[3]);
324
    T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
325
326
    /* Inputs of output plane 1 */
    C[0] = ROL64(T[0][3],        rhotates[0][3]);
327
    C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]);   /* D[4] */
328
    C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]);   /* D[0] */
329
    C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]);   /* D[1] */
330
    C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]);   /* D[2] */
331
332
    A[1][0] = C[0] ^ (~C[1] & C[2]);
333
    A[1][1] = C[1] ^ (~C[2] & C[3]);
334
    A[1][2] = C[2] ^ (~C[3] & C[4]);
335
    A[1][3] = C[3] ^ (~C[4] & C[0]);
336
    A[1][4] = C[4] ^ (~C[0] & C[1]);
337
338
    /* Inputs of output plane 2 */
    C[0] = ROL64(T[0][1],        rhotates[0][1]);
339
    C[1] = ROL64(T[1][2],        rhotates[1][2]);
340
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
341
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
342
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
343
344
    A[2][0] = C[0] ^ (~C[1] & C[2]);
345
    A[2][1] = C[1] ^ (~C[2] & C[3]);
346
    A[2][2] = C[2] ^ (~C[3] & C[4]);
347
    A[2][3] = C[3] ^ (~C[4] & C[0]);
348
    A[2][4] = C[4] ^ (~C[0] & C[1]);
349
350
    /* Inputs of output plane 3 */
    C[0] = ROL64(T[0][4],        rhotates[0][4]);
351
    C[1] = ROL64(T[1][0],        rhotates[1][0]);
352
    C[2] = ROL64(T[1][1],        rhotates[2][1]); /* originally A[2][1] */
353
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
354
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
355
356
    A[3][0] = C[0] ^ (~C[1] & C[2]);
357
    A[3][1] = C[1] ^ (~C[2] & C[3]);
358
    A[3][2] = C[2] ^ (~C[3] & C[4]);
359
    A[3][3] = C[3] ^ (~C[4] & C[0]);
360
    A[3][4] = C[4] ^ (~C[0] & C[1]);
361
362
    /* Inputs of output plane 4 */
    C[0] = ROL64(T[0][2],        rhotates[0][2]);
363
    C[1] = ROL64(T[1][3],        rhotates[1][3]);
364
    C[2] = ROL64(T[1][4],        rhotates[2][4]); /* originally A[2][4] */
365
    C[3] = ROL64(T[0][0],        rhotates[3][0]); /* originally A[3][0] */
366
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
367
368
    A[4][0] = C[0] ^ (~C[1] & C[2]);
369
    A[4][1] = C[1] ^ (~C[2] & C[3]);
370
    A[4][2] = C[2] ^ (~C[3] & C[4]);
371
    A[4][3] = C[3] ^ (~C[4] & C[0]);
372
    A[4][4] = C[4] ^ (~C[0] & C[1]);
373
}
374
375
/*
 * Run the permutation on A in place: 24 calls to Round(), numbered
 * 0 through 23.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t rnd = 0;

    while (rnd < 24) {
        Round(A, rnd);
        rnd++;
    }
}
383
384
#elif defined(KECCAK_1X_ALT)
385
/*
386
 * This is variant of above KECCAK_1X that reduces requirement for
387
 * temporary storage even further, but at cost of more updates to A[][].
388
 * It's less suitable if A[][] is memory bound, but better if it's
389
 * register bound.
390
 */
391
392
/*
 * Apply one round to the state A in place, using iotas[i] as the round
 * constant (i is asserted to be a valid index into iotas[]).
 *
 * Unlike a version with a scratch copy of the state, every step here
 * updates A[][] directly; only C[] and D[] are used as temporaries, so
 * the order of the statements below is significant.
 */
static void Round(uint64_t A[5][5], size_t i)
393
{
394
    uint64_t C[5], D[5];
395
396
    assert(i < OSSL_NELEM(iotas));
397
398
    /* C[x] = XOR of the five lanes in column x */
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
399
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
400
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
401
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
402
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
403
404
    /*
     * D[x] = C[x - 1] ^ ROL64(C[x + 1], 1), indices mod 5. The last three
     * use "^=", so afterwards C[2], C[3] and C[4] hold D[3], D[4] and
     * D[0] respectively; those copies are what columns 3, 4 and 0 are
     * XORed with below.
     */
    D[1] = C[0] ^  ROL64(C[2], 1);
405
    D[2] = C[1] ^  ROL64(C[3], 1);
406
    D[3] = C[2] ^= ROL64(C[4], 1);
407
    D[4] = C[3] ^= ROL64(C[0], 1);
408
    D[0] = C[4] ^= ROL64(C[1], 1);
409
410
    A[0][1] ^= D[1];
411
    A[1][1] ^= D[1];
412
    A[2][1] ^= D[1];
413
    A[3][1] ^= D[1];
414
    A[4][1] ^= D[1];
415
416
    A[0][2] ^= D[2];
417
    A[1][2] ^= D[2];
418
    A[2][2] ^= D[2];
419
    A[3][2] ^= D[2];
420
    A[4][2] ^= D[2];
421
422
    A[0][3] ^= C[2];
423
    A[1][3] ^= C[2];
424
    A[2][3] ^= C[2];
425
    A[3][3] ^= C[2];
426
    A[4][3] ^= C[2];
427
428
    A[0][4] ^= C[3];
429
    A[1][4] ^= C[3];
430
    A[2][4] ^= C[3];
431
    A[3][4] ^= C[3];
432
    A[4][4] ^= C[3];
433
434
    A[0][0] ^= C[4];
435
    A[1][0] ^= C[4];
436
    A[2][0] ^= C[4];
437
    A[3][0] ^= C[4];
438
    A[4][0] ^= C[4];
439
440
    /* Save plane 0 lanes 1..4; they are overwritten first and read last */
    C[1] = A[0][1];
441
    C[2] = A[0][2];
442
    C[3] = A[0][3];
443
    C[4] = A[0][4];
444
445
    /*
     * Rotate each lane and move it to its new position. Every lane
     * written here has already had its previous value either saved in
     * C[] above or consumed by an earlier line of this sequence.
     */
    A[0][1] = ROL64(A[1][1], rhotates[1][1]);
446
    A[0][2] = ROL64(A[2][2], rhotates[2][2]);
447
    A[0][3] = ROL64(A[3][3], rhotates[3][3]);
448
    A[0][4] = ROL64(A[4][4], rhotates[4][4]);
449
450
    A[1][1] = ROL64(A[1][4], rhotates[1][4]);
451
    A[2][2] = ROL64(A[2][3], rhotates[2][3]);
452
    A[3][3] = ROL64(A[3][2], rhotates[3][2]);
453
    A[4][4] = ROL64(A[4][1], rhotates[4][1]);
454
455
    A[1][4] = ROL64(A[4][2], rhotates[4][2]);
456
    A[2][3] = ROL64(A[3][4], rhotates[3][4]);
457
    A[3][2] = ROL64(A[2][1], rhotates[2][1]);
458
    A[4][1] = ROL64(A[1][3], rhotates[1][3]);
459
460
    A[4][2] = ROL64(A[2][4], rhotates[2][4]);
461
    A[3][4] = ROL64(A[4][3], rhotates[4][3]);
462
    A[2][1] = ROL64(A[1][2], rhotates[1][2]);
463
    A[1][3] = ROL64(A[3][1], rhotates[3][1]);
464
465
    A[2][4] = ROL64(A[4][0], rhotates[4][0]);
466
    A[4][3] = ROL64(A[3][0], rhotates[3][0]);
467
    A[1][2] = ROL64(A[2][0], rhotates[2][0]);
468
    A[3][1] = ROL64(A[1][0], rhotates[1][0]);
469
470
    A[1][0] = ROL64(C[3],    rhotates[0][3]);
471
    A[2][0] = ROL64(C[1],    rhotates[0][1]);
472
    A[3][0] = ROL64(C[4],    rhotates[0][4]);
473
    A[4][0] = ROL64(C[2],    rhotates[0][2]);
474
475
    /*
     * Planes 0 and 1: A[y][x] ^= ~A[y][x+1] & A[y][x+2]. Lanes [y][0] and
     * [y][1] are copied to C[y]/D[y] first because the last two lanes of
     * the plane need their values from before the update.
     */
    C[0] = A[0][0];
476
    C[1] = A[1][0];
477
    D[0] = A[0][1];
478
    D[1] = A[1][1];
479
480
    A[0][0] ^= (~A[0][1] & A[0][2]);
481
    A[1][0] ^= (~A[1][1] & A[1][2]);
482
    A[0][1] ^= (~A[0][2] & A[0][3]);
483
    A[1][1] ^= (~A[1][2] & A[1][3]);
484
    A[0][2] ^= (~A[0][3] & A[0][4]);
485
    A[1][2] ^= (~A[1][3] & A[1][4]);
486
    A[0][3] ^= (~A[0][4] & C[0]);
487
    A[1][3] ^= (~A[1][4] & C[1]);
488
    A[0][4] ^= (~C[0]    & D[0]);
489
    A[1][4] ^= (~C[1]    & D[1]);
490
491
    /* Planes 2 and 3, same scheme */
    C[2] = A[2][0];
492
    C[3] = A[3][0];
493
    D[2] = A[2][1];
494
    D[3] = A[3][1];
495
496
    A[2][0] ^= (~A[2][1] & A[2][2]);
497
    A[3][0] ^= (~A[3][1] & A[3][2]);
498
    A[2][1] ^= (~A[2][2] & A[2][3]);
499
    A[3][1] ^= (~A[3][2] & A[3][3]);
500
    A[2][2] ^= (~A[2][3] & A[2][4]);
501
    A[3][2] ^= (~A[3][3] & A[3][4]);
502
    A[2][3] ^= (~A[2][4] & C[2]);
503
    A[3][3] ^= (~A[3][4] & C[3]);
504
    A[2][4] ^= (~C[2]    & D[2]);
505
    A[3][4] ^= (~C[3]    & D[3]);
506
507
    /* Plane 4, same scheme */
    C[4] = A[4][0];
508
    D[4] = A[4][1];
509
510
    A[4][0] ^= (~A[4][1] & A[4][2]);
511
    A[4][1] ^= (~A[4][2] & A[4][3]);
512
    A[4][2] ^= (~A[4][3] & A[4][4]);
513
    A[4][3] ^= (~A[4][4] & C[4]);
514
    A[4][4] ^= (~C[4]    & D[4]);
515
    /* Finally fold the round constant into lane [0][0] */
    A[0][0] ^= iotas[i];
516
}
517
518
/*
 * Run the permutation on A in place by calling Round() once for each
 * round index from 0 up to and including 23.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t remaining = 24;
    size_t rnd = 0;

    while (remaining-- > 0)
        Round(A, rnd++);
}
526
527
#elif defined(KECCAK_2X)
528
/*
529
 * This implementation is variant of KECCAK_1X above with outer-most
530
 * round loop unrolled twice. This allows to take temporary storage
531
 * out of round procedure and simplify references to it by alternating
532
 * it with actual data (see round loop below). Originally it was meant
533
 * rather as reference for an assembly implementation, but it seems to
534
 * play best with compilers [as well as provide best instruction per
535
 * processed byte ratio at minimal round unroll factor]...
536
 */
537
/*
 * Compute one round from the state A and store the result in R, using
 * iotas[i] as the round constant (i is asserted to be a valid index into
 * iotas[]). A is only read; every lane of R is written.
 *
 * With KECCAK_COMPLEMENTING_TRANSFORM defined, the per-plane combination
 * "C[x] ^ (~C[x+1] & C[x+2])" is replaced by a fixed pattern of "|", "&"
 * and a few "~" operations.
 * NOTE(review): that pattern presumably relies on the caller keeping the
 * lanes [0][1], [0][2], [1][3], [2][2], [3][2] and [4][0] stored inverted
 * around the round loop (as KeccakF1600 does) - confirm against the lane
 * complementing description in "Keccak implementation overview".
 */
static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
538
3.81M
{
539
3.81M
    uint64_t C[5], D[5];
540
541
3.81M
    assert(i < OSSL_NELEM(iotas));
542
543
3.81M
    /* C[x] = XOR of the five lanes in column x */
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
544
3.81M
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
545
3.81M
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
546
3.81M
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
547
3.81M
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
548
549
3.81M
    /* D[x] = ROL64(C[x + 1], 1) ^ C[x - 1], indices mod 5 */
    D[0] = ROL64(C[1], 1) ^ C[4];
550
3.81M
    D[1] = ROL64(C[2], 1) ^ C[0];
551
3.81M
    D[2] = ROL64(C[3], 1) ^ C[1];
552
3.81M
    D[3] = ROL64(C[4], 1) ^ C[2];
553
3.81M
    D[4] = ROL64(C[0], 1) ^ C[3];
554
555
3.81M
    /* Inputs of output plane 0: C[] is reused for the rotated lanes */
    C[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
556
3.81M
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
557
3.81M
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
558
3.81M
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
559
3.81M
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
560
561
3.81M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
562
3.81M
    R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i];
563
3.81M
    R[0][1] = C[1] ^ (~C[2] | C[3]);
564
3.81M
    R[0][2] = C[2] ^ ( C[3] & C[4]);
565
3.81M
    R[0][3] = C[3] ^ ( C[4] | C[0]);
566
3.81M
    R[0][4] = C[4] ^ ( C[0] & C[1]);
567
#else
568
    R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
569
    R[0][1] = C[1] ^ (~C[2] & C[3]);
570
    R[0][2] = C[2] ^ (~C[3] & C[4]);
571
    R[0][3] = C[3] ^ (~C[4] & C[0]);
572
    R[0][4] = C[4] ^ (~C[0] & C[1]);
573
#endif
574
575
3.81M
    /* Inputs of output plane 1 */
    C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
576
3.81M
    C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
577
3.81M
    C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
578
3.81M
    C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
579
3.81M
    C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
580
581
3.81M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
582
3.81M
    R[1][0] = C[0] ^ (C[1] |  C[2]);
583
3.81M
    R[1][1] = C[1] ^ (C[2] &  C[3]);
584
3.81M
    R[1][2] = C[2] ^ (C[3] | ~C[4]);
585
3.81M
    R[1][3] = C[3] ^ (C[4] |  C[0]);
586
3.81M
    R[1][4] = C[4] ^ (C[0] &  C[1]);
587
#else
588
    R[1][0] = C[0] ^ (~C[1] & C[2]);
589
    R[1][1] = C[1] ^ (~C[2] & C[3]);
590
    R[1][2] = C[2] ^ (~C[3] & C[4]);
591
    R[1][3] = C[3] ^ (~C[4] & C[0]);
592
    R[1][4] = C[4] ^ (~C[0] & C[1]);
593
#endif
594
595
3.81M
    /* Inputs of output plane 2 */
    C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
596
3.81M
    C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
597
3.81M
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
598
3.81M
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
599
3.81M
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
600
601
3.81M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
602
3.81M
    R[2][0] =  C[0] ^ ( C[1] | C[2]);
603
3.81M
    R[2][1] =  C[1] ^ ( C[2] & C[3]);
604
3.81M
    R[2][2] =  C[2] ^ (~C[3] & C[4]);
605
3.81M
    R[2][3] = ~C[3] ^ ( C[4] | C[0]);
606
3.81M
    R[2][4] =  C[4] ^ ( C[0] & C[1]);
607
#else
608
    R[2][0] = C[0] ^ (~C[1] & C[2]);
609
    R[2][1] = C[1] ^ (~C[2] & C[3]);
610
    R[2][2] = C[2] ^ (~C[3] & C[4]);
611
    R[2][3] = C[3] ^ (~C[4] & C[0]);
612
    R[2][4] = C[4] ^ (~C[0] & C[1]);
613
#endif
614
615
3.81M
    /* Inputs of output plane 3 */
    C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
616
3.81M
    C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
617
3.81M
    C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
618
3.81M
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
619
3.81M
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
620
621
3.81M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
622
3.81M
    R[3][0] =  C[0] ^ ( C[1] & C[2]);
623
3.81M
    R[3][1] =  C[1] ^ ( C[2] | C[3]);
624
3.81M
    R[3][2] =  C[2] ^ (~C[3] | C[4]);
625
3.81M
    R[3][3] = ~C[3] ^ ( C[4] & C[0]);
626
3.81M
    R[3][4] =  C[4] ^ ( C[0] | C[1]);
627
#else
628
    R[3][0] = C[0] ^ (~C[1] & C[2]);
629
    R[3][1] = C[1] ^ (~C[2] & C[3]);
630
    R[3][2] = C[2] ^ (~C[3] & C[4]);
631
    R[3][3] = C[3] ^ (~C[4] & C[0]);
632
    R[3][4] = C[4] ^ (~C[0] & C[1]);
633
#endif
634
635
3.81M
    /* Inputs of output plane 4 */
    C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
636
3.81M
    C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
637
3.81M
    C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
638
3.81M
    C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
639
3.81M
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
640
641
3.81M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
642
3.81M
    R[4][0] =  C[0] ^ (~C[1] & C[2]);
643
3.81M
    R[4][1] = ~C[1] ^ ( C[2] | C[3]);
644
3.81M
    R[4][2] =  C[2] ^ ( C[3] & C[4]);
645
3.81M
    R[4][3] =  C[3] ^ ( C[4] | C[0]);
646
3.81M
    R[4][4] =  C[4] ^ ( C[0] & C[1]);
647
#else
648
    R[4][0] = C[0] ^ (~C[1] & C[2]);
649
    R[4][1] = C[1] ^ (~C[2] & C[3]);
650
    R[4][2] = C[2] ^ (~C[3] & C[4]);
651
    R[4][3] = C[3] ^ (~C[4] & C[0]);
652
    R[4][4] = C[4] ^ (~C[0] & C[1]);
653
#endif
654
3.81M
}
655
656
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
/*
 * Bitwise-invert the six lanes that are kept complemented while the
 * rounds run. Calling it a second time restores the original values.
 */
static void keccak_toggle_complemented_lanes(uint64_t A[5][5])
{
    A[0][1] = ~A[0][1];
    A[0][2] = ~A[0][2];
    A[1][3] = ~A[1][3];
    A[2][2] = ~A[2][2];
    A[3][2] = ~A[3][2];
    A[4][0] = ~A[4][0];
}
#endif

/*
 * Run the 24-round permutation on A in place.
 *
 * Rounds are executed in pairs: the even round reads A and writes a
 * scratch state, the odd round reads the scratch state and writes A,
 * so the final result always lands back in A.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    uint64_t scratch[5][5];
    size_t pair;

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    keccak_toggle_complemented_lanes(A);
#endif

    for (pair = 0; pair < 12; pair++) {
        Round(scratch, A, 2 * pair);
        Round(A, scratch, 2 * pair + 1);
    }

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    keccak_toggle_complemented_lanes(A);
#endif
}
684
685
#else   /* define KECCAK_INPLACE to compile this code path */
686
/*
687
 * This implementation is KECCAK_1X from above combined 4 times with
688
 * a twist that allows to omit temporary storage and perform in-place
689
 * processing. It's discussed in section 2.5 of "Keccak implementation
690
 * overview". It's likely to be best suited for processors with large
691
 * register bank... On the other hand processor with large register
692
 * bank can as well use KECCAK_1X_ALT, it would be as fast but much
693
 * more compact...
694
 */
695
static void FourRounds(uint64_t A[5][5], size_t i)
696
{
697
    uint64_t B[5], C[5], D[5];
698
699
    assert(i <= OSSL_NELEM(iotas) - 4);
700
701
    /* Round 4*n */
702
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
703
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
704
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
705
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
706
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
707
708
    D[0] = ROL64(C[1], 1) ^ C[4];
709
    D[1] = ROL64(C[2], 1) ^ C[0];
710
    D[2] = ROL64(C[3], 1) ^ C[1];
711
    D[3] = ROL64(C[4], 1) ^ C[2];
712
    D[4] = ROL64(C[0], 1) ^ C[3];
713
714
    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
715
    B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
716
    B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
717
    B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
718
    B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
719
720
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
721
    C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
722
    C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
723
    C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
724
    C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);
725
726
    B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
727
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
728
    B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
729
    B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
730
    B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
731
732
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
733
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
734
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
735
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
736
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
737
738
    B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
739
    B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
740
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
741
    B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
742
    B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
743
744
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
745
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
746
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
747
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
748
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
749
750
    B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
751
    B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
752
    B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
753
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
754
    B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
755
756
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
757
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
758
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
759
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
760
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
761
762
    B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
763
    B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
764
    B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
765
    B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
766
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
767
768
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
769
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
770
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
771
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
772
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
773
774
    /* Round 4*n+1 */
775
    D[0] = ROL64(C[1], 1) ^ C[4];
776
    D[1] = ROL64(C[2], 1) ^ C[0];
777
    D[2] = ROL64(C[3], 1) ^ C[1];
778
    D[3] = ROL64(C[4], 1) ^ C[2];
779
    D[4] = ROL64(C[0], 1) ^ C[3];
780
781
    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
782
    B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
783
    B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
784
    B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
785
    B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);
786
787
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
788
    C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
789
    C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
790
    C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
791
    C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);
792
793
    B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
794
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
795
    B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
796
    B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
797
    B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);
798
799
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
800
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
801
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
802
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
803
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
804
805
    B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
806
    B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
807
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
808
    B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
809
    B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);
810
811
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
812
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
813
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
814
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
815
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
816
817
    B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
818
    B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
819
    B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
820
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
821
    B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);
822
823
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
824
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
825
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
826
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
827
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
828
829
    B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
830
    B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
831
    B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
832
    B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
833
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
834
835
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
836
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
837
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
838
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
839
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
840
841
    /* Round 4*n+2 */
842
    D[0] = ROL64(C[1], 1) ^ C[4];
843
    D[1] = ROL64(C[2], 1) ^ C[0];
844
    D[2] = ROL64(C[3], 1) ^ C[1];
845
    D[3] = ROL64(C[4], 1) ^ C[2];
846
    D[4] = ROL64(C[0], 1) ^ C[3];
847
848
    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
849
    B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
850
    B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
851
    B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
852
    B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);
853
854
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
855
    C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
856
    C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
857
    C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
858
    C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);
859
860
    B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
861
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
862
    B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
863
    B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
864
    B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);
865
866
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
867
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
868
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
869
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
870
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
871
872
    B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
873
    B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
874
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
875
    B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
876
    B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);
877
878
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
879
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
880
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
881
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
882
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
883
884
    B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
885
    B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
886
    B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
887
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
888
    B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);
889
890
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
891
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
892
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
893
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
894
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
895
896
    B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
897
    B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
898
    B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
899
    B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
900
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
901
902
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
903
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
904
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
905
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
906
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
907
908
    /* Round 4*n+3 */
909
    D[0] = ROL64(C[1], 1) ^ C[4];
910
    D[1] = ROL64(C[2], 1) ^ C[0];
911
    D[2] = ROL64(C[3], 1) ^ C[1];
912
    D[3] = ROL64(C[4], 1) ^ C[2];
913
    D[4] = ROL64(C[0], 1) ^ C[3];
914
915
    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
916
    B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
917
    B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
918
    B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
919
    B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);
920
921
    /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
922
    /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
923
    /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
924
    /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
925
    /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);
926
927
    B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
928
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
929
    B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
930
    B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
931
    B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);
932
933
    /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
934
    /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
935
    /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
936
    /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
937
    /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);
938
939
    B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
940
    B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
941
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
942
    B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
943
    B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);
944
945
    /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
946
    /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
947
    /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
948
    /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
949
    /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);
950
951
    B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
952
    B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
953
    B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
954
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
955
    B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);
956
957
    /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
958
    /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
959
    /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
960
    /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
961
    /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);
962
963
    B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
964
    B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
965
    B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
966
    B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
967
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
968
969
    /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
970
    /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
971
    /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
972
    /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
973
    /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
974
}
975
976
/*
 * Run the permutation over the state |A| by invoking FourRounds() six
 * times, handing it the starting round index 0, 4, 8, ..., 20 in turn.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t first_round = 0;

    while (first_round < 24) {
        FourRounds(A, first_round);
        first_round += 4;
    }
}
984
985
#endif
986
987
static uint64_t BitInterleave(uint64_t Ai)
988
1.95M
{
989
1.95M
    if (BIT_INTERLEAVE) {
990
0
        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
991
0
        uint32_t t0, t1;
992
993
0
        t0 = lo & 0x55555555;
994
0
        t0 |= t0 >> 1;  t0 &= 0x33333333;
995
0
        t0 |= t0 >> 2;  t0 &= 0x0f0f0f0f;
996
0
        t0 |= t0 >> 4;  t0 &= 0x00ff00ff;
997
0
        t0 |= t0 >> 8;  t0 &= 0x0000ffff;
998
999
0
        t1 = hi & 0x55555555;
1000
0
        t1 |= t1 >> 1;  t1 &= 0x33333333;
1001
0
        t1 |= t1 >> 2;  t1 &= 0x0f0f0f0f;
1002
0
        t1 |= t1 >> 4;  t1 &= 0x00ff00ff;
1003
0
        t1 |= t1 >> 8;  t1 <<= 16;
1004
1005
0
        lo &= 0xaaaaaaaa;
1006
0
        lo |= lo << 1;  lo &= 0xcccccccc;
1007
0
        lo |= lo << 2;  lo &= 0xf0f0f0f0;
1008
0
        lo |= lo << 4;  lo &= 0xff00ff00;
1009
0
        lo |= lo << 8;  lo >>= 16;
1010
1011
0
        hi &= 0xaaaaaaaa;
1012
0
        hi |= hi << 1;  hi &= 0xcccccccc;
1013
0
        hi |= hi << 2;  hi &= 0xf0f0f0f0;
1014
0
        hi |= hi << 4;  hi &= 0xff00ff00;
1015
0
        hi |= hi << 8;  hi &= 0xffff0000;
1016
1017
0
        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
1018
0
    }
1019
1020
1.95M
    return Ai;
1021
1.95M
}
1022
1023
static uint64_t BitDeinterleave(uint64_t Ai)
1024
22.6k
{
1025
22.6k
    if (BIT_INTERLEAVE) {
1026
0
        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
1027
0
        uint32_t t0, t1;
1028
1029
0
        t0 = lo & 0x0000ffff;
1030
0
        t0 |= t0 << 8;  t0 &= 0x00ff00ff;
1031
0
        t0 |= t0 << 4;  t0 &= 0x0f0f0f0f;
1032
0
        t0 |= t0 << 2;  t0 &= 0x33333333;
1033
0
        t0 |= t0 << 1;  t0 &= 0x55555555;
1034
1035
0
        t1 = hi << 16;
1036
0
        t1 |= t1 >> 8;  t1 &= 0xff00ff00;
1037
0
        t1 |= t1 >> 4;  t1 &= 0xf0f0f0f0;
1038
0
        t1 |= t1 >> 2;  t1 &= 0xcccccccc;
1039
0
        t1 |= t1 >> 1;  t1 &= 0xaaaaaaaa;
1040
1041
0
        lo >>= 16;
1042
0
        lo |= lo << 8;  lo &= 0x00ff00ff;
1043
0
        lo |= lo << 4;  lo &= 0x0f0f0f0f;
1044
0
        lo |= lo << 2;  lo &= 0x33333333;
1045
0
        lo |= lo << 1;  lo &= 0x55555555;
1046
1047
0
        hi &= 0xffff0000;
1048
0
        hi |= hi >> 8;  hi &= 0xff00ff00;
1049
0
        hi |= hi >> 4;  hi &= 0xf0f0f0f0;
1050
0
        hi |= hi >> 2;  hi &= 0xcccccccc;
1051
0
        hi |= hi >> 1;  hi &= 0xaaaaaaaa;
1052
1053
0
        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
1054
0
    }
1055
1056
22.6k
    return Ai;
1057
22.6k
}
1058
1059
/*
 * Absorb input into the state |A|. SHA3_absorb may be invoked repeatedly;
 * every invocation consumes only the largest multiple of |r| bytes that
 * fits in |len| and returns the number of bytes left over, so the caller
 * does not have to compute that multiple itself. |r| acts as the block
 * size; it is commonly (1600 - 256*n)/8, e.g. 168, 136, 104, 72, but can
 * also be (1600 - 448)/8 = 144, and it must be a multiple of 8 smaller
 * than the 200-byte state. Message padding and buffering of partial
 * blocks, byte- or bitwise, remain the caller's responsibility.
 */
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
                   size_t r)
{
    uint64_t *lanes = (uint64_t *)A;
    const size_t nlanes = r / 8;
    size_t lane;
    unsigned int byte;

    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    for (; len >= r; len -= r) {
        for (lane = 0; lane < nlanes; lane++, inp += 8) {
            uint64_t word = 0;

            /* Assemble one lane from 8 input bytes, least significant first. */
            for (byte = 8; byte-- > 0;)
                word = (word << 8) | inp[byte];

            lanes[lane] ^= BitInterleave(word);
        }
        KeccakF1600(A);
    }

    return len;
}
1093
1094
/*
 * Extract |len| bytes of output from the state |A| into |out|; intended
 * to be called after SHA3_absorb.
 * When the output is produced by several SHA3_squeeze calls, |len| must be
 * a multiple of the block size |r| on each call, |next| must be 0 on the
 * first call and 1 on the following ones, and buffering of the results is
 * up to the caller.
 * When a single call is enough, |len| may be arbitrary and |next| must
 * be 0.
 * The state is permuted before a block is read whenever |next| is
 * non-zero, and before every block after the first within one call.
 */
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r,
                  int next)
{
    uint64_t *lanes = (uint64_t *)A;
    const size_t nlanes = r / 8;
    size_t lane, k, chunk;

    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    while (len != 0) {
        if (next)
            KeccakF1600(A);
        next = 1;

        for (lane = 0; lane < nlanes && len != 0; lane++) {
            uint64_t word = BitDeinterleave(lanes[lane]);

            /* Emit a whole lane, or only what is still wanted of it. */
            chunk = len < 8 ? len : 8;
            for (k = 0; k < chunk; k++) {
                *out++ = (unsigned char)word;
                word >>= 8;
            }
            len -= chunk;
        }
    }
}
1139
#endif
1140
1141
#ifdef SELFTEST
1142
/*
 * Post-padding one-shot implementations would look as following:
 *
 * SHA3_224     SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
 * SHA3_256     SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
 * SHA3_384     SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
 * SHA3_512     SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
 * SHAKE_128    SHA3_sponge(inp, len, out, d, (1600-256)/8);
 * SHAKE_256    SHA3_sponge(inp, len, out, d, (1600-512)/8);
 *
 * |inp| must already be padded: SHA3_absorb only consumes whole |r|-byte
 * blocks and the count of leftover bytes it returns is ignored here.
 * |d| bytes of output are written to |out|.
 */

void SHA3_sponge(const unsigned char *inp, size_t len,
                 unsigned char *out, size_t d, size_t r)
{
    uint64_t A[5][5];

    memset(A, 0, sizeof(A));
    SHA3_absorb(A, inp, len, r);
    /*
     * Whole output is produced by this one call, so |next| is 0: the
     * first block is read from the state as left by SHA3_absorb.
     */
    SHA3_squeeze(A, out, d, r, 0);
}
1162
1163
# include <stdio.h>
1164
1165
/*
 * Self-test driver: hashes one pre-padded block with SHA3_sponge, prints
 * the 512 output bytes as hex on stdout and reports on stderr whether they
 * match the expected table. Returns 0 on match and 1 otherwise.
 */
int main(void)
{
    /*
     * This is 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
     */
    unsigned char test[168] = { '\xf3', '\x3' };
    unsigned char out[512];
    size_t pos, printed;
    static const unsigned char result[512] = {
        0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
        0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
        0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
        0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
        0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
        0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
        0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
        0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
        0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
        0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
        0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
        0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
        0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
        0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
        0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
        0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
        0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
        0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
        0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
        0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
        0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
        0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
        0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
        0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
        0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
        0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
        0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
        0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
        0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
        0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
        0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
        0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
        0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
        0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
        0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
        0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
        0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
        0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
        0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
        0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
        0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
        0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
        0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
        0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
        0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
        0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
        0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
        0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
        0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
        0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
        0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
        0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
        0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
        0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
        0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
        0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
        0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
        0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
        0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
        0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
        0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
        0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
        0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
        0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
    };

    /* Last byte of the block carries the final padding bit. */
    test[167] = '\x80';
    SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));

    /*
     * The output is deliberately laid out 16 bytes per line so that it
     * can be redirected to a file and compared with diff(1) against the
     * final "output val" copied from the official example.
     */
    for (pos = 0; pos < sizeof(out); pos++) {
        printed = pos + 1;
        printf("%02X", out[pos]);
        if (printed % 16 != 0 && printed != sizeof(out))
            printf(" ");
        else
            printf("\n");
    }

    if (memcmp(out, result, sizeof(out)) == 0) {
        fprintf(stderr, "success\n");
        return 0;
    }

    fprintf(stderr, "failure\n");
    return 1;
}
1262
#endif