Coverage Report

Created: 2026-02-22 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl/crypto/sha/keccak1600.c
Line
Count
Source
1
/*
2
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <openssl/e_os2.h>
11
#include <string.h>
12
#include <assert.h>
13
14
#include "internal/nelem.h"
15
16
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
17
    size_t r);
18
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, int next);
19
20
#if !defined(KECCAK1600_ASM) || !defined(SELFTEST)
21
22
/*
23
 * Choose some sensible defaults
24
 */
25
#if !defined(KECCAK_REF) && !defined(KECCAK_1X) && !defined(KECCAK_1X_ALT) && !defined(KECCAK_2X) && !defined(KECCAK_INPLACE)
26
#define KECCAK_2X /* default to KECCAK_2X variant */
27
#endif
28
29
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || (defined(__x86_64) && !defined(__BMI__)) || defined(_M_X64) || defined(__mips) || defined(__riscv) || defined(__s390__) || defined(__EMSCRIPTEN__)
30
/*
31
 * These don't have "and with complement" instruction, so minimize amount
32
 * of "not"-s. Implemented only in the [default] KECCAK_2X variant.
33
 */
34
#define KECCAK_COMPLEMENTING_TRANSFORM
35
#endif
36
37
#if defined(__x86_64__) || defined(__aarch64__) || defined(__mips64) || defined(__ia64) || (defined(__VMS) && !defined(__vax))
38
/*
39
 * These are available even in ILP32 flavours, but even then they are
40
 * capable of performing 64-bit operations as efficiently as in *P64.
41
 * Since it's not given that we can use sizeof(void *), just shunt it.
42
 */
43
669M
#define BIT_INTERLEAVE (0)
44
#else
45
#define BIT_INTERLEAVE (sizeof(void *) < 8)
46
#endif
47
48
0
#define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))
49
50
static uint64_t ROL64(uint64_t val, int offset)
51
656M
{
52
656M
    if (offset == 0) {
53
0
        return val;
54
656M
    } else if (!BIT_INTERLEAVE) {
55
656M
        return (val << offset) | (val >> (64 - offset));
56
656M
    } else {
57
0
        uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;
58
59
0
        if (offset & 1) {
60
0
            uint32_t tmp = hi;
61
62
0
            offset >>= 1;
63
0
            hi = ROL32(lo, offset);
64
0
            lo = ROL32(tmp, offset + 1);
65
0
        } else {
66
0
            offset >>= 1;
67
0
            lo = ROL32(lo, offset);
68
0
            hi = ROL32(hi, offset);
69
0
        }
70
71
0
        return ((uint64_t)hi << 32) | lo;
72
0
    }
73
656M
}
74
75
static const unsigned char rhotates[5][5] = {
76
    { 0, 1, 62, 28, 27 },
77
    { 36, 44, 6, 55, 20 },
78
    { 3, 10, 43, 25, 39 },
79
    { 41, 45, 15, 21, 8 },
80
    { 18, 2, 61, 56, 14 }
81
};
82
83
static const uint64_t iotas[] = {
84
    BIT_INTERLEAVE ? 0x0000000000000001ULL : 0x0000000000000001ULL,
85
    BIT_INTERLEAVE ? 0x0000008900000000ULL : 0x0000000000008082ULL,
86
    BIT_INTERLEAVE ? 0x8000008b00000000ULL : 0x800000000000808aULL,
87
    BIT_INTERLEAVE ? 0x8000808000000000ULL : 0x8000000080008000ULL,
88
    BIT_INTERLEAVE ? 0x0000008b00000001ULL : 0x000000000000808bULL,
89
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
90
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
91
    BIT_INTERLEAVE ? 0x8000008200000001ULL : 0x8000000000008009ULL,
92
    BIT_INTERLEAVE ? 0x0000000b00000000ULL : 0x000000000000008aULL,
93
    BIT_INTERLEAVE ? 0x0000000a00000000ULL : 0x0000000000000088ULL,
94
    BIT_INTERLEAVE ? 0x0000808200000001ULL : 0x0000000080008009ULL,
95
    BIT_INTERLEAVE ? 0x0000800300000000ULL : 0x000000008000000aULL,
96
    BIT_INTERLEAVE ? 0x0000808b00000001ULL : 0x000000008000808bULL,
97
    BIT_INTERLEAVE ? 0x8000000b00000001ULL : 0x800000000000008bULL,
98
    BIT_INTERLEAVE ? 0x8000008a00000001ULL : 0x8000000000008089ULL,
99
    BIT_INTERLEAVE ? 0x8000008100000001ULL : 0x8000000000008003ULL,
100
    BIT_INTERLEAVE ? 0x8000008100000000ULL : 0x8000000000008002ULL,
101
    BIT_INTERLEAVE ? 0x8000000800000000ULL : 0x8000000000000080ULL,
102
    BIT_INTERLEAVE ? 0x0000008300000000ULL : 0x000000000000800aULL,
103
    BIT_INTERLEAVE ? 0x8000800300000000ULL : 0x800000008000000aULL,
104
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
105
    BIT_INTERLEAVE ? 0x8000008800000000ULL : 0x8000000000008080ULL,
106
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
107
    BIT_INTERLEAVE ? 0x8000808200000000ULL : 0x8000000080008008ULL
108
};
109
110
#if defined(KECCAK_REF)
111
/*
112
 * This is straightforward or "maximum clarity" implementation aiming
113
 * to resemble section 3.2 of the FIPS PUB 202 "SHA-3 Standard:
114
 * Permutation-Based Hash and Extendible-Output Functions" as much as
115
 * possible. With one caveat. Because of the way C stores matrices,
116
 * references to A[x,y] in the specification are presented as A[y][x].
117
 * Implementation unrolls inner x-loops so that modulo 5 operations are
118
 * explicitly pre-computed.
119
 */
120
static void Theta(uint64_t A[5][5])
121
{
122
    uint64_t C[5], D[5];
123
    size_t y;
124
125
    C[0] = A[0][0];
126
    C[1] = A[0][1];
127
    C[2] = A[0][2];
128
    C[3] = A[0][3];
129
    C[4] = A[0][4];
130
131
    for (y = 1; y < 5; y++) {
132
        C[0] ^= A[y][0];
133
        C[1] ^= A[y][1];
134
        C[2] ^= A[y][2];
135
        C[3] ^= A[y][3];
136
        C[4] ^= A[y][4];
137
    }
138
139
    D[0] = ROL64(C[1], 1) ^ C[4];
140
    D[1] = ROL64(C[2], 1) ^ C[0];
141
    D[2] = ROL64(C[3], 1) ^ C[1];
142
    D[3] = ROL64(C[4], 1) ^ C[2];
143
    D[4] = ROL64(C[0], 1) ^ C[3];
144
145
    for (y = 0; y < 5; y++) {
146
        A[y][0] ^= D[0];
147
        A[y][1] ^= D[1];
148
        A[y][2] ^= D[2];
149
        A[y][3] ^= D[3];
150
        A[y][4] ^= D[4];
151
    }
152
}
153
154
static void Rho(uint64_t A[5][5])
155
{
156
    size_t y;
157
158
    for (y = 0; y < 5; y++) {
159
        A[y][0] = ROL64(A[y][0], rhotates[y][0]);
160
        A[y][1] = ROL64(A[y][1], rhotates[y][1]);
161
        A[y][2] = ROL64(A[y][2], rhotates[y][2]);
162
        A[y][3] = ROL64(A[y][3], rhotates[y][3]);
163
        A[y][4] = ROL64(A[y][4], rhotates[y][4]);
164
    }
165
}
166
167
static void Pi(uint64_t A[5][5])
168
{
169
    uint64_t T[5][5];
170
171
    /*
172
     * T = A
173
     * A[y][x] = T[x][(3*y+x)%5]
174
     */
175
    memcpy(T, A, sizeof(T));
176
177
    A[0][0] = T[0][0];
178
    A[0][1] = T[1][1];
179
    A[0][2] = T[2][2];
180
    A[0][3] = T[3][3];
181
    A[0][4] = T[4][4];
182
183
    A[1][0] = T[0][3];
184
    A[1][1] = T[1][4];
185
    A[1][2] = T[2][0];
186
    A[1][3] = T[3][1];
187
    A[1][4] = T[4][2];
188
189
    A[2][0] = T[0][1];
190
    A[2][1] = T[1][2];
191
    A[2][2] = T[2][3];
192
    A[2][3] = T[3][4];
193
    A[2][4] = T[4][0];
194
195
    A[3][0] = T[0][4];
196
    A[3][1] = T[1][0];
197
    A[3][2] = T[2][1];
198
    A[3][3] = T[3][2];
199
    A[3][4] = T[4][3];
200
201
    A[4][0] = T[0][2];
202
    A[4][1] = T[1][3];
203
    A[4][2] = T[2][4];
204
    A[4][3] = T[3][0];
205
    A[4][4] = T[4][1];
206
}
207
208
static void Chi(uint64_t A[5][5])
209
{
210
    uint64_t C[5];
211
    size_t y;
212
213
    for (y = 0; y < 5; y++) {
214
        C[0] = A[y][0] ^ (~A[y][1] & A[y][2]);
215
        C[1] = A[y][1] ^ (~A[y][2] & A[y][3]);
216
        C[2] = A[y][2] ^ (~A[y][3] & A[y][4]);
217
        C[3] = A[y][3] ^ (~A[y][4] & A[y][0]);
218
        C[4] = A[y][4] ^ (~A[y][0] & A[y][1]);
219
220
        A[y][0] = C[0];
221
        A[y][1] = C[1];
222
        A[y][2] = C[2];
223
        A[y][3] = C[3];
224
        A[y][4] = C[4];
225
    }
226
}
227
228
static void Iota(uint64_t A[5][5], size_t i)
229
{
230
    assert(i < OSSL_NELEM(iotas));
231
    A[0][0] ^= iotas[i];
232
}
233
234
static void KeccakF1600(uint64_t A[5][5])
235
{
236
    size_t i;
237
238
    for (i = 0; i < 24; i++) {
239
        Theta(A);
240
        Rho(A);
241
        Pi(A);
242
        Chi(A);
243
        Iota(A, i);
244
    }
245
}
246
247
#elif defined(KECCAK_1X)
248
/*
249
 * This implementation is optimization of above code featuring unroll
250
 * of even y-loops, their fusion and code motion. It also minimizes
251
 * temporary storage. Compiler would normally do all these things for
252
 * you, purpose of manual optimization is to provide "unobscured"
253
 * reference for assembly implementation [in case this approach is
254
 * chosen for implementation on some platform]. In the nutshell it's
255
 * equivalent of "plane-per-plane processing" approach discussed in
256
 * section 2.4 of "Keccak implementation overview".
257
 */
258
static void Round(uint64_t A[5][5], size_t i)
259
{
260
    uint64_t C[5], E[2]; /* registers */
261
    uint64_t D[5], T[2][5]; /* memory    */
262
263
    assert(i < OSSL_NELEM(iotas));
264
265
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
266
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
267
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
268
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
269
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
270
271
#if defined(__arm__)
272
    D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
273
    D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
274
    D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
275
    D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
276
    D[3] = C[2] = ROL64(C[4], 1) ^ C[2];
277
278
    T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
279
    T[0][1] = A[0][1] ^ E[0]; /* D[1] */
280
    T[0][2] = A[0][2] ^ C[1]; /* D[2] */
281
    T[0][3] = A[0][3] ^ C[2]; /* D[3] */
282
    T[0][4] = A[0][4] ^ E[1]; /* D[4] */
283
284
    C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
285
    C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
286
    C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
287
    C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
288
    C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
289
#else
290
    D[0] = ROL64(C[1], 1) ^ C[4];
291
    D[1] = ROL64(C[2], 1) ^ C[0];
292
    D[2] = ROL64(C[3], 1) ^ C[1];
293
    D[3] = ROL64(C[4], 1) ^ C[2];
294
    D[4] = ROL64(C[0], 1) ^ C[3];
295
296
    T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
297
    T[0][1] = A[0][1] ^ D[1];
298
    T[0][2] = A[0][2] ^ D[2];
299
    T[0][3] = A[0][3] ^ D[3];
300
    T[0][4] = A[0][4] ^ D[4];
301
302
    C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
303
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
304
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
305
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
306
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
307
#endif
308
    A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
309
    A[0][1] = C[1] ^ (~C[2] & C[3]);
310
    A[0][2] = C[2] ^ (~C[3] & C[4]);
311
    A[0][3] = C[3] ^ (~C[4] & C[0]);
312
    A[0][4] = C[4] ^ (~C[0] & C[1]);
313
314
    T[1][0] = A[1][0] ^ (C[3] = D[0]);
315
    T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
316
    T[1][2] = A[1][2] ^ (E[0] = D[2]);
317
    T[1][3] = A[1][3] ^ (E[1] = D[3]);
318
    T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
319
320
    C[0] = ROL64(T[0][3], rhotates[0][3]);
321
    C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]); /* D[4] */
322
    C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]); /* D[0] */
323
    C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]); /* D[1] */
324
    C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]); /* D[2] */
325
326
    A[1][0] = C[0] ^ (~C[1] & C[2]);
327
    A[1][1] = C[1] ^ (~C[2] & C[3]);
328
    A[1][2] = C[2] ^ (~C[3] & C[4]);
329
    A[1][3] = C[3] ^ (~C[4] & C[0]);
330
    A[1][4] = C[4] ^ (~C[0] & C[1]);
331
332
    C[0] = ROL64(T[0][1], rhotates[0][1]);
333
    C[1] = ROL64(T[1][2], rhotates[1][2]);
334
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
335
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
336
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
337
338
    A[2][0] = C[0] ^ (~C[1] & C[2]);
339
    A[2][1] = C[1] ^ (~C[2] & C[3]);
340
    A[2][2] = C[2] ^ (~C[3] & C[4]);
341
    A[2][3] = C[3] ^ (~C[4] & C[0]);
342
    A[2][4] = C[4] ^ (~C[0] & C[1]);
343
344
    C[0] = ROL64(T[0][4], rhotates[0][4]);
345
    C[1] = ROL64(T[1][0], rhotates[1][0]);
346
    C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */
347
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
348
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
349
350
    A[3][0] = C[0] ^ (~C[1] & C[2]);
351
    A[3][1] = C[1] ^ (~C[2] & C[3]);
352
    A[3][2] = C[2] ^ (~C[3] & C[4]);
353
    A[3][3] = C[3] ^ (~C[4] & C[0]);
354
    A[3][4] = C[4] ^ (~C[0] & C[1]);
355
356
    C[0] = ROL64(T[0][2], rhotates[0][2]);
357
    C[1] = ROL64(T[1][3], rhotates[1][3]);
358
    C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */
359
    C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */
360
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
361
362
    A[4][0] = C[0] ^ (~C[1] & C[2]);
363
    A[4][1] = C[1] ^ (~C[2] & C[3]);
364
    A[4][2] = C[2] ^ (~C[3] & C[4]);
365
    A[4][3] = C[3] ^ (~C[4] & C[0]);
366
    A[4][4] = C[4] ^ (~C[0] & C[1]);
367
}
368
369
static void KeccakF1600(uint64_t A[5][5])
370
{
371
    size_t i;
372
373
    for (i = 0; i < 24; i++) {
374
        Round(A, i);
375
    }
376
}
377
378
#elif defined(KECCAK_1X_ALT)
379
/*
380
 * This is variant of above KECCAK_1X that reduces requirement for
381
 * temporary storage even further, but at cost of more updates to A[][].
382
 * It's less suitable if A[][] is memory bound, but better if it's
383
 * register bound.
384
 */
385
386
static void Round(uint64_t A[5][5], size_t i)
387
{
388
    uint64_t C[5], D[5];
389
390
    assert(i < OSSL_NELEM(iotas));
391
392
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
393
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
394
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
395
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
396
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
397
398
    D[1] = C[0] ^ ROL64(C[2], 1);
399
    D[2] = C[1] ^ ROL64(C[3], 1);
400
    D[3] = C[2] ^= ROL64(C[4], 1);
401
    D[4] = C[3] ^= ROL64(C[0], 1);
402
    D[0] = C[4] ^= ROL64(C[1], 1);
403
404
    A[0][1] ^= D[1];
405
    A[1][1] ^= D[1];
406
    A[2][1] ^= D[1];
407
    A[3][1] ^= D[1];
408
    A[4][1] ^= D[1];
409
410
    A[0][2] ^= D[2];
411
    A[1][2] ^= D[2];
412
    A[2][2] ^= D[2];
413
    A[3][2] ^= D[2];
414
    A[4][2] ^= D[2];
415
416
    A[0][3] ^= C[2];
417
    A[1][3] ^= C[2];
418
    A[2][3] ^= C[2];
419
    A[3][3] ^= C[2];
420
    A[4][3] ^= C[2];
421
422
    A[0][4] ^= C[3];
423
    A[1][4] ^= C[3];
424
    A[2][4] ^= C[3];
425
    A[3][4] ^= C[3];
426
    A[4][4] ^= C[3];
427
428
    A[0][0] ^= C[4];
429
    A[1][0] ^= C[4];
430
    A[2][0] ^= C[4];
431
    A[3][0] ^= C[4];
432
    A[4][0] ^= C[4];
433
434
    C[1] = A[0][1];
435
    C[2] = A[0][2];
436
    C[3] = A[0][3];
437
    C[4] = A[0][4];
438
439
    A[0][1] = ROL64(A[1][1], rhotates[1][1]);
440
    A[0][2] = ROL64(A[2][2], rhotates[2][2]);
441
    A[0][3] = ROL64(A[3][3], rhotates[3][3]);
442
    A[0][4] = ROL64(A[4][4], rhotates[4][4]);
443
444
    A[1][1] = ROL64(A[1][4], rhotates[1][4]);
445
    A[2][2] = ROL64(A[2][3], rhotates[2][3]);
446
    A[3][3] = ROL64(A[3][2], rhotates[3][2]);
447
    A[4][4] = ROL64(A[4][1], rhotates[4][1]);
448
449
    A[1][4] = ROL64(A[4][2], rhotates[4][2]);
450
    A[2][3] = ROL64(A[3][4], rhotates[3][4]);
451
    A[3][2] = ROL64(A[2][1], rhotates[2][1]);
452
    A[4][1] = ROL64(A[1][3], rhotates[1][3]);
453
454
    A[4][2] = ROL64(A[2][4], rhotates[2][4]);
455
    A[3][4] = ROL64(A[4][3], rhotates[4][3]);
456
    A[2][1] = ROL64(A[1][2], rhotates[1][2]);
457
    A[1][3] = ROL64(A[3][1], rhotates[3][1]);
458
459
    A[2][4] = ROL64(A[4][0], rhotates[4][0]);
460
    A[4][3] = ROL64(A[3][0], rhotates[3][0]);
461
    A[1][2] = ROL64(A[2][0], rhotates[2][0]);
462
    A[3][1] = ROL64(A[1][0], rhotates[1][0]);
463
464
    A[1][0] = ROL64(C[3], rhotates[0][3]);
465
    A[2][0] = ROL64(C[1], rhotates[0][1]);
466
    A[3][0] = ROL64(C[4], rhotates[0][4]);
467
    A[4][0] = ROL64(C[2], rhotates[0][2]);
468
469
    C[0] = A[0][0];
470
    C[1] = A[1][0];
471
    D[0] = A[0][1];
472
    D[1] = A[1][1];
473
474
    A[0][0] ^= (~A[0][1] & A[0][2]);
475
    A[1][0] ^= (~A[1][1] & A[1][2]);
476
    A[0][1] ^= (~A[0][2] & A[0][3]);
477
    A[1][1] ^= (~A[1][2] & A[1][3]);
478
    A[0][2] ^= (~A[0][3] & A[0][4]);
479
    A[1][2] ^= (~A[1][3] & A[1][4]);
480
    A[0][3] ^= (~A[0][4] & C[0]);
481
    A[1][3] ^= (~A[1][4] & C[1]);
482
    A[0][4] ^= (~C[0] & D[0]);
483
    A[1][4] ^= (~C[1] & D[1]);
484
485
    C[2] = A[2][0];
486
    C[3] = A[3][0];
487
    D[2] = A[2][1];
488
    D[3] = A[3][1];
489
490
    A[2][0] ^= (~A[2][1] & A[2][2]);
491
    A[3][0] ^= (~A[3][1] & A[3][2]);
492
    A[2][1] ^= (~A[2][2] & A[2][3]);
493
    A[3][1] ^= (~A[3][2] & A[3][3]);
494
    A[2][2] ^= (~A[2][3] & A[2][4]);
495
    A[3][2] ^= (~A[3][3] & A[3][4]);
496
    A[2][3] ^= (~A[2][4] & C[2]);
497
    A[3][3] ^= (~A[3][4] & C[3]);
498
    A[2][4] ^= (~C[2] & D[2]);
499
    A[3][4] ^= (~C[3] & D[3]);
500
501
    C[4] = A[4][0];
502
    D[4] = A[4][1];
503
504
    A[4][0] ^= (~A[4][1] & A[4][2]);
505
    A[4][1] ^= (~A[4][2] & A[4][3]);
506
    A[4][2] ^= (~A[4][3] & A[4][4]);
507
    A[4][3] ^= (~A[4][4] & C[4]);
508
    A[4][4] ^= (~C[4] & D[4]);
509
    A[0][0] ^= iotas[i];
510
}
511
512
static void KeccakF1600(uint64_t A[5][5])
513
{
514
    size_t i;
515
516
    for (i = 0; i < 24; i++) {
517
        Round(A, i);
518
    }
519
}
520
521
#elif defined(KECCAK_2X)
522
/*
523
 * This implementation is variant of KECCAK_1X above with outer-most
524
 * round loop unrolled twice. This allows to take temporary storage
525
 * out of round procedure and simplify references to it by alternating
526
 * it with actual data (see round loop below). Originally it was meant
527
 * rather as reference for an assembly implementation, but it seems to
528
 * play best with compilers [as well as provide best instruction per
529
 * processed byte ratio at minimal round unroll factor]...
530
 */
531
static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
532
22.6M
{
533
22.6M
    uint64_t C[5], D[5];
534
535
22.6M
    assert(i < OSSL_NELEM(iotas));
536
537
22.6M
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
538
22.6M
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
539
22.6M
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
540
22.6M
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
541
22.6M
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
542
543
22.6M
    D[0] = ROL64(C[1], 1) ^ C[4];
544
22.6M
    D[1] = ROL64(C[2], 1) ^ C[0];
545
22.6M
    D[2] = ROL64(C[3], 1) ^ C[1];
546
22.6M
    D[3] = ROL64(C[4], 1) ^ C[2];
547
22.6M
    D[4] = ROL64(C[0], 1) ^ C[3];
548
549
22.6M
    C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
550
22.6M
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
551
22.6M
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
552
22.6M
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
553
22.6M
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
554
555
22.6M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
556
22.6M
    R[0][0] = C[0] ^ (C[1] | C[2]) ^ iotas[i];
557
22.6M
    R[0][1] = C[1] ^ (~C[2] | C[3]);
558
22.6M
    R[0][2] = C[2] ^ (C[3] & C[4]);
559
22.6M
    R[0][3] = C[3] ^ (C[4] | C[0]);
560
22.6M
    R[0][4] = C[4] ^ (C[0] & C[1]);
561
#else
562
    R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
563
    R[0][1] = C[1] ^ (~C[2] & C[3]);
564
    R[0][2] = C[2] ^ (~C[3] & C[4]);
565
    R[0][3] = C[3] ^ (~C[4] & C[0]);
566
    R[0][4] = C[4] ^ (~C[0] & C[1]);
567
#endif
568
569
22.6M
    C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
570
22.6M
    C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
571
22.6M
    C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
572
22.6M
    C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
573
22.6M
    C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
574
575
22.6M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
576
22.6M
    R[1][0] = C[0] ^ (C[1] | C[2]);
577
22.6M
    R[1][1] = C[1] ^ (C[2] & C[3]);
578
22.6M
    R[1][2] = C[2] ^ (C[3] | ~C[4]);
579
22.6M
    R[1][3] = C[3] ^ (C[4] | C[0]);
580
22.6M
    R[1][4] = C[4] ^ (C[0] & C[1]);
581
#else
582
    R[1][0] = C[0] ^ (~C[1] & C[2]);
583
    R[1][1] = C[1] ^ (~C[2] & C[3]);
584
    R[1][2] = C[2] ^ (~C[3] & C[4]);
585
    R[1][3] = C[3] ^ (~C[4] & C[0]);
586
    R[1][4] = C[4] ^ (~C[0] & C[1]);
587
#endif
588
589
22.6M
    C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
590
22.6M
    C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
591
22.6M
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
592
22.6M
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
593
22.6M
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
594
595
22.6M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
596
22.6M
    R[2][0] = C[0] ^ (C[1] | C[2]);
597
22.6M
    R[2][1] = C[1] ^ (C[2] & C[3]);
598
22.6M
    R[2][2] = C[2] ^ (~C[3] & C[4]);
599
22.6M
    R[2][3] = ~C[3] ^ (C[4] | C[0]);
600
22.6M
    R[2][4] = C[4] ^ (C[0] & C[1]);
601
#else
602
    R[2][0] = C[0] ^ (~C[1] & C[2]);
603
    R[2][1] = C[1] ^ (~C[2] & C[3]);
604
    R[2][2] = C[2] ^ (~C[3] & C[4]);
605
    R[2][3] = C[3] ^ (~C[4] & C[0]);
606
    R[2][4] = C[4] ^ (~C[0] & C[1]);
607
#endif
608
609
22.6M
    C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
610
22.6M
    C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
611
22.6M
    C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
612
22.6M
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
613
22.6M
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
614
615
22.6M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
616
22.6M
    R[3][0] = C[0] ^ (C[1] & C[2]);
617
22.6M
    R[3][1] = C[1] ^ (C[2] | C[3]);
618
22.6M
    R[3][2] = C[2] ^ (~C[3] | C[4]);
619
22.6M
    R[3][3] = ~C[3] ^ (C[4] & C[0]);
620
22.6M
    R[3][4] = C[4] ^ (C[0] | C[1]);
621
#else
622
    R[3][0] = C[0] ^ (~C[1] & C[2]);
623
    R[3][1] = C[1] ^ (~C[2] & C[3]);
624
    R[3][2] = C[2] ^ (~C[3] & C[4]);
625
    R[3][3] = C[3] ^ (~C[4] & C[0]);
626
    R[3][4] = C[4] ^ (~C[0] & C[1]);
627
#endif
628
629
22.6M
    C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
630
22.6M
    C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
631
22.6M
    C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
632
22.6M
    C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
633
22.6M
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
634
635
22.6M
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
636
22.6M
    R[4][0] = C[0] ^ (~C[1] & C[2]);
637
22.6M
    R[4][1] = ~C[1] ^ (C[2] | C[3]);
638
22.6M
    R[4][2] = C[2] ^ (C[3] & C[4]);
639
22.6M
    R[4][3] = C[3] ^ (C[4] | C[0]);
640
22.6M
    R[4][4] = C[4] ^ (C[0] & C[1]);
641
#else
642
    R[4][0] = C[0] ^ (~C[1] & C[2]);
643
    R[4][1] = C[1] ^ (~C[2] & C[3]);
644
    R[4][2] = C[2] ^ (~C[3] & C[4]);
645
    R[4][3] = C[3] ^ (~C[4] & C[0]);
646
    R[4][4] = C[4] ^ (~C[0] & C[1]);
647
#endif
648
22.6M
}
649
650
static void KeccakF1600(uint64_t A[5][5])
651
943k
{
652
943k
    uint64_t T[5][5];
653
943k
    size_t i;
654
655
943k
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
656
943k
    A[0][1] = ~A[0][1];
657
943k
    A[0][2] = ~A[0][2];
658
943k
    A[1][3] = ~A[1][3];
659
943k
    A[2][2] = ~A[2][2];
660
943k
    A[3][2] = ~A[3][2];
661
943k
    A[4][0] = ~A[4][0];
662
943k
#endif
663
664
12.2M
    for (i = 0; i < 24; i += 2) {
665
11.3M
        Round(T, A, i);
666
11.3M
        Round(A, T, i + 1);
667
11.3M
    }
668
669
943k
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
670
943k
    A[0][1] = ~A[0][1];
671
943k
    A[0][2] = ~A[0][2];
672
943k
    A[1][3] = ~A[1][3];
673
943k
    A[2][2] = ~A[2][2];
674
943k
    A[3][2] = ~A[3][2];
675
943k
    A[4][0] = ~A[4][0];
676
943k
#endif
677
943k
}
678
679
#else /* define KECCAK_INPLACE to compile this code path */
680
/*
681
 * This implementation is KECCAK_1X from above combined 4 times with
682
 * a twist that allows to omit temporary storage and perform in-place
683
 * processing. It's discussed in section 2.5 of "Keccak implementation
684
 * overview". It's likely to be best suited for processors with large
685
 * register bank... On the other hand processor with large register
686
 * bank can as well use KECCAK_1X_ALT, it would be as fast but much
687
 * more compact...
688
 */
689
static void FourRounds(uint64_t A[5][5], size_t i)
690
{
691
    uint64_t B[5], C[5], D[5];
692
693
    assert(i <= OSSL_NELEM(iotas) - 4);
694
695
    /* Round 4*n */
696
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
697
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
698
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
699
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
700
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
701
702
    D[0] = ROL64(C[1], 1) ^ C[4];
703
    D[1] = ROL64(C[2], 1) ^ C[0];
704
    D[2] = ROL64(C[3], 1) ^ C[1];
705
    D[3] = ROL64(C[4], 1) ^ C[2];
706
    D[4] = ROL64(C[0], 1) ^ C[3];
707
708
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
709
    B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
710
    B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
711
    B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
712
    B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
713
714
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
715
    C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
716
    C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
717
    C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
718
    C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);
719
720
    B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
721
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
722
    B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
723
    B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
724
    B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
725
726
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
727
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
728
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
729
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
730
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
731
732
    B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
733
    B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
734
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
735
    B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
736
    B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
737
738
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
739
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
740
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
741
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
742
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
743
744
    B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
745
    B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
746
    B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
747
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
748
    B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
749
750
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
751
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
752
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
753
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
754
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
755
756
    B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
757
    B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
758
    B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
759
    B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
760
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
761
762
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
763
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
764
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
765
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
766
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
767
768
    /* Round 4*n+1 */
769
    D[0] = ROL64(C[1], 1) ^ C[4];
770
    D[1] = ROL64(C[2], 1) ^ C[0];
771
    D[2] = ROL64(C[3], 1) ^ C[1];
772
    D[3] = ROL64(C[4], 1) ^ C[2];
773
    D[4] = ROL64(C[0], 1) ^ C[3];
774
775
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
776
    B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
777
    B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
778
    B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
779
    B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);
780
781
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
782
    C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
783
    C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
784
    C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
785
    C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);
786
787
    B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
788
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
789
    B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
790
    B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
791
    B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);
792
793
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
794
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
795
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
796
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
797
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
798
799
    B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
800
    B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
801
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
802
    B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
803
    B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);
804
805
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
806
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
807
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
808
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
809
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
810
811
    B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
812
    B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
813
    B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
814
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
815
    B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);
816
817
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
818
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
819
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
820
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
821
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
822
823
    B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
824
    B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
825
    B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
826
    B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
827
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
828
829
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
830
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
831
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
832
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
833
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
834
835
    /* Round 4*n+2 */
836
    D[0] = ROL64(C[1], 1) ^ C[4];
837
    D[1] = ROL64(C[2], 1) ^ C[0];
838
    D[2] = ROL64(C[3], 1) ^ C[1];
839
    D[3] = ROL64(C[4], 1) ^ C[2];
840
    D[4] = ROL64(C[0], 1) ^ C[3];
841
842
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
843
    B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
844
    B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
845
    B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
846
    B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);
847
848
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
849
    C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
850
    C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
851
    C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
852
    C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);
853
854
    B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
855
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
856
    B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
857
    B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
858
    B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);
859
860
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
861
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
862
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
863
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
864
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
865
866
    B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
867
    B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
868
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
869
    B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
870
    B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);
871
872
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
873
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
874
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
875
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
876
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
877
878
    B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
879
    B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
880
    B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
881
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
882
    B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);
883
884
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
885
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
886
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
887
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
888
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
889
890
    B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
891
    B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
892
    B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
893
    B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
894
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
895
896
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
897
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
898
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
899
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
900
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
901
902
    /* Round 4*n+3 */
903
    D[0] = ROL64(C[1], 1) ^ C[4];
904
    D[1] = ROL64(C[2], 1) ^ C[0];
905
    D[2] = ROL64(C[3], 1) ^ C[1];
906
    D[3] = ROL64(C[4], 1) ^ C[2];
907
    D[4] = ROL64(C[0], 1) ^ C[3];
908
909
    B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
910
    B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
911
    B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
912
    B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
913
    B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);
914
915
    /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
916
    /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
917
    /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
918
    /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
919
    /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);
920
921
    B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
922
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
923
    B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
924
    B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
925
    B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);
926
927
    /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
928
    /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
929
    /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
930
    /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
931
    /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);
932
933
    B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
934
    B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
935
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
936
    B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
937
    B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);
938
939
    /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
940
    /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
941
    /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
942
    /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
943
    /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);
944
945
    B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
946
    B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
947
    B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
948
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
949
    B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);
950
951
    /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
952
    /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
953
    /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
954
    /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
955
    /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);
956
957
    B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
958
    B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
959
    B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
960
    B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
961
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
962
963
    /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
964
    /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
965
    /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
966
    /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
967
    /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
968
}
969
970
/*
 * Apply the full Keccak-f[1600] permutation (24 rounds) to state |A|,
 * dispatching to the four-round unrolled kernel six times.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t round;

    for (round = 0; round < 24; round += 4)
        FourRounds(A, round);
}
978
979
#endif
980
981
/*
 * Convert lane |Ai| to bit-interleaved form: the even-numbered bits of
 * |Ai| are compacted into the low 32-bit half of the result and the
 * odd-numbered bits into the high half.  In this representation a 64-bit
 * rotation of the lane can be carried out as two independent 32-bit
 * rotations, which is why it is used on platforms where BIT_INTERLEAVE
 * is non-zero (32-bit targets); otherwise the function is an identity.
 */
static uint64_t BitInterleave(uint64_t Ai)
{
    if (BIT_INTERLEAVE) {
        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
        uint32_t t0, t1;

        /* t0: even bits of |lo| squeezed into its low 16 bits. */
        t0 = lo & 0x55555555;
        t0 |= t0 >> 1;
        t0 &= 0x33333333;
        t0 |= t0 >> 2;
        t0 &= 0x0f0f0f0f;
        t0 |= t0 >> 4;
        t0 &= 0x00ff00ff;
        t0 |= t0 >> 8;
        t0 &= 0x0000ffff;

        /* t1: even bits of |hi| squeezed likewise, then placed in bits 16-31. */
        t1 = hi & 0x55555555;
        t1 |= t1 >> 1;
        t1 &= 0x33333333;
        t1 |= t1 >> 2;
        t1 &= 0x0f0f0f0f;
        t1 |= t1 >> 4;
        t1 &= 0x00ff00ff;
        t1 |= t1 >> 8;
        t1 <<= 16;

        /* lo: odd bits of the original |lo| squeezed into 16 bits. */
        lo &= 0xaaaaaaaa;
        lo |= lo << 1;
        lo &= 0xcccccccc;
        lo |= lo << 2;
        lo &= 0xf0f0f0f0;
        lo |= lo << 4;
        lo &= 0xff00ff00;
        lo |= lo << 8;
        lo >>= 16;

        /* hi: odd bits of the original |hi| squeezed into bits 16-31. */
        hi &= 0xaaaaaaaa;
        hi |= hi << 1;
        hi &= 0xcccccccc;
        hi |= hi << 2;
        hi &= 0xf0f0f0f0;
        hi |= hi << 4;
        hi &= 0xff00ff00;
        hi |= hi << 8;
        hi &= 0xffff0000;

        /* Recombine: odd bits in the top word, even bits in the bottom. */
        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
    }

    return Ai;
}
1032
1033
/*
 * Inverse of BitInterleave: spread the low 32-bit half of |Ai| back onto
 * the even-numbered bit positions of the result and the high half onto
 * the odd-numbered positions.  Identity when BIT_INTERLEAVE is 0.
 */
static uint64_t BitDeinterleave(uint64_t Ai)
{
    if (BIT_INTERLEAVE) {
        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
        uint32_t t0, t1;

        /* t0: low 16 bits of |lo| expanded onto the even bit positions. */
        t0 = lo & 0x0000ffff;
        t0 |= t0 << 8;
        t0 &= 0x00ff00ff;
        t0 |= t0 << 4;
        t0 &= 0x0f0f0f0f;
        t0 |= t0 << 2;
        t0 &= 0x33333333;
        t0 |= t0 << 1;
        t0 &= 0x55555555;

        /* t1: low 16 bits of |hi| expanded onto the odd bit positions. */
        t1 = hi << 16;
        t1 |= t1 >> 8;
        t1 &= 0xff00ff00;
        t1 |= t1 >> 4;
        t1 &= 0xf0f0f0f0;
        t1 |= t1 >> 2;
        t1 &= 0xcccccccc;
        t1 |= t1 >> 1;
        t1 &= 0xaaaaaaaa;

        /* lo: high 16 bits of the original |lo| onto even positions. */
        lo >>= 16;
        lo |= lo << 8;
        lo &= 0x00ff00ff;
        lo |= lo << 4;
        lo &= 0x0f0f0f0f;
        lo |= lo << 2;
        lo &= 0x33333333;
        lo |= lo << 1;
        lo &= 0x55555555;

        /* hi: high 16 bits of the original |hi| onto odd positions. */
        hi &= 0xffff0000;
        hi |= hi >> 8;
        hi &= 0xff00ff00;
        hi |= hi >> 4;
        hi &= 0xf0f0f0f0;
        hi |= hi >> 2;
        hi &= 0xcccccccc;
        hi |= hi >> 1;
        hi &= 0xaaaaaaaa;

        /* Reassemble the de-interleaved 64-bit lane. */
        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
    }

    return Ai;
}
1084
1085
/*
1086
 * SHA3_absorb can be called multiple times, but at each invocation
1087
 * largest multiple of |r| out of |len| bytes are processed. Then
1088
 * remaining amount of bytes is returned. This is done to spare caller
1089
 * trouble of calculating the largest multiple of |r|. |r| can be viewed
1090
 * as blocksize. It is commonly (1600 - 256*n)/8, e.g. 168, 136, 104,
1091
 * 72, but can also be (1600 - 448)/8 = 144. All this means that message
1092
 * padding and intermediate sub-block buffering, byte- or bitwise, is
1093
 * caller's responsibility.
1094
 */
1095
/*
 * Absorb as many whole |r|-byte blocks of |inp| into state |A| as |len|
 * allows, permuting after each block.  Returns the count of unprocessed
 * trailing bytes (always < r); buffering and padding of those bytes is
 * the caller's job.
 */
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
    size_t r)
{
    uint64_t *lanes = (uint64_t *)A;
    size_t nlanes = r / 8;
    size_t idx, b;

    /* Rate must be lane-aligned and leave non-zero capacity. */
    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    for (; len >= r; len -= r) {
        for (idx = 0; idx < nlanes; idx++) {
            uint64_t lane = 0;

            /* Gather 8 input bytes as one little-endian 64-bit lane. */
            for (b = 0; b < 8; b++)
                lane |= (uint64_t)*inp++ << (8 * b);

            lanes[idx] ^= BitInterleave(lane);
        }
        KeccakF1600(A);
    }

    return len;
}
1116
1117
/*
1118
 * SHA3_squeeze may be called after SHA3_absorb to generate |out| hash value of
1119
 * |len| bytes.
1120
 * If multiple SHA3_squeeze calls are required the output length |len| must be a
1121
 * multiple of the blocksize, with |next| being 0 on the first call and 1 on
1122
 * subsequent calls. It is the callers responsibility to buffer the results.
1123
 * When only a single call to SHA3_squeeze is required, |len| can be any size
1124
 * and |next| must be 0.
1125
 */
1126
/*
 * Emit |len| output bytes from state |A| at rate |r|.  When |next| is
 * non-zero the state is permuted before the first block (used for
 * continuation calls); within a call the state is permuted between
 * blocks.  Output is the little-endian serialization of the leading
 * rate lanes, truncated to |len|.
 */
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r,
    int next)
{
    uint64_t *lanes = (uint64_t *)A;
    size_t nlanes = r / 8;
    size_t idx;

    /* Rate must be lane-aligned and leave non-zero capacity. */
    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    while (len != 0) {
        /* Permute before every block except the very first of a sequence. */
        if (next)
            KeccakF1600(A);
        next = 1;

        for (idx = 0; idx < nlanes && len != 0; idx++) {
            uint64_t lane = BitDeinterleave(lanes[idx]);
            size_t take = len < 8 ? len : 8;
            size_t b;

            /* Emit the lane little-endian, clipped to the output budget. */
            for (b = 0; b < take; b++) {
                *out++ = (unsigned char)lane;
                lane >>= 8;
            }
            len -= take;
        }
    }
}
1162
#endif
1163
1164
#ifdef SELFTEST
1165
/*
1166
 * Post-padding one-shot implementations would look as following:
1167
 *
1168
 * SHA3_224     SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
1169
 * SHA3_256     SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
1170
 * SHA3_384     SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
1171
 * SHA3_512     SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
1172
 * SHAKE_128    SHA3_sponge(inp, len, out, d, (1600-256)/8);
1173
 * SHAKE_256    SHA3_sponge(inp, len, out, d, (1600-512)/8);
1174
 */
1175
1176
/*
 * One-shot sponge for the self-test: absorb |len| bytes of (pre-padded)
 * input |inp| at rate |r| into a zeroed state, then squeeze |d| output
 * bytes into |out|.  |len| is expected to be a multiple of |r| here
 * (the self-test passes exactly one rate-sized block); any remainder
 * reported by SHA3_absorb would be silently dropped.
 */
void SHA3_sponge(const unsigned char *inp, size_t len,
    unsigned char *out, size_t d, size_t r)
{
    uint64_t A[5][5];

    memset(A, 0, sizeof(A));
    SHA3_absorb(A, inp, len, r);
    /*
     * Bug fix: SHA3_squeeze takes a 5th |next| argument (see prototype at
     * the top of this file); pass 0 for a first/only squeeze call.
     */
    SHA3_squeeze(A, out, d, r, 0);
}
1185
1186
#include <stdio.h>
1187
1188
/*
 * Self-test driver: runs the SHAKE128 known-answer test with a 1600-bit
 * (168-byte) message of repeated 0xA3 bytes... actually the buffer below
 * carries pre-applied SHAKE padding (0x1F-style first byte '\xf3' with
 * 5 message bits, final 0x80), so SHA3_sponge receives exactly one
 * rate-sized, already-padded block.  Prints the 512-byte digest and
 * compares it against the official expected output.
 * NOTE(review): padding-byte derivation above is inferred from the test
 * vector cited in the comment below — confirm against the NIST example.
 */
int main(void)
{
    /*
     * This is 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
     */
    unsigned char test[168] = { '\xf3', '\x3' };  /* rest zero-initialized */
    unsigned char out[512];
    size_t i;
    /* Expected 512-byte SHAKE128 output for the padded message above. */
    static const unsigned char result[512] = {
        0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
        0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
        0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
        0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
        0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
        0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
        0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
        0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
        0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
        0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
        0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
        0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
        0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
        0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
        0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
        0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
        0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
        0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
        0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
        0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
        0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
        0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
        0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
        0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
        0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
        0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
        0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
        0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
        0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
        0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
        0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
        0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
        0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
        0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
        0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
        0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
        0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
        0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
        0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
        0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
        0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
        0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
        0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
        0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
        0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
        0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
        0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
        0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
        0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
        0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
        0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
        0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
        0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
        0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
        0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
        0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
        0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
        0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
        0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
        0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
        0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
        0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
        0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
        0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
    };

    /* Final padding byte: pad10*1 closing bit in the last rate byte. */
    test[167] = '\x80';
    /* r == sizeof(test) == 168, the SHAKE128 rate; absorb one block. */
    SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));

    /*
     * Rationale behind keeping output [formatted as below] is that
     * one should be able to redirect it to a file, then copy-n-paste
     * final "output val" from official example to another file, and
     * compare the two with diff(1).
     */
    for (i = 0; i < sizeof(out);) {
        printf("%02X", out[i]);
        /* Space between bytes, newline every 16th and at the very end. */
        printf(++i % 16 && i != sizeof(out) ? " " : "\n");
    }

    if (memcmp(out, result, sizeof(out))) {
        fprintf(stderr, "failure\n");
        return 1;
    } else {
        fprintf(stderr, "success\n");
        return 0;
    }
}
1285
#endif