/src/tor/src/ext/keccak-tiny/keccak-tiny-unrolled.c
Line | Count | Source |
1 | | /** libkeccak-tiny |
2 | | * |
3 | | * A single-file implementation of SHA-3 and SHAKE. |
4 | | * |
5 | | * Implementor: David Leon Gil |
6 | | * License: CC0, attribution kindly requested. Blame taken too, |
7 | | * but not liability. |
8 | | */ |
9 | | #include "keccak-tiny.h" |
10 | | |
11 | | #include <string.h> |
12 | | #include "lib/crypt_ops/crypto_util.h" |
13 | | #include "byteorder.h" |
14 | | |
15 | | /******** Endianness conversion helpers ********/ |
16 | | |
17 | | static inline uint64_t |
18 | 10.9M | loadu64le(const unsigned char *x) { |
19 | 10.9M | uint64_t r; |
20 | 10.9M | memcpy(&r, x, sizeof(r)); |
21 | 10.9M | return _le64toh(r); |
22 | 10.9M | } |
23 | | |
24 | | static inline void |
25 | 183k | storeu64le(uint8_t *x, uint64_t u) { |
26 | 183k | uint64_t val = _le64toh(u); |
27 | 183k | memcpy(x, &val, sizeof(u)); |
28 | 183k | } |
29 | | |
30 | | /******** The Keccak-f[1600] permutation ********/ |
31 | | |
32 | | /*** Constants. ***/ |
33 | | static const uint8_t rho[24] = \ |
34 | | { 1, 3, 6, 10, 15, 21, |
35 | | 28, 36, 45, 55, 2, 14, |
36 | | 27, 41, 56, 8, 25, 43, |
37 | | 62, 18, 39, 61, 20, 44}; |
38 | | static const uint8_t pi[24] = \ |
39 | | {10, 7, 11, 17, 18, 3, |
40 | | 5, 16, 8, 21, 24, 4, |
41 | | 15, 23, 19, 13, 12, 2, |
42 | | 20, 14, 22, 9, 6, 1}; |
43 | | static const uint64_t RC[24] = \ |
44 | | {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, |
45 | | 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, |
46 | | 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL, |
47 | | 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, |
48 | | 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL, |
49 | | 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL}; |
50 | | |
51 | | /*** Helper macros to unroll the permutation. ***/ |
52 | | #define rol(x, s) (((x) << s) | ((x) >> (64 - s))) |
53 | 645k | #define REPEAT6(e) e e e e e e |
54 | 645k | #define REPEAT24(e) REPEAT6(e e e e) |
55 | | #define REPEAT5(e) e e e e e |
56 | | #define FOR5(v, s, e) \ |
57 | | v = 0; \ |
58 | | REPEAT5(e; v += s;) |
59 | | |
60 | | /*** Keccak-f[1600] ***/ |
61 | 645k | static inline void keccakf(void* state) { |
62 | 645k | uint64_t* a = (uint64_t*)state; |
63 | 645k | uint64_t b[5] = {0}; |
64 | 645k | uint64_t t = 0; |
65 | 645k | uint8_t x, y, i = 0; |
66 | | |
67 | 645k | REPEAT24( |
68 | | // Theta |
69 | 645k | FOR5(x, 1, |
70 | 645k | b[x] = 0; |
71 | 645k | FOR5(y, 5, |
72 | 645k | b[x] ^= a[x + y]; )) |
73 | 645k | FOR5(x, 1, |
74 | 645k | FOR5(y, 5, |
75 | 645k | a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); )) |
76 | | // Rho and pi |
77 | 645k | t = a[1]; |
78 | 645k | x = 0; |
79 | 645k | REPEAT24(b[0] = a[pi[x]]; |
80 | 645k | a[pi[x]] = rol(t, rho[x]); |
81 | 645k | t = b[0]; |
82 | 645k | x++; ) |
83 | | // Chi |
84 | 645k | FOR5(y, |
85 | 645k | 5, |
86 | 645k | FOR5(x, 1, |
87 | 645k | b[x] = a[y + x];) |
88 | 645k | FOR5(x, 1, |
89 | 645k | a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); )) |
90 | | // Iota |
91 | 645k | a[0] ^= RC[i]; |
92 | 645k | i++; ) |
93 | 645k | } |
94 | | |
95 | | /******** The FIPS202-defined functions. ********/ |
96 | | |
97 | | /*** Some helper macros. ***/ |
98 | | |
99 | | // `xorin` modified to handle Big Endian systems, `buf` being unaligned on |
100 | | // systems that care about such things. Assumes that len is a multiple of 8, |
101 | | // which is always true for the rates we use, and the modified finalize. |
102 | | static inline void |
103 | 645k | xorin8(uint8_t *dst, const uint8_t *src, size_t len) { |
104 | 645k | uint64_t* a = (uint64_t*)dst; // Always aligned. |
105 | 11.6M | for (size_t i = 0; i < len; i += 8) { |
106 | 10.9M | a[i/8] ^= loadu64le(src + i); |
107 | 10.9M | } |
108 | 645k | } |
109 | | |
110 | | // `setout` likewise modified to handle Big Endian systems. Assumes that len |
111 | | // is a multiple of 8, which is true for every rate we use. |
112 | | static inline void |
113 | 10.7k | setout8(const uint8_t *src, uint8_t *dst, size_t len) { |
114 | 10.7k | const uint64_t *si = (const uint64_t*)src; // Always aligned. |
115 | 194k | for (size_t i = 0; i < len; i+= 8) { |
116 | 183k | storeu64le(dst+i, si[i/8]); |
117 | 183k | } |
118 | 10.7k | } |
119 | | |
120 | 634k | #define P keccakf |
121 | | #define Plen KECCAK_MAX_RATE |
122 | | |
123 | 43.1k | #define KECCAK_DELIM_DIGEST 0x06 |
124 | 0 | #define KECCAK_DELIM_XOF 0x1f |
125 | | |
126 | | // Fold P*F over the full blocks of an input. |
127 | | #define foldP(I, L, F) \ |
128 | 643k | while (L >= s->rate) { \ |
129 | 634k | F(s->a, I, s->rate); \ |
130 | 634k | P(s->a); \ |
131 | 634k | I += s->rate; \ |
132 | 634k | L -= s->rate; \ |
133 | 634k | } |
134 | | |
135 | | static inline void |
136 | | keccak_absorb_blocks(keccak_state *s, const uint8_t *buf, size_t nr_blocks) |
137 | 8.55k | { |
138 | 8.55k | size_t blen = nr_blocks * s->rate; |
139 | 8.55k | foldP(buf, blen, xorin8); |
140 | 8.55k | } |
141 | | |
142 | | static int |
143 | | keccak_update(keccak_state *s, const uint8_t *buf, size_t len) |
144 | 10.7k | { |
145 | 10.7k | if (s->finalized) |
146 | 0 | return -1; |
147 | 10.7k | if ((buf == NULL) && len != 0) |
148 | 0 | return -1; |
149 | | |
150 | 10.7k | size_t remaining = len; |
151 | 20.7k | while (remaining > 0) { |
152 | 9.98k | if (s->offset == 0) { |
153 | 9.98k | const size_t blocks = remaining / s->rate; |
154 | 9.98k | size_t direct_bytes = blocks * s->rate; |
155 | 9.98k | if (direct_bytes > 0) { |
156 | 8.55k | keccak_absorb_blocks(s, buf, blocks); |
157 | 8.55k | remaining -= direct_bytes; |
158 | 8.55k | buf += direct_bytes; |
159 | 8.55k | } |
160 | 9.98k | } |
161 | | |
162 | 9.98k | const size_t buf_avail = s->rate - s->offset; |
163 | 9.98k | const size_t buf_bytes = (buf_avail > remaining) ? remaining : buf_avail; |
164 | 9.98k | if (buf_bytes > 0) { |
165 | 9.84k | memcpy(&s->block[s->offset], buf, buf_bytes); |
166 | 9.84k | s->offset += buf_bytes; |
167 | 9.84k | remaining -= buf_bytes; |
168 | 9.84k | buf += buf_bytes; |
169 | 9.84k | } |
170 | 9.98k | if (s->offset == s->rate) { |
171 | 0 | keccak_absorb_blocks(s, s->block, 1); |
172 | 0 | s->offset = 0; |
173 | 0 | } |
174 | 9.98k | } |
175 | 10.7k | return 0; |
176 | 10.7k | } |
177 | | |
178 | | static void |
179 | | keccak_finalize(keccak_state *s) |
180 | 10.7k | { |
181 | | // Xor in the DS and pad frame. |
182 | 10.7k | s->block[s->offset++] = s->delim; // DS. |
183 | 716k | for (size_t i = s->offset; i < s->rate; i++) { |
184 | 705k | s->block[i] = 0; |
185 | 705k | } |
186 | 10.7k | s->block[s->rate - 1] |= 0x80; // Pad frame. |
187 | | |
188 | | // Xor in the last block. |
189 | 10.7k | xorin8(s->a, s->block, s->rate); |
190 | | |
191 | 10.7k | memwipe(s->block, 0, sizeof(s->block)); |
192 | 10.7k | s->finalized = 1; |
193 | 10.7k | s->offset = s->rate; |
194 | 10.7k | } |
195 | | |
196 | | static inline void |
197 | | keccak_squeeze_blocks(keccak_state *s, uint8_t *out, size_t nr_blocks) |
198 | 10.7k | { |
199 | 21.5k | for (size_t n = 0; n < nr_blocks; n++) { |
200 | 10.7k | keccakf(s->a); |
201 | 10.7k | setout8(s->a, out, s->rate); |
202 | 10.7k | out += s->rate; |
203 | 10.7k | } |
204 | 10.7k | } |
205 | | |
206 | | static int |
207 | | keccak_squeeze(keccak_state *s, uint8_t *out, size_t outlen) |
208 | 10.7k | { |
209 | 10.7k | if (!s->finalized) |
210 | 0 | return -1; |
211 | | |
212 | 10.7k | size_t remaining = outlen; |
213 | 21.5k | while (remaining > 0) { |
214 | 10.7k | if (s->offset == s->rate) { |
215 | 10.7k | const size_t blocks = remaining / s->rate; |
216 | 10.7k | const size_t direct_bytes = blocks * s->rate; |
217 | 10.7k | if (blocks > 0) { |
218 | 0 | keccak_squeeze_blocks(s, out, blocks); |
219 | 0 | out += direct_bytes; |
220 | 0 | remaining -= direct_bytes; |
221 | 0 | } |
222 | | |
223 | 10.7k | if (remaining > 0) { |
224 | 10.7k | keccak_squeeze_blocks(s, s->block, 1); |
225 | 10.7k | s->offset = 0; |
226 | 10.7k | } |
227 | 10.7k | } |
228 | | |
229 | 10.7k | const size_t buf_bytes = s->rate - s->offset; |
230 | 10.7k | const size_t indirect_bytes = (buf_bytes > remaining) ? remaining : buf_bytes; |
231 | 10.7k | if (indirect_bytes > 0) { |
232 | 10.7k | memcpy(out, &s->block[s->offset], indirect_bytes); |
233 | 10.7k | out += indirect_bytes; |
234 | 10.7k | s->offset += indirect_bytes; |
235 | 10.7k | remaining -= indirect_bytes; |
236 | 10.7k | } |
237 | 10.7k | } |
238 | 10.7k | return 0; |
239 | 10.7k | } |
240 | | |
241 | | int |
242 | | keccak_digest_init(keccak_state *s, size_t bits) |
243 | 10.7k | { |
244 | 10.7k | if (s == NULL) |
245 | 0 | return -1; |
246 | 10.7k | if (bits != 224 && bits != 256 && bits != 384 && bits != 512) |
247 | 0 | return -1; |
248 | | |
249 | 10.7k | keccak_cleanse(s); |
250 | 10.7k | s->rate = KECCAK_RATE(bits); |
251 | 10.7k | s->delim = KECCAK_DELIM_DIGEST; |
252 | 10.7k | return 0; |
253 | 10.7k | } |
254 | | |
255 | | int |
256 | | keccak_digest_update(keccak_state *s, const uint8_t *buf, size_t len) |
257 | 10.7k | { |
258 | 10.7k | if (s == NULL) |
259 | 0 | return -1; |
260 | 10.7k | if (s->delim != KECCAK_DELIM_DIGEST) |
261 | 0 | return -1; |
262 | | |
263 | 10.7k | return keccak_update(s, buf, len); |
264 | 10.7k | } |
265 | | |
266 | | int |
267 | | keccak_digest_sum(const keccak_state *s, uint8_t *out, size_t outlen) |
268 | 0 | { |
269 | 0 | if (s == NULL) |
270 | 0 | return -1; |
271 | 0 | if (s->delim != KECCAK_DELIM_DIGEST) |
272 | 0 | return -1; |
273 | 0 | if (out == NULL || outlen > 4 * (KECCAK_MAX_RATE - s->rate) / 8) |
274 | 0 | return -1; |
275 | | |
276 | | // Work in a copy so that incremental/rolling hashes are easy. |
277 | 0 | keccak_state s_tmp; |
278 | 0 | keccak_clone(&s_tmp, s); |
279 | 0 | keccak_finalize(&s_tmp); |
280 | 0 | int ret = keccak_squeeze(&s_tmp, out, outlen); |
281 | 0 | keccak_cleanse(&s_tmp); |
282 | 0 | return ret; |
283 | 0 | } |
284 | | |
285 | | int |
286 | | keccak_xof_init(keccak_state *s, size_t bits) |
287 | 0 | { |
288 | 0 | if (s == NULL) |
289 | 0 | return -1; |
290 | 0 | if (bits != 128 && bits != 256) |
291 | 0 | return -1; |
292 | | |
293 | 0 | keccak_cleanse(s); |
294 | 0 | s->rate = KECCAK_RATE(bits); |
295 | 0 | s->delim = KECCAK_DELIM_XOF; |
296 | 0 | return 0; |
297 | 0 | } |
298 | | |
299 | | int |
300 | | keccak_xof_absorb(keccak_state *s, const uint8_t *buf, size_t len) |
301 | 0 | { |
302 | 0 | if (s == NULL) |
303 | 0 | return -1; |
304 | 0 | if (s->delim != KECCAK_DELIM_XOF) |
305 | 0 | return -1; |
306 | | |
307 | 0 | return keccak_update(s, buf, len); |
308 | 0 | } |
309 | | |
310 | | int |
311 | | keccak_xof_squeeze(keccak_state *s, uint8_t *out, size_t outlen) |
312 | 0 | { |
313 | 0 | if (s == NULL) |
314 | 0 | return -1; |
315 | 0 | if (s->delim != KECCAK_DELIM_XOF) |
316 | 0 | return -1; |
317 | | |
318 | 0 | if (!s->finalized) |
319 | 0 | keccak_finalize(s); |
320 | |
|
321 | 0 | return keccak_squeeze(s, out, outlen); |
322 | 0 | } |
323 | | |
324 | | void |
325 | | keccak_clone(keccak_state *out, const keccak_state *in) |
326 | 0 | { |
327 | 0 | memcpy(out, in, sizeof(keccak_state)); |
328 | 0 | } |
329 | | |
330 | | void |
331 | | keccak_cleanse(keccak_state *s) |
332 | 32.3k | { |
333 | 32.3k | memwipe(s, 0, sizeof(keccak_state)); |
334 | 32.3k | } |
335 | | |
336 | | /** The sponge-based hash construction. **/ |
337 | | static inline int hash(uint8_t* out, size_t outlen, |
338 | | const uint8_t* in, size_t inlen, |
339 | 10.7k | size_t bits, uint8_t delim) { |
340 | 10.7k | if ((out == NULL) || ((in == NULL) && inlen != 0)) { |
341 | 0 | return -1; |
342 | 0 | } |
343 | | |
344 | 10.7k | int ret = 0; |
345 | 10.7k | keccak_state s; |
346 | 10.7k | keccak_cleanse(&s); |
347 | | |
348 | 10.7k | switch (delim) { |
349 | 10.7k | case KECCAK_DELIM_DIGEST: |
350 | 10.7k | ret |= keccak_digest_init(&s, bits); |
351 | 10.7k | ret |= keccak_digest_update(&s, in, inlen); |
352 | | // Use the internal API instead of sum to avoid the memcpy. |
353 | 10.7k | keccak_finalize(&s); |
354 | 10.7k | ret |= keccak_squeeze(&s, out, outlen); |
355 | 10.7k | break; |
356 | 0 | case KECCAK_DELIM_XOF: |
357 | 0 | ret |= keccak_xof_init(&s, bits); |
358 | 0 | ret |= keccak_xof_absorb(&s, in, inlen); |
359 | 0 | ret |= keccak_xof_squeeze(&s, out, outlen); |
360 | 0 | break; |
361 | 0 | default: |
362 | 0 | return -1; |
363 | 10.7k | } |
364 | 10.7k | keccak_cleanse(&s); |
365 | 10.7k | return ret; |
366 | 10.7k | } |
367 | | |
368 | | /*** Helper macros to define SHA3 and SHAKE instances. ***/ |
369 | | #define defshake(bits) \ |
370 | | int shake##bits(uint8_t* out, size_t outlen, \ |
371 | 0 | const uint8_t* in, size_t inlen) { \ |
372 | 0 | return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_XOF); \ |
373 | 0 | } Unexecuted instantiation: shake128 Unexecuted instantiation: shake256 |
374 | | #define defsha3(bits) \ |
375 | | int sha3_##bits(uint8_t* out, size_t outlen, \ |
376 | 10.7k | const uint8_t* in, size_t inlen) { \ |
377 | 10.7k | if (outlen > (bits/8)) { \ |
378 | 0 | return -1; \ |
379 | 0 | } \ |
380 | 10.7k | return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_DIGEST); \ |
381 | 10.7k | } Unexecuted instantiation: sha3_224 Line | Count | Source | 376 | 10.7k | const uint8_t* in, size_t inlen) { \ | 377 | 10.7k | if (outlen > (bits/8)) { \ | 378 | 0 | return -1; \ | 379 | 0 | } \ | 380 | 10.7k | return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_DIGEST); \ | 381 | 10.7k | } |
Unexecuted instantiation: sha3_384 Unexecuted instantiation: sha3_512 |
382 | | |
383 | | /*** FIPS202 SHAKE VOFs ***/ |
384 | | defshake(128) |
385 | | defshake(256) |
386 | | |
387 | | /*** FIPS202 SHA3 FOFs ***/ |
388 | | defsha3(224) |
389 | | defsha3(256) |
390 | | defsha3(384) |
391 | | defsha3(512) |