/src/boringssl/crypto/fipsmodule/keccak/keccak.cc.inc
Line | Count | Source |
1 | | // Copyright 2023 The BoringSSL Authors |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include <openssl/base.h> |
16 | | |
17 | | #include <assert.h> |
18 | | #include <stdlib.h> |
19 | | |
20 | | #include "../../internal.h" |
21 | | #include "./internal.h" |
22 | | |
23 | | |
24 | | // keccak_f implements the Keccak-1600 permutation as described at |
25 | | // https://keccak.team/keccak_specs_summary.html. Each lane is represented as a |
26 | | // 64-bit value and the 5×5 lanes are stored as an array in row-major order. |
27 | 2.91M | static void keccak_f(uint64_t state[25]) { |
28 | 2.91M | static const int kNumRounds = 24; |
29 | 72.9M | for (int round = 0; round < kNumRounds; round++) { |
30 | | // θ step |
31 | 70.0M | uint64_t c[5]; |
32 | 420M | for (int x = 0; x < 5; x++) { |
33 | 350M | c[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ |
34 | 350M | state[x + 20]; |
35 | 350M | } |
36 | | |
37 | 420M | for (int x = 0; x < 5; x++) { |
38 | 350M | const uint64_t d = c[(x + 4) % 5] ^ CRYPTO_rotl_u64(c[(x + 1) % 5], 1); |
39 | 2.10G | for (int y = 0; y < 5; y++) { |
40 | 1.75G | state[y * 5 + x] ^= d; |
41 | 1.75G | } |
42 | 350M | } |
43 | | |
44 | | // ρ and π steps. |
45 | | // |
46 | | // These steps involve a mapping of the state matrix. Each input point, |
47 | | // (x,y), is rotated and written to the point (y, 2x + 3y). In the Keccak |
48 | | // pseudo-code a separate array is used because an in-place operation would |
49 | | // overwrite some values that are subsequently needed. However, the mapping |
50 | | // forms a trail through 24 of the 25 values so we can do it in place with |
51 | | // only a single temporary variable. |
52 | | // |
53 | | // Start with (1, 0). The value here will be mapped and end up at (0, 2). |
54 | | // That value will end up at (2, 1), then (1, 2), and so on. After 24 |
55 | | // steps, 24 of the 25 values have been hit (as this mapping is injective) |
56 | | // and the sequence will repeat. All that remains is to handle the element |
57 | | // at (0, 0), but the rotation for that element is zero, and it goes to (0, |
58 | | // 0), so we can ignore it. |
59 | 70.0M | uint64_t prev_value = state[1]; |
60 | 70.0M | #define PI_RHO_STEP(index, rotation) \ |
61 | 1.68G | do { \ |
62 | 1.68G | const uint64_t value = CRYPTO_rotl_u64(prev_value, rotation); \ |
63 | 1.68G | prev_value = state[index]; \ |
64 | 1.68G | state[index] = value; \ |
65 | 1.68G | } while (0) |
66 | | |
67 | 70.0M | PI_RHO_STEP(10, 1); |
68 | 70.0M | PI_RHO_STEP(7, 3); |
69 | 70.0M | PI_RHO_STEP(11, 6); |
70 | 70.0M | PI_RHO_STEP(17, 10); |
71 | 70.0M | PI_RHO_STEP(18, 15); |
72 | 70.0M | PI_RHO_STEP(3, 21); |
73 | 70.0M | PI_RHO_STEP(5, 28); |
74 | 70.0M | PI_RHO_STEP(16, 36); |
75 | 70.0M | PI_RHO_STEP(8, 45); |
76 | 70.0M | PI_RHO_STEP(21, 55); |
77 | 70.0M | PI_RHO_STEP(24, 2); |
78 | 70.0M | PI_RHO_STEP(4, 14); |
79 | 70.0M | PI_RHO_STEP(15, 27); |
80 | 70.0M | PI_RHO_STEP(23, 41); |
81 | 70.0M | PI_RHO_STEP(19, 56); |
82 | 70.0M | PI_RHO_STEP(13, 8); |
83 | 70.0M | PI_RHO_STEP(12, 25); |
84 | 70.0M | PI_RHO_STEP(2, 43); |
85 | 70.0M | PI_RHO_STEP(20, 62); |
86 | 70.0M | PI_RHO_STEP(14, 18); |
87 | 70.0M | PI_RHO_STEP(22, 39); |
88 | 70.0M | PI_RHO_STEP(9, 61); |
89 | 70.0M | PI_RHO_STEP(6, 20); |
90 | 70.0M | PI_RHO_STEP(1, 44); |
91 | | |
92 | 70.0M | #undef PI_RHO_STEP |
93 | | |
94 | | // χ step |
95 | 420M | for (int y = 0; y < 5; y++) { |
96 | 350M | const int row_index = 5 * y; |
97 | 350M | const uint64_t orig_x0 = state[row_index]; |
98 | 350M | const uint64_t orig_x1 = state[row_index + 1]; |
99 | 350M | state[row_index] ^= ~orig_x1 & state[row_index + 2]; |
100 | 350M | state[row_index + 1] ^= ~state[row_index + 2] & state[row_index + 3]; |
101 | 350M | state[row_index + 2] ^= ~state[row_index + 3] & state[row_index + 4]; |
102 | 350M | state[row_index + 3] ^= ~state[row_index + 4] & orig_x0; |
103 | 350M | state[row_index + 4] ^= ~orig_x0 & orig_x1; |
104 | 350M | } |
105 | | |
106 | | // ι step |
107 | | // |
108 | | // From https://keccak.team/files/Keccak-reference-3.0.pdf, section |
109 | | // 1.2, the round constants are based on the output of a LFSR. Thus, as |
110 | | // suggested in the appendix of of |
111 | | // https://keccak.team/keccak_specs_summary.html, the values are |
112 | | // simply encoded here. |
113 | 70.0M | static const uint64_t kRoundConstants[24] = { |
114 | 70.0M | 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, |
115 | 70.0M | 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, |
116 | 70.0M | 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, |
117 | 70.0M | 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, |
118 | 70.0M | 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, |
119 | 70.0M | 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, |
120 | 70.0M | 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, |
121 | 70.0M | 0x8000000000008080, 0x0000000080000001, 0x8000000080008008, |
122 | 70.0M | }; |
123 | | |
124 | 70.0M | state[0] ^= kRoundConstants[round]; |
125 | 70.0M | } |
126 | 2.91M | } |
127 | | |
128 | | static void keccak_init(struct BORINGSSL_keccak_st *ctx, |
129 | 1.15M | enum boringssl_keccak_config_t config) { |
130 | 1.15M | size_t required_out_len; |
131 | 1.15M | size_t capacity_bytes; |
132 | 1.15M | switch (config) { |
133 | 67.8k | case boringssl_sha3_256: |
134 | 67.8k | capacity_bytes = 512 / 8; |
135 | 67.8k | required_out_len = 32; |
136 | 67.8k | break; |
137 | 67.7k | case boringssl_sha3_512: |
138 | 67.7k | capacity_bytes = 1024 / 8; |
139 | 67.7k | required_out_len = 64; |
140 | 67.7k | break; |
141 | 609k | case boringssl_shake128: |
142 | 609k | capacity_bytes = 256 / 8; |
143 | 609k | required_out_len = 0; |
144 | 609k | break; |
145 | 406k | case boringssl_shake256: |
146 | 406k | capacity_bytes = 512 / 8; |
147 | 406k | required_out_len = 0; |
148 | 406k | break; |
149 | 0 | default: |
150 | 0 | abort(); |
151 | 1.15M | } |
152 | | |
153 | 1.15M | OPENSSL_memset(ctx, 0, sizeof(*ctx)); |
154 | 1.15M | ctx->config = config; |
155 | 1.15M | ctx->phase = boringssl_keccak_phase_absorb; |
156 | 1.15M | ctx->required_out_len = required_out_len; |
157 | 1.15M | ctx->rate_bytes = 200 - capacity_bytes; |
158 | 1.15M | assert(ctx->rate_bytes % 8 == 0); |
159 | 1.15M | } |
160 | | |
161 | | void BORINGSSL_keccak(uint8_t *out, size_t out_len, const uint8_t *in, |
162 | 542k | size_t in_len, enum boringssl_keccak_config_t config) { |
163 | 542k | struct BORINGSSL_keccak_st ctx; |
164 | 542k | keccak_init(&ctx, config); |
165 | 542k | if (ctx.required_out_len != 0 && out_len != ctx.required_out_len) { |
166 | 0 | abort(); |
167 | 0 | } |
168 | 542k | BORINGSSL_keccak_absorb(&ctx, in, in_len); |
169 | 542k | BORINGSSL_keccak_squeeze(&ctx, out, out_len); |
170 | 542k | } |
171 | | |
172 | | void BORINGSSL_keccak_init(struct BORINGSSL_keccak_st *ctx, |
173 | 609k | enum boringssl_keccak_config_t config) { |
174 | 609k | keccak_init(ctx, config); |
175 | 609k | } |
176 | | |
177 | | void BORINGSSL_keccak_absorb(struct BORINGSSL_keccak_st *ctx, const uint8_t *in, |
178 | 1.15M | size_t in_len) { |
179 | 1.15M | if (ctx->phase == boringssl_keccak_phase_squeeze) { |
180 | | // It's illegal to call absorb() again after calling squeeze(). |
181 | 0 | abort(); |
182 | 0 | } |
183 | | |
184 | 1.15M | const size_t rate_words = ctx->rate_bytes / 8; |
185 | | // XOR the input. Accessing |ctx->state| as a |uint8_t*| is allowed by strict |
186 | | // aliasing because we require |uint8_t| to be a character type. |
187 | 1.15M | uint8_t *state_bytes = (uint8_t *)ctx->state; |
188 | | |
189 | | // Absorb partial block. |
190 | 1.15M | if (ctx->absorb_offset != 0) { |
191 | 23 | assert(ctx->absorb_offset < ctx->rate_bytes); |
192 | 23 | size_t first_block_len = ctx->rate_bytes - ctx->absorb_offset; |
193 | 2.41k | for (size_t i = 0; i < first_block_len && i < in_len; i++) { |
194 | 2.39k | state_bytes[ctx->absorb_offset + i] ^= in[i]; |
195 | 2.39k | } |
196 | | |
197 | | // This input didn't fill the block. |
198 | 23 | if (first_block_len > in_len) { |
199 | 0 | ctx->absorb_offset += in_len; |
200 | 0 | return; |
201 | 0 | } |
202 | | |
203 | 23 | keccak_f(ctx->state); |
204 | 23 | in += first_block_len; |
205 | 23 | in_len -= first_block_len; |
206 | 23 | } |
207 | | |
208 | | // Absorb full blocks. |
209 | 1.69M | while (in_len >= ctx->rate_bytes) { |
210 | 9.76M | for (size_t i = 0; i < rate_words; i++) { |
211 | 9.22M | ctx->state[i] ^= CRYPTO_load_u64_le(in + 8 * i); |
212 | 9.22M | } |
213 | 542k | keccak_f(ctx->state); |
214 | 542k | in += ctx->rate_bytes; |
215 | 542k | in_len -= ctx->rate_bytes; |
216 | 542k | } |
217 | | |
218 | | // Absorb partial block. |
219 | 1.15M | assert(in_len < ctx->rate_bytes); |
220 | 44.0M | for (size_t i = 0; i < in_len; i++) { |
221 | 42.9M | state_bytes[i] ^= in[i]; |
222 | 42.9M | } |
223 | 1.15M | ctx->absorb_offset = in_len; |
224 | 1.15M | } |
225 | | |
226 | 1.15M | static void keccak_finalize(struct BORINGSSL_keccak_st *ctx) { |
227 | 1.15M | uint8_t terminator; |
228 | 1.15M | switch (ctx->config) { |
229 | 67.8k | case boringssl_sha3_256: |
230 | 135k | case boringssl_sha3_512: |
231 | 135k | terminator = 0x06; |
232 | 135k | break; |
233 | 609k | case boringssl_shake128: |
234 | 1.01M | case boringssl_shake256: |
235 | 1.01M | terminator = 0x1f; |
236 | 1.01M | break; |
237 | 0 | default: |
238 | 0 | abort(); |
239 | 1.15M | } |
240 | | |
241 | | // XOR the terminator. Accessing |ctx->state| as a |uint8_t*| is allowed by |
242 | | // strict aliasing because we require |uint8_t| to be a character type. |
243 | 1.15M | uint8_t *state_bytes = (uint8_t *)ctx->state; |
244 | 1.15M | state_bytes[ctx->absorb_offset] ^= terminator; |
245 | 1.15M | state_bytes[ctx->rate_bytes - 1] ^= 0x80; |
246 | 1.15M | keccak_f(ctx->state); |
247 | 1.15M | } |
248 | | |
249 | | void BORINGSSL_keccak_squeeze(struct BORINGSSL_keccak_st *ctx, uint8_t *out, |
250 | 2.37M | size_t out_len) { |
251 | 2.37M | if (ctx->required_out_len != 0 && |
252 | 135k | (ctx->phase == boringssl_keccak_phase_squeeze || |
253 | 135k | out_len != ctx->required_out_len)) { |
254 | | // The SHA-3 variants must be squeezed in a single call, to confirm that the |
255 | | // output length is correct. |
256 | 0 | abort(); |
257 | 0 | } |
258 | | |
259 | 2.37M | if (ctx->phase == boringssl_keccak_phase_absorb) { |
260 | 1.15M | keccak_finalize(ctx); |
261 | 1.15M | ctx->phase = boringssl_keccak_phase_squeeze; |
262 | 1.15M | } |
263 | | |
264 | | // Accessing |ctx->state| as a |uint8_t*| is allowed by strict aliasing |
265 | | // because we require |uint8_t| to be a character type. |
266 | 2.37M | const uint8_t *state_bytes = (const uint8_t *)ctx->state; |
267 | 4.75M | while (out_len) { |
268 | 2.37M | if (ctx->squeeze_offset == ctx->rate_bytes) { |
269 | 1.22M | keccak_f(ctx->state); |
270 | 1.22M | ctx->squeeze_offset = 0; |
271 | 1.22M | } |
272 | | |
273 | 2.37M | size_t remaining = ctx->rate_bytes - ctx->squeeze_offset; |
274 | 2.37M | size_t todo = out_len; |
275 | 2.37M | if (todo > remaining) { |
276 | 0 | todo = remaining; |
277 | 0 | } |
278 | 2.37M | OPENSSL_memcpy(out, &state_bytes[ctx->squeeze_offset], todo); |
279 | 2.37M | out += todo; |
280 | 2.37M | out_len -= todo; |
281 | 2.37M | ctx->squeeze_offset += todo; |
282 | 2.37M | } |
283 | 2.37M | } |