/src/boringssl/crypto/chacha/chacha.c
/* Copyright (c) 2014, Google Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

// Adapted from the public domain, estream code by D. Bernstein.

#include <openssl/chacha.h>

#include <assert.h>
#include <string.h>

#include "../internal.h"
#include "internal.h"


// sigma contains the ChaCha constants, which happen to be an ASCII string.
static const uint8_t sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
                                   '2', '-', 'b', 'y', 't', 'e', ' ', 'k' };

// QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round.
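// Each quarter round is four add-rotate-xor steps with rotation amounts of
// 16, 12, 8 and 7 bits, as specified for ChaCha20 in RFC 8439, section 2.1.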
#define QUARTERROUND(a, b, c, d)           \
  x[a] += x[b];                            \
  x[d] = CRYPTO_rotl_u32(x[d] ^ x[a], 16); \
  x[c] += x[d];                            \
  x[b] = CRYPTO_rotl_u32(x[b] ^ x[c], 12); \
  x[a] += x[b];                            \
  x[d] = CRYPTO_rotl_u32(x[d] ^ x[a], 8);  \
  x[c] += x[d];                            \
  x[b] = CRYPTO_rotl_u32(x[b] ^ x[c], 7);

void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
                      const uint8_t nonce[16]) {
  uint32_t x[16];
  OPENSSL_memcpy(x, sigma, sizeof(sigma));
  OPENSSL_memcpy(&x[4], key, 32);
  OPENSSL_memcpy(&x[12], nonce, 16);

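  // Twenty rounds, performed as ten "double rounds": each iteration applies
  // the quarter round to the four columns and then to the four diagonals of
  // the 4x4 state.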
  for (size_t i = 0; i < 20; i += 2) {
    QUARTERROUND(0, 4, 8, 12)
    QUARTERROUND(1, 5, 9, 13)
    QUARTERROUND(2, 6, 10, 14)
    QUARTERROUND(3, 7, 11, 15)
    QUARTERROUND(0, 5, 10, 15)
    QUARTERROUND(1, 6, 11, 12)
    QUARTERROUND(2, 7, 8, 13)
    QUARTERROUND(3, 4, 9, 14)
  }

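  // HChaCha20 outputs the first and last rows of the final state (words 0-3
  // and 12-15) directly, without the feed-forward addition used by the block
  // function.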
  OPENSSL_memcpy(out, &x[0], sizeof(uint32_t) * 4);
  OPENSSL_memcpy(&out[16], &x[12], sizeof(uint32_t) * 4);
}

#if defined(CHACHA20_ASM_NOHW)
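// ChaCha20_ctr32 tries the compiled-in assembly implementations in order of
// preference, using runtime capability checks, and falls back to the generic
// |ChaCha20_ctr32_nohw| routine.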
static void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
                           const uint32_t key[8], const uint32_t counter[4]) {
#if defined(CHACHA20_ASM_NEON)
  if (ChaCha20_ctr32_neon_capable(in_len)) {
    ChaCha20_ctr32_neon(out, in, in_len, key, counter);
    return;
  }
#endif
#if defined(CHACHA20_ASM_AVX2)
  if (ChaCha20_ctr32_avx2_capable(in_len)) {
    ChaCha20_ctr32_avx2(out, in, in_len, key, counter);
    return;
  }
#endif
#if defined(CHACHA20_ASM_SSSE3_4X)
  if (ChaCha20_ctr32_ssse3_4x_capable(in_len)) {
    ChaCha20_ctr32_ssse3_4x(out, in, in_len, key, counter);
    return;
  }
#endif
#if defined(CHACHA20_ASM_SSSE3)
  if (ChaCha20_ctr32_ssse3_capable(in_len)) {
    ChaCha20_ctr32_ssse3(out, in, in_len, key, counter);
    return;
  }
#endif
  if (in_len > 0) {
    ChaCha20_ctr32_nohw(out, in, in_len, key, counter);
  }
}
#endif

#if defined(CHACHA20_ASM_NOHW)

void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
                      const uint8_t key[32], const uint8_t nonce[12],
                      uint32_t counter) {
  assert(!buffers_alias(out, in_len, in, in_len) || in == out);

  uint32_t counter_nonce[4];
  counter_nonce[0] = counter;
  counter_nonce[1] = CRYPTO_load_u32_le(nonce + 0);
  counter_nonce[2] = CRYPTO_load_u32_le(nonce + 4);
  counter_nonce[3] = CRYPTO_load_u32_le(nonce + 8);

  const uint32_t *key_ptr = (const uint32_t *)key;
#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64)
  // The assembly expects the key to be four-byte aligned.
  uint32_t key_u32[8];
  if ((((uintptr_t)key) & 3) != 0) {
    key_u32[0] = CRYPTO_load_u32_le(key + 0);
    key_u32[1] = CRYPTO_load_u32_le(key + 4);
    key_u32[2] = CRYPTO_load_u32_le(key + 8);
    key_u32[3] = CRYPTO_load_u32_le(key + 12);
    key_u32[4] = CRYPTO_load_u32_le(key + 16);
    key_u32[5] = CRYPTO_load_u32_le(key + 20);
    key_u32[6] = CRYPTO_load_u32_le(key + 24);
    key_u32[7] = CRYPTO_load_u32_le(key + 28);

    key_ptr = key_u32;
  }
#endif

  while (in_len > 0) {
    // The assembly functions do not have defined overflow behavior. While
    // overflow is almost always a bug in the caller, we prefer our functions
    // to behave the same across platforms, so divide into multiple calls to
    // avoid this case.
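    // |todo| is the number of bytes that can be produced before the 32-bit
    // block counter in |counter_nonce[0]| would wrap: 64 bytes per block
    // times the number of remaining counter values.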
    uint64_t todo = 64 * ((UINT64_C(1) << 32) - counter_nonce[0]);
    if (todo > in_len) {
      todo = in_len;
    }

    ChaCha20_ctr32(out, in, (size_t)todo, key_ptr, counter_nonce);
    in += todo;
    out += todo;
    in_len -= todo;

    // We're either done and will next break out of the loop, or we stopped at
    // the wraparound point and the counter should continue at zero.
    counter_nonce[0] = 0;
  }
}

#else

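// Without ChaCha20 assembly for this target, the portable C implementation
// below is used.
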
// chacha_core performs 20 rounds of ChaCha on the input words in
// |input| and writes the 64 output bytes to |output|.
static void chacha_core(uint8_t output[64], const uint32_t input[16]) {
  uint32_t x[16];
  int i;

  OPENSSL_memcpy(x, input, sizeof(uint32_t) * 16);
  for (i = 20; i > 0; i -= 2) {
    QUARTERROUND(0, 4, 8, 12)
    QUARTERROUND(1, 5, 9, 13)
    QUARTERROUND(2, 6, 10, 14)
    QUARTERROUND(3, 7, 11, 15)
    QUARTERROUND(0, 5, 10, 15)
    QUARTERROUND(1, 6, 11, 12)
    QUARTERROUND(2, 7, 8, 13)
    QUARTERROUND(3, 4, 9, 14)
  }

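  // Feed-forward: add the original input words back into the permuted state
  // before serializing the 64-byte keystream block.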
  for (i = 0; i < 16; ++i) {
    x[i] += input[i];
  }
  for (i = 0; i < 16; ++i) {
    CRYPTO_store_u32_le(output + 4 * i, x[i]);
  }
}

void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
                      const uint8_t key[32], const uint8_t nonce[12],
                      uint32_t counter) {
  assert(!buffers_alias(out, in_len, in, in_len) || in == out);

  uint32_t input[16];
  uint8_t buf[64];
  size_t todo, i;

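  // Assemble the initial 4x4 state: four constant words from |sigma|, eight
  // key words, the 32-bit block counter and three nonce words, all loaded
  // little-endian.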
  input[0] = CRYPTO_load_u32_le(sigma + 0);
  input[1] = CRYPTO_load_u32_le(sigma + 4);
  input[2] = CRYPTO_load_u32_le(sigma + 8);
  input[3] = CRYPTO_load_u32_le(sigma + 12);

  input[4] = CRYPTO_load_u32_le(key + 0);
  input[5] = CRYPTO_load_u32_le(key + 4);
  input[6] = CRYPTO_load_u32_le(key + 8);
  input[7] = CRYPTO_load_u32_le(key + 12);

  input[8] = CRYPTO_load_u32_le(key + 16);
  input[9] = CRYPTO_load_u32_le(key + 20);
  input[10] = CRYPTO_load_u32_le(key + 24);
  input[11] = CRYPTO_load_u32_le(key + 28);

  input[12] = counter;
  input[13] = CRYPTO_load_u32_le(nonce + 0);
  input[14] = CRYPTO_load_u32_le(nonce + 4);
  input[15] = CRYPTO_load_u32_le(nonce + 8);

  while (in_len > 0) {
    todo = sizeof(buf);
    if (in_len < todo) {
      todo = in_len;
    }

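    // Generate one 64-byte keystream block and XOR it into the input.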
    chacha_core(buf, input);
    for (i = 0; i < todo; i++) {
      out[i] = in[i] ^ buf[i];
    }

    out += todo;
    in += todo;
    in_len -= todo;

    input[12]++;
  }
}

#endif
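
// Illustrative usage (a sketch, not part of BoringSSL): encrypting a buffer
// in place with CRYPTO_chacha_20. ChaCha20 is a stream cipher, so decryption
// is the same call with the same key, nonce and initial counter. The key,
// nonce and buffer values below are placeholders.
//
//   uint8_t key[32] = {0};    // hypothetical key; use real key material
//   uint8_t nonce[12] = {0};  // must never repeat for a given key
//   uint8_t buf[64] = {0};    // plaintext in, ciphertext out (in place)
//   CRYPTO_chacha_20(buf, buf, sizeof(buf), key, nonce, /*counter=*/0);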