/src/boringssl/crypto/chacha/chacha.cc
Line | Count | Source |
1 | | // Copyright 2014 The BoringSSL Authors |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | // Adapted from the public domain, estream code by D. Bernstein. |
16 | | |
17 | | #include <openssl/chacha.h> |
18 | | |
19 | | #include <assert.h> |
20 | | #include <string.h> |
21 | | |
22 | | #include "../internal.h" |
23 | | #include "internal.h" |
24 | | |
25 | | |
26 | | // sigma contains the ChaCha constants, which happen to be an ASCII string. |
27 | | using namespace bssl; |
28 | | |
// The sixteen bytes below spell "expand 32-byte k". They are loaded into the
// first four words of the ChaCha state.
static const uint8_t sigma[16] = {
    'e', 'x', 'p', 'a',
    'n', 'd', ' ', '3',
    '2', '-', 'b', 'y',
    't', 'e', ' ', 'k',
};
31 | | |
// QUARTERROUND applies one ChaCha "quarter" round to the four words of the
// enclosing scope's |x| array selected by the indices a, b, c and d. It
// expands to a complete compound statement, so call sites do not add a
// trailing semicolon.
#define QUARTERROUND(a, b, c, d)                   \
  {                                                \
    x[(a)] += x[(b)];                              \
    x[(d)] = CRYPTO_rotl_u32(x[(d)] ^ x[(a)], 16); \
    x[(c)] += x[(d)];                              \
    x[(b)] = CRYPTO_rotl_u32(x[(b)] ^ x[(c)], 12); \
    x[(a)] += x[(b)];                              \
    x[(d)] = CRYPTO_rotl_u32(x[(d)] ^ x[(a)], 8);  \
    x[(c)] += x[(d)];                              \
    x[(b)] = CRYPTO_rotl_u32(x[(b)] ^ x[(c)], 7);  \
  }
42 | | |
43 | | void bssl::CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32], |
44 | 0 | const uint8_t nonce[16]) { |
45 | 0 | uint32_t x[16]; |
46 | 0 | OPENSSL_memcpy(x, sigma, sizeof(sigma)); |
47 | 0 | OPENSSL_memcpy(&x[4], key, 32); |
48 | 0 | OPENSSL_memcpy(&x[12], nonce, 16); |
49 | |
|
50 | 0 | for (size_t i = 0; i < 20; i += 2) { |
51 | 0 | QUARTERROUND(0, 4, 8, 12) |
52 | 0 | QUARTERROUND(1, 5, 9, 13) |
53 | 0 | QUARTERROUND(2, 6, 10, 14) |
54 | 0 | QUARTERROUND(3, 7, 11, 15) |
55 | 0 | QUARTERROUND(0, 5, 10, 15) |
56 | 0 | QUARTERROUND(1, 6, 11, 12) |
57 | 0 | QUARTERROUND(2, 7, 8, 13) |
58 | 0 | QUARTERROUND(3, 4, 9, 14) |
59 | 0 | } |
60 | |
|
61 | 0 | OPENSSL_memcpy(out, &x[0], sizeof(uint32_t) * 4); |
62 | 0 | OPENSSL_memcpy(&out[16], &x[12], sizeof(uint32_t) * 4); |
63 | 0 | } |
64 | | |
65 | | #if defined(CHACHA20_ASM_NOHW) |
66 | | static void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len, |
67 | 379 | const uint32_t key[8], const uint32_t counter[4]) { |
68 | | #if defined(CHACHA20_ASM_NEON) |
69 | | if (ChaCha20_ctr32_neon_capable(in_len)) { |
70 | | ChaCha20_ctr32_neon(out, in, in_len, key, counter); |
71 | | return; |
72 | | } |
73 | | #endif |
74 | 379 | #if defined(CHACHA20_ASM_AVX2) |
75 | 379 | if (ChaCha20_ctr32_avx2_capable(in_len)) { |
76 | 0 | ChaCha20_ctr32_avx2(out, in, in_len, key, counter); |
77 | 0 | return; |
78 | 0 | } |
79 | 379 | #endif |
80 | 379 | #if defined(CHACHA20_ASM_SSSE3_4X) |
81 | 379 | if (ChaCha20_ctr32_ssse3_4x_capable(in_len)) { |
82 | 0 | ChaCha20_ctr32_ssse3_4x(out, in, in_len, key, counter); |
83 | 0 | return; |
84 | 0 | } |
85 | 379 | #endif |
86 | 379 | #if defined(CHACHA20_ASM_SSSE3) |
87 | 379 | if (ChaCha20_ctr32_ssse3_capable(in_len)) { |
88 | 0 | ChaCha20_ctr32_ssse3(out, in, in_len, key, counter); |
89 | 0 | return; |
90 | 0 | } |
91 | 379 | #endif |
92 | 379 | if (in_len > 0) { |
93 | 379 | ChaCha20_ctr32_nohw(out, in, in_len, key, counter); |
94 | 379 | } |
95 | 379 | } |
96 | | #endif |
97 | | |
98 | | #if defined(CHACHA20_ASM_NOHW) |
99 | | |
100 | | void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len, |
101 | | const uint8_t key[32], const uint8_t nonce[12], |
102 | 379 | uint32_t counter) { |
103 | 379 | assert(!buffers_alias(out, in_len, in, in_len) || in == out); |
104 | | |
105 | 379 | uint32_t counter_nonce[4]; |
106 | 379 | counter_nonce[0] = counter; |
107 | 379 | counter_nonce[1] = CRYPTO_load_u32_le(nonce + 0); |
108 | 379 | counter_nonce[2] = CRYPTO_load_u32_le(nonce + 4); |
109 | 379 | counter_nonce[3] = CRYPTO_load_u32_le(nonce + 8); |
110 | | |
111 | 379 | const uint32_t *key_ptr = (const uint32_t *)key; |
112 | | #if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64) |
113 | | // The assembly expects the key to be four-byte aligned. |
114 | | uint32_t key_u32[8]; |
115 | | if ((((uintptr_t)key) & 3) != 0) { |
116 | | key_u32[0] = CRYPTO_load_u32_le(key + 0); |
117 | | key_u32[1] = CRYPTO_load_u32_le(key + 4); |
118 | | key_u32[2] = CRYPTO_load_u32_le(key + 8); |
119 | | key_u32[3] = CRYPTO_load_u32_le(key + 12); |
120 | | key_u32[4] = CRYPTO_load_u32_le(key + 16); |
121 | | key_u32[5] = CRYPTO_load_u32_le(key + 20); |
122 | | key_u32[6] = CRYPTO_load_u32_le(key + 24); |
123 | | key_u32[7] = CRYPTO_load_u32_le(key + 28); |
124 | | |
125 | | key_ptr = key_u32; |
126 | | } |
127 | | #endif |
128 | | |
129 | 758 | while (in_len > 0) { |
130 | | // The assembly functions do not have defined overflow behavior. While |
131 | | // overflow is almost always a bug in the caller, we prefer our functions to |
132 | | // behave the same across platforms, so divide into multiple calls to avoid |
133 | | // this case. |
134 | 379 | uint64_t todo = 64 * ((UINT64_C(1) << 32) - counter_nonce[0]); |
135 | 379 | if (todo > in_len) { |
136 | 379 | todo = in_len; |
137 | 379 | } |
138 | | |
139 | 379 | ChaCha20_ctr32(out, in, (size_t)todo, key_ptr, counter_nonce); |
140 | 379 | in += todo; |
141 | 379 | out += todo; |
142 | 379 | in_len -= todo; |
143 | | |
144 | | // We're either done and will next break out of the loop, or we stopped at |
145 | | // the wraparound point and the counter should continue at zero. |
146 | 379 | counter_nonce[0] = 0; |
147 | 379 | } |
148 | 379 | } |
149 | | |
150 | | #else |
151 | | |
152 | | // chacha_core performs 20 rounds of ChaCha on the input words in |
153 | | // |input| and writes the 64 output bytes to |output|. |
154 | | static void chacha_core(uint8_t output[64], const uint32_t input[16]) { |
155 | | uint32_t x[16]; |
156 | | int i; |
157 | | |
158 | | OPENSSL_memcpy(x, input, sizeof(uint32_t) * 16); |
159 | | for (i = 20; i > 0; i -= 2) { |
160 | | QUARTERROUND(0, 4, 8, 12) |
161 | | QUARTERROUND(1, 5, 9, 13) |
162 | | QUARTERROUND(2, 6, 10, 14) |
163 | | QUARTERROUND(3, 7, 11, 15) |
164 | | QUARTERROUND(0, 5, 10, 15) |
165 | | QUARTERROUND(1, 6, 11, 12) |
166 | | QUARTERROUND(2, 7, 8, 13) |
167 | | QUARTERROUND(3, 4, 9, 14) |
168 | | } |
169 | | |
170 | | for (i = 0; i < 16; ++i) { |
171 | | x[i] += input[i]; |
172 | | } |
173 | | for (i = 0; i < 16; ++i) { |
174 | | CRYPTO_store_u32_le(output + 4 * i, x[i]); |
175 | | } |
176 | | } |
177 | | |
178 | | void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len, |
179 | | const uint8_t key[32], const uint8_t nonce[12], |
180 | | uint32_t counter) { |
181 | | assert(!buffers_alias(out, in_len, in, in_len) || in == out); |
182 | | |
183 | | uint32_t input[16]; |
184 | | uint8_t buf[64]; |
185 | | size_t todo, i; |
186 | | |
187 | | input[0] = CRYPTO_load_u32_le(sigma + 0); |
188 | | input[1] = CRYPTO_load_u32_le(sigma + 4); |
189 | | input[2] = CRYPTO_load_u32_le(sigma + 8); |
190 | | input[3] = CRYPTO_load_u32_le(sigma + 12); |
191 | | |
192 | | input[4] = CRYPTO_load_u32_le(key + 0); |
193 | | input[5] = CRYPTO_load_u32_le(key + 4); |
194 | | input[6] = CRYPTO_load_u32_le(key + 8); |
195 | | input[7] = CRYPTO_load_u32_le(key + 12); |
196 | | |
197 | | input[8] = CRYPTO_load_u32_le(key + 16); |
198 | | input[9] = CRYPTO_load_u32_le(key + 20); |
199 | | input[10] = CRYPTO_load_u32_le(key + 24); |
200 | | input[11] = CRYPTO_load_u32_le(key + 28); |
201 | | |
202 | | input[12] = counter; |
203 | | input[13] = CRYPTO_load_u32_le(nonce + 0); |
204 | | input[14] = CRYPTO_load_u32_le(nonce + 4); |
205 | | input[15] = CRYPTO_load_u32_le(nonce + 8); |
206 | | |
207 | | while (in_len > 0) { |
208 | | todo = sizeof(buf); |
209 | | if (in_len < todo) { |
210 | | todo = in_len; |
211 | | } |
212 | | |
213 | | chacha_core(buf, input); |
214 | | for (i = 0; i < todo; i++) { |
215 | | out[i] = in[i] ^ buf[i]; |
216 | | } |
217 | | |
218 | | out += todo; |
219 | | in += todo; |
220 | | in_len -= todo; |
221 | | |
222 | | input[12]++; |
223 | | } |
224 | | } |
225 | | |
226 | | #endif |