/src/hpn-ssh/cipher-chachapoly-libcrypto-mt.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2023 The Board of Trustees of Carnegie Mellon University. |
3 | | * |
4 | | * Author: Mitchell Dorrell <mwd@psc.edu> |
5 | | * Author: Chris Rapier <rapier@psc.edu> |
6 | | * |
7 | | * This library is free software; you can redistribute it and/or modify it |
8 | | * under the terms of the MIT License. |
9 | | * |
10 | | * This library is distributed in the hope that it will be useful, but WITHOUT |
11 | | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
12 | | * FITNESS FOR A PARTICULAR PURPOSE. See the MIT License for more details. |
13 | | * |
14 | | * You should have received a copy of the MIT License along with this library; |
15 | | * if not, see http://opensource.org/licenses/MIT. |
16 | | * |
17 | | */ |
18 | | |
19 | | /* TODO: audit includes */ |
20 | | |
21 | | #include "includes.h" |
22 | | #ifdef WITH_OPENSSL |
23 | | #include "openbsd-compat/openssl-compat.h" |
24 | | #endif |
25 | | |
26 | | #if defined(HAVE_EVP_CHACHA20) && !defined(HAVE_BROKEN_CHACHA20) |
27 | | |
28 | | #include <sys/types.h> |
29 | | #include <unistd.h> /* needed for getpid under C99 */ |
30 | | #include <stdarg.h> /* needed for log.h */ |
31 | | #include <string.h> |
32 | | #include <stdio.h> /* needed for misc.h */ |
33 | | #include <pthread.h> |
34 | | |
35 | | #include <openssl/evp.h> |
36 | | |
37 | | #include "defines.h" |
38 | | #include "log.h" |
39 | | #include "sshbuf.h" |
40 | | #include "ssherr.h" |
41 | | |
42 | | #include "xmalloc.h" |
43 | | #include "cipher-chachapoly.h" |
44 | | #include "cipher-chachapoly-libcrypto-mt.h" |
45 | | |
/* Branch-prediction hints. Both expand to __builtin_expect, so a compiler
 * providing that builtin is required unless the macros are already defined. */
#ifndef likely
# define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
# define unlikely(x) __builtin_expect(!!(x), 0)
#endif

/* Size of keystream to pregenerate, measured in bytes.
 * We take SSH_IOBUFSZ plus 128 bytes of overhead and round the sum up
 * to the nearest multiple of the chacha block length.
 * Note: ROUND_UP evaluates each argument more than once, so only pass
 * side-effect-free expressions. */
#define ROUND_UP(x,y) (((((x)-1)/(y))+1)*(y))
#define KEYSTREAMLEN (ROUND_UP((SSH_IOBUFSZ) + 128, (CHACHA_BLOCKLEN)))

/* BEGIN TUNABLES */

/* Number of worker threads to spawn. */
/* The goal is to ensure that main is never
 * waiting on the worker threads for keystream data. */
#define NUMTHREADS 1

/* Number of per-packet keystreams held in one batch.
 * 64 seems to be a pretty good balance between memory and performance;
 * 128 is another option with somewhat higher memory consumption. */
#define NUMSTREAMS 64

/* END TUNABLES */
71 | | |
/* Pregenerated key material for a single packet sequence number.
 * All three members are filled by generate_keystream(). */
struct mt_keystream {
	/* one-time Poly1305 key for this packet */
	u_char poly_key[POLY1305_KEYLEN];     /* POLY1305_KEYLEN == 32 */
	/* keystream XORed with the additional data (packet length) */
	u_char headerStream[CHACHA_BLOCKLEN]; /* CHACHA_BLOCKLEN == 64 */
	/* keystream XORed with the payload; KEYSTREAMLEN is SSH_IOBUFSZ + 128
	 * rounded up to a multiple of CHACHA_BLOCKLEN */
	u_char mainStream[KEYSTREAMLEN];
};
77 | | |
/* Per-thread cipher state used while generating keystreams.
 * Set up by initialize_threadData() and released by free_threadData(). */
struct threadData {
	EVP_CIPHER_CTX * main_evp;   /* keyed with key[0..31]; poly key + payload stream */
	EVP_CIPHER_CTX * header_evp; /* keyed with key[32..63]; header stream */
	u_char seqbuf[16];           /* IV scratch buffer; seqnr is stored at offset 8 */
};
83 | | |
/* A batch of NUMSTREAMS consecutive keystreams.
 * streams[i] belongs to sequence number batchID * NUMSTREAMS + i. */
struct mt_keystream_batch {
	u_int batchID;                          /* which batch the streams currently hold */
	struct threadData tds[NUMTHREADS];      /* one cipher state per worker thread */
	struct mt_keystream streams[NUMSTREAMS];
};
89 | | |
/* State of one multithreaded chacha20-poly1305 cipher context. */
struct chachapoly_ctx_mt {
	u_int seqnr;   /* updated to seqnr + 1 after every successful crypt */
	u_int batchID; /* batch currently being consumed (seqnr / NUMSTREAMS) */

	/* Two batches, selected with batchID % 2: one is consumed while the
	 * other is being refilled by a manager thread. */
	struct mt_keystream_batch batches[2];

	/* manager_tid[i] is the thread refilling batches[i]; it equals
	 * self_tid when there is no manager thread to join. */
	pthread_t manager_tid[2];
	pthread_t self_tid;

	pid_t mainpid; /* PID that created the context, used to detect forks */
	/* Zero-filled input handed to the cipher to obtain raw keystream.
	 * KEYSTREAMLEN is SSH_IOBUFSZ + 128 rounded up to a multiple of
	 * CHACHA_BLOCKLEN. */
	u_char zeros[KEYSTREAMLEN];

	/* if OpenSSL has support for Poly1305 in the MAC EVPs
	 * use that (OSSL >= 3.0) if not then it's OSSL 1.1 so
	 * use the Poly1305 digest methods. Failing that use the
	 * internal poly1305 methods */
#ifdef OPENSSL_HAVE_POLY_EVP
	EVP_MAC_CTX *poly_ctx;
#elif !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305)
	EVP_PKEY_CTX *poly_ctx;
	EVP_MD_CTX *md_ctx;
	EVP_PKEY *pkey;
	size_t ptaglen;
#else
	char *poly_ctx;
#endif
};
117 | | |
/* Argument and result record of manager_thread(). It is heap-allocated by
 * the code that starts the thread and freed by join_manager_thread(). */
struct manager_thread_args {
	struct chachapoly_ctx_mt * ctx_mt; /* context whose batch is refilled */
	u_int oldBatchID;                  /* ID of the batch that was just used up */
	int retval;                        /* 0 on success, nonzero on failure */
};
123 | | |
/* Argument and result record of worker_thread(). */
struct worker_thread_args {
	u_int batchID;                     /* batch to generate; first seqnr is batchID * NUMSTREAMS */
	struct mt_keystream_batch * batch; /* destination batch */
	int threadIndex;                   /* index into batch->tds and first stream handled */
	u_char * zeros;                    /* zero-filled cipher input */
	int retval;                        /* 0 on success, 1 on failure */
};
131 | | |
132 | | /* generate the keystream and header |
133 | | * we use nulls for the "data" (the zeros variable) in order to |
134 | | * get the raw keystream |
135 | | * Returns 0 on success and -1 on failure */ |
136 | | int |
137 | | generate_keystream(struct mt_keystream * ks, u_int seqnr, |
138 | | struct threadData * td, u_char * zeros) |
139 | 0 | { |
140 | | /* generate poly1305 key */ |
141 | 0 | memset(td->seqbuf, 0, sizeof(td->seqbuf)); |
142 | 0 | POKE_U64(td->seqbuf + 8, seqnr); |
143 | 0 | memset(ks->poly_key , 0, sizeof(ks->poly_key)); |
144 | 0 | if (!EVP_CipherInit(td->main_evp, NULL, NULL, td->seqbuf, 1) || |
145 | 0 | EVP_Cipher(td->main_evp, ks->poly_key, ks->poly_key, |
146 | 0 | sizeof(ks->poly_key)) < 0) |
147 | 0 | return -1; |
148 | | |
149 | | /* generate header keystream for encrypting payload length */ |
150 | 0 | if (!EVP_CipherInit(td->header_evp, NULL, NULL, td->seqbuf, 1) || |
151 | 0 | EVP_Cipher(td->header_evp, ks->headerStream, zeros, CHACHA_BLOCKLEN) |
152 | 0 | < 0 ) |
153 | 0 | return -1; |
154 | | |
155 | | /* generate main keystream for encrypting payload */ |
156 | 0 | td->seqbuf[0] = 1; |
157 | 0 | if (!EVP_CipherInit(td->main_evp, NULL, NULL, td->seqbuf, 1) || |
158 | 0 | EVP_Cipher(td->main_evp, ks->mainStream, zeros, KEYSTREAMLEN) < 0) |
159 | 0 | return -1; |
160 | | |
161 | 0 | return 0; |
162 | 0 | } |
163 | | |
164 | | /* free the EVP contexts associated with the give thread */ |
165 | | void |
166 | | free_threadData(struct threadData * td) |
167 | 0 | { |
168 | 0 | if (td == NULL) |
169 | 0 | return; |
170 | 0 | if (td->main_evp) /* false if initialization didn't get this far */ |
171 | 0 | EVP_CIPHER_CTX_free(td->main_evp); |
172 | 0 | if (td->header_evp) /* false if initialization didn't get this far */ |
173 | 0 | EVP_CIPHER_CTX_free(td->header_evp); |
174 | 0 | explicit_bzero(td, sizeof(*td)); |
175 | 0 | } |
176 | | |
177 | | /* initialize the EVPs used by the worker thread |
178 | | Returns 0 on success and -1 on failure */ |
179 | | int |
180 | | initialize_threadData(struct threadData * td, const u_char *key) |
181 | 0 | { |
182 | 0 | memset(td,0,sizeof(*td)); |
183 | 0 | if ((td->main_evp = EVP_CIPHER_CTX_new()) == NULL || |
184 | 0 | (td->header_evp = EVP_CIPHER_CTX_new()) == NULL) |
185 | 0 | goto fail; |
186 | 0 | if (!EVP_CipherInit(td->main_evp, EVP_chacha20(), key, NULL, 1)) |
187 | 0 | goto fail; |
188 | 0 | if (!EVP_CipherInit(td->header_evp, EVP_chacha20(), key + 32, NULL, 1)) |
189 | 0 | goto fail; |
190 | 0 | if (EVP_CIPHER_CTX_iv_length(td->header_evp) != 16) |
191 | 0 | goto fail; |
192 | 0 | return 0; |
193 | 0 | fail: |
194 | 0 | free_threadData(td); |
195 | 0 | return -1; |
196 | 0 | } |
197 | | |
198 | | struct worker_thread_args * |
199 | | worker_thread(struct worker_thread_args * args) |
200 | 0 | { |
201 | | /* check first */ |
202 | 0 | if (args == NULL) |
203 | 0 | return NULL; |
204 | 0 | if (args->batch == NULL || args->zeros == NULL) { |
205 | 0 | args->retval = 1; |
206 | 0 | return args; |
207 | 0 | } |
208 | | |
209 | 0 | int threadIndex = args->threadIndex; |
210 | 0 | struct threadData * td = &(args->batch->tds[threadIndex]); |
211 | 0 | u_int refseqnr = args->batchID * NUMSTREAMS; |
212 | |
|
213 | 0 | for (int i = threadIndex; i < NUMSTREAMS; i += NUMTHREADS) { |
214 | 0 | if (generate_keystream(&(args->batch->streams[i]), refseqnr + i, |
215 | 0 | td, args->zeros) == -1) { |
216 | 0 | args->retval = 1; |
217 | 0 | return args; |
218 | 0 | } |
219 | 0 | } |
220 | | |
221 | 0 | args->retval = 0; |
222 | 0 | return args; |
223 | 0 | } |
224 | | |
225 | | int |
226 | | join_manager_thread(pthread_t manager_tid) |
227 | 0 | { |
228 | 0 | struct manager_thread_args * args; |
229 | 0 | if (pthread_join(manager_tid, (void **) &args) == 0) { |
230 | 0 | if (args == NULL) { |
231 | 0 | debug_f("Manager thread returned NULL!"); |
232 | 0 | return 1; |
233 | 0 | } else if (args == PTHREAD_CANCELED) { |
234 | 0 | debug_f("Manager thread canceled!"); |
235 | 0 | return 1; |
236 | 0 | } else if (args->retval != 0) { |
237 | 0 | debug_f("Manager thread error (%d)", args->retval); |
238 | 0 | free(args); |
239 | 0 | return 1; |
240 | 0 | } else { |
241 | 0 | free(args); |
242 | 0 | return 0; |
243 | 0 | } |
244 | 0 | } else { |
245 | 0 | debug_f("pthread_join error!"); |
246 | 0 | return 1; |
247 | 0 | } |
248 | 0 | } |
249 | | |
250 | | void |
251 | | chachapoly_free_mt(struct chachapoly_ctx_mt * ctx_mt) |
252 | 0 | { |
253 | 0 | if (ctx_mt == NULL) |
254 | 0 | return; |
255 | | |
256 | | #ifdef OPENSSL_HAVE_POLY_EVP |
257 | | if (ctx_mt->poly_ctx != NULL) { |
258 | | EVP_MAC_CTX_free(ctx_mt->poly_ctx); |
259 | | ctx_mt->poly_ctx = NULL; |
260 | | } |
261 | | #elif !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305) |
262 | 0 | if (ctx_mt->md_ctx != NULL) { |
263 | 0 | EVP_MD_CTX_free(ctx_mt->md_ctx); |
264 | 0 | ctx_mt->md_ctx = NULL; |
265 | 0 | } |
266 | 0 | if (ctx_mt->pkey != NULL) { |
267 | 0 | EVP_PKEY_free(ctx_mt->pkey); |
268 | 0 | ctx_mt->pkey = NULL; |
269 | 0 | } |
270 | 0 | #endif |
271 | | |
272 | | /* |
273 | | * Only cleanup the manager threads if we are the PID that initialized |
274 | | * them! If we're a fork, the threads don't really exist. |
275 | | */ |
276 | |
|
277 | 0 | if (getpid() == ctx_mt->mainpid) { |
278 | 0 | if (ctx_mt->manager_tid[0] != ctx_mt->self_tid) { |
279 | 0 | join_manager_thread(ctx_mt->manager_tid[0]); |
280 | 0 | ctx_mt->manager_tid[0] = ctx_mt->self_tid; |
281 | 0 | } |
282 | 0 | if (ctx_mt->manager_tid[1] != ctx_mt->self_tid) { |
283 | 0 | join_manager_thread(ctx_mt->manager_tid[1]); |
284 | 0 | ctx_mt->manager_tid[1] = ctx_mt->self_tid; |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | | /* Cleanup thread data structures. */ |
289 | 0 | for (int i=0; i<2; i++) |
290 | 0 | for (int j=0; j<NUMTHREADS; j++) |
291 | 0 | free_threadData(&(ctx_mt->batches[i].tds[j])); |
292 | | |
293 | | /* Zero and free the whole multithreaded cipher context. */ |
294 | 0 | freezero(ctx_mt, sizeof(*ctx_mt)); |
295 | |
|
296 | 0 | return; |
297 | 0 | } |
298 | | |
299 | | struct chachapoly_ctx_mt * |
300 | | chachapoly_new_mt(u_int startseqnr, const u_char * key, u_int keylen) |
301 | 0 | { |
302 | 0 | struct chachapoly_ctx_mt * ctx_mt = xmalloc(sizeof(*ctx_mt)); |
303 | 0 | memset(ctx_mt, 0, sizeof(*ctx_mt)); |
304 | | /* Initialize the sequence number. When rekeying, this won't be zero. */ |
305 | 0 | ctx_mt->seqnr = startseqnr; |
306 | 0 | ctx_mt->batchID = startseqnr / NUMSTREAMS; |
307 | 0 | struct threadData mainData; |
308 | 0 | int tDataI; |
309 | 0 | int genKSfailed = 0; |
310 | |
|
311 | | #ifdef OPENSSL_HAVE_POLY_EVP |
312 | | EVP_MAC *mac = NULL; |
313 | | if ((mac = EVP_MAC_fetch(NULL, "POLY1305", NULL)) == NULL) |
314 | | goto fail; |
315 | | if ((ctx_mt->poly_ctx = EVP_MAC_CTX_new(mac)) == NULL) |
316 | | goto fail; |
317 | | #elif !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305) |
318 | 0 | if ((ctx_mt->md_ctx = EVP_MD_CTX_new()) == NULL) |
319 | 0 | goto fail; |
320 | 0 | if ((ctx_mt->pkey = EVP_PKEY_new_mac_key(EVP_PKEY_POLY1305, NULL, |
321 | 0 | ctx_mt->zeros, POLY1305_KEYLEN)) == NULL) |
322 | 0 | goto fail; |
323 | 0 | if (EVP_DigestSignInit(ctx_mt->md_ctx, &ctx_mt->poly_ctx, NULL, NULL, |
324 | 0 | ctx_mt->pkey) == 0) |
325 | 0 | goto fail; |
326 | | #else |
327 | | ctx_mt->poly_ctx = NULL; |
328 | | #endif |
329 | | |
330 | 0 | ctx_mt->batches[ctx_mt->batchID % 2].batchID = ctx_mt->batchID; |
331 | 0 | ctx_mt->batches[(ctx_mt->batchID + 1) % 2].batchID = |
332 | 0 | ctx_mt->batchID + 1; |
333 | | |
334 | | /* initialize batches[0] tds */ |
335 | 0 | for (tDataI = 0; tDataI < NUMTHREADS; tDataI++) { |
336 | 0 | if (initialize_threadData(&(ctx_mt->batches[0].tds[tDataI]), |
337 | 0 | key) != 0) |
338 | 0 | break; |
339 | 0 | } |
340 | 0 | if (tDataI < NUMTHREADS) { |
341 | | /* Backtrack starting with 'tDataI - 1' */ |
342 | 0 | for (tDataI--; tDataI >= 0; tDataI--) |
343 | 0 | free_threadData(&(ctx_mt->batches[0].tds[tDataI])); |
344 | 0 | goto fail; |
345 | 0 | } |
346 | | /* initialize batches[1] tds */ |
347 | 0 | for (tDataI = 0; tDataI < NUMTHREADS; tDataI++) { |
348 | 0 | if (initialize_threadData(&(ctx_mt->batches[1].tds[tDataI]), |
349 | 0 | key) != 0) |
350 | 0 | break; |
351 | 0 | } |
352 | 0 | if (tDataI < NUMTHREADS) { |
353 | | /* Backtrack starting with 'tDataI - 1' */ |
354 | 0 | for (tDataI--; tDataI >= 0; tDataI--) |
355 | 0 | free_threadData(&(ctx_mt->batches[1].tds[tDataI])); |
356 | | /* Free the batches[0] tds too */ |
357 | 0 | for (tDataI = NUMTHREADS; tDataI >= 0; tDataI--) |
358 | 0 | free_threadData(&(ctx_mt->batches[0].tds[tDataI])); |
359 | 0 | goto fail; |
360 | 0 | } |
361 | | |
362 | 0 | if (initialize_threadData(&mainData, key) != 0) { |
363 | 0 | chachapoly_free_mt(ctx_mt); |
364 | 0 | explicit_bzero(&startseqnr, sizeof(startseqnr)); |
365 | 0 | return NULL; |
366 | 0 | } |
367 | | |
368 | 0 | for (int i=0; i<2; i++) { |
369 | 0 | u_int refseqnr = ctx_mt->batches[i].batchID * NUMSTREAMS; |
370 | 0 | for (int j = startseqnr > refseqnr ? startseqnr - refseqnr : 0; |
371 | 0 | j<NUMSTREAMS; j++) { |
372 | 0 | if (generate_keystream(&(ctx_mt->batches[i].streams[j]), |
373 | 0 | refseqnr + j, &mainData, ctx_mt->zeros) == -1) { |
374 | 0 | debug_f("generate_keystream failed in " |
375 | 0 | "chacha20-poly1305@hpnssh.org"); |
376 | 0 | genKSfailed = 1; |
377 | 0 | break; /* imperfect, but it helps */ |
378 | 0 | } |
379 | 0 | } |
380 | 0 | } |
381 | |
|
382 | 0 | free_threadData(&mainData); |
383 | |
|
384 | 0 | if (genKSfailed != 0) { |
385 | 0 | chachapoly_free_mt(ctx_mt); |
386 | 0 | explicit_bzero(&startseqnr, sizeof(startseqnr)); |
387 | 0 | return NULL; |
388 | 0 | } |
389 | | |
390 | | /* Store the PID so that in the future, we can tell if we're a fork */ |
391 | 0 | ctx_mt->mainpid = getpid(); |
392 | 0 | ctx_mt->self_tid = pthread_self(); |
393 | 0 | ctx_mt->manager_tid[0] = ctx_mt->self_tid; |
394 | 0 | ctx_mt->manager_tid[1] = ctx_mt->self_tid; |
395 | | /* was reporting the TID using gettid() but it's not portable */ |
396 | 0 | debug2_f("<main thread: pid=%u, ptid=0x%lx>", getpid(), pthread_self()); |
397 | | |
398 | | /* Success! */ |
399 | 0 | explicit_bzero(&startseqnr, sizeof(startseqnr)); |
400 | 0 | return ctx_mt; |
401 | | |
402 | 0 | fail: |
403 | | #ifdef OPENSSL_HAVE_POLY_EVP |
404 | | if (ctx_mt->poly_ctx != NULL) { |
405 | | EVP_MAC_CTX_free(ctx_mt->poly_ctx); |
406 | | ctx_mt->poly_ctx = NULL; |
407 | | } |
408 | | #elif !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305) |
409 | 0 | if (ctx_mt->md_ctx != NULL) { |
410 | 0 | EVP_MD_CTX_free(ctx_mt->md_ctx); |
411 | 0 | ctx_mt->md_ctx = NULL; |
412 | 0 | } |
413 | 0 | if (ctx_mt->pkey != NULL) { |
414 | 0 | EVP_PKEY_free(ctx_mt->pkey); |
415 | 0 | ctx_mt->pkey = NULL; |
416 | 0 | } |
417 | 0 | #endif |
418 | 0 | freezero(ctx_mt, sizeof(*ctx_mt)); |
419 | 0 | explicit_bzero(&startseqnr, sizeof(startseqnr)); |
420 | 0 | return NULL; |
421 | 0 | } |
422 | | |
/*
 * XOR 'len' bytes of 'src' with 'keystream' and store the result in 'dest'.
 *
 * The bulk of the data is processed in word-sized chunks that are moved
 * with memcpy, which sidesteps strict-aliasing and alignment problems.
 * Chunks are 128 bits wide where the compiler offers __uint128_t and
 * 64 bits wide otherwise; any remaining bytes are handled one at a time.
 */
static inline void
fastXOR2(u_char *dest, const u_char *src, const u_char *keystream, u_int len)
{
#if defined(__SIZEOF_INT128__)
	typedef __uint128_t chunk;
#else
	typedef uint64_t chunk;
#endif
	const size_t step = sizeof(chunk);
	const size_t total = len;
	/* largest multiple of the chunk size that fits into the input */
	const size_t bulk = total - (total % step);
	size_t off;

	for (off = 0; off < bulk; off += step) {
		chunk data, key;

		memcpy(&data, src + off, step);
		memcpy(&key, keystream + off, step);
		data ^= key;
		memcpy(dest + off, &data, step);
	}
	/* tail shorter than one chunk */
	for (; off < total; off++)
		dest[off] = src[off] ^ keystream[off];
}
448 | | |
449 | | struct manager_thread_args * |
450 | 0 | manager_thread(struct manager_thread_args * margs) { |
451 | | /* make sure we have valid data before proceeding */ |
452 | 0 | if (margs == NULL) |
453 | 0 | return NULL; |
454 | | |
455 | 0 | struct chachapoly_ctx_mt * ctx_mt = margs->ctx_mt; |
456 | 0 | if (ctx_mt == NULL) { |
457 | 0 | margs->retval = 1; |
458 | 0 | return margs; |
459 | 0 | } |
460 | | |
461 | 0 | u_int oldBatchID = margs->oldBatchID; |
462 | |
|
463 | 0 | struct mt_keystream_batch * batch = &(ctx_mt->batches[oldBatchID % 2]); |
464 | 0 | if (batch->batchID != oldBatchID) { |
465 | 0 | debug_f("Post-crypt batch miss! Seeking %u, found %u. Failing.", |
466 | 0 | oldBatchID, batch->batchID); |
467 | 0 | margs->retval = 1; |
468 | 0 | return margs; |
469 | 0 | } |
470 | | |
471 | 0 | margs->retval = 0; |
472 | 0 | u_int batchID = oldBatchID + 2; |
473 | |
|
474 | 0 | pthread_t tid[NUMTHREADS]; |
475 | 0 | struct worker_thread_args * wargs = malloc(NUMTHREADS * sizeof(*wargs)); |
476 | 0 | int ti; |
477 | |
|
478 | 0 | for (ti = 0; ti < NUMTHREADS; ti++) { |
479 | 0 | wargs[ti].batchID = batchID; |
480 | 0 | wargs[ti].batch = batch; |
481 | 0 | wargs[ti].threadIndex = ti; |
482 | 0 | wargs[ti].zeros = ctx_mt->zeros; |
483 | 0 | if (pthread_create(&(tid[ti]), NULL, (void *) worker_thread, |
484 | 0 | &(wargs[ti])) != 0) { |
485 | 0 | margs->retval = 1; |
486 | 0 | break; |
487 | 0 | } |
488 | 0 | } |
489 | 0 | for (; ti < NUMTHREADS; ti++) /* for error condition */ |
490 | 0 | tid[ti] = pthread_self(); |
491 | |
|
492 | 0 | struct worker_thread_args * retwargs; |
493 | |
|
494 | 0 | for (ti = 0; ti < NUMTHREADS; ti++) { |
495 | 0 | if (tid[ti] == pthread_self()) { |
496 | 0 | margs->retval = 1; /* redundant, but harmless */ |
497 | 0 | continue; |
498 | 0 | } |
499 | 0 | if (pthread_join(tid[ti], (void **) &retwargs) == 0) { |
500 | 0 | if (retwargs == NULL) { |
501 | 0 | debug_f("Worker thread returned NULL!"); |
502 | 0 | margs->retval = 1; |
503 | 0 | } else if (retwargs == PTHREAD_CANCELED) { |
504 | 0 | debug_f("Worker thread canceled!"); |
505 | 0 | margs->retval = 1; |
506 | 0 | } else { |
507 | 0 | if (retwargs->retval != 0) { |
508 | 0 | debug_f("Worker thread error (%d)", |
509 | 0 | retwargs->retval); |
510 | 0 | margs->retval = 1; |
511 | 0 | } |
512 | 0 | if (retwargs != &(wargs[ti])) { |
513 | 0 | debug_f("Worker thread didn't return " |
514 | 0 | "expected structure!"); |
515 | 0 | margs->retval = 1; |
516 | 0 | } |
517 | 0 | } |
518 | 0 | } else { |
519 | 0 | debug_f("pthread_join error!"); |
520 | 0 | margs->retval = 1; |
521 | 0 | } |
522 | 0 | } |
523 | 0 | free(wargs); |
524 | |
|
525 | 0 | if (margs->retval == 0) { |
526 | 0 | batch->batchID = batchID; |
527 | 0 | } |
528 | |
|
529 | 0 | return margs; |
530 | 0 | } |
531 | | |
532 | | int |
533 | | chachapoly_crypt_mt(struct chachapoly_ctx_mt *ctx_mt, u_int seqnr, u_char *dest, |
534 | | const u_char *src, u_int len, u_int aadlen, u_int authlen, int do_encrypt) |
535 | 0 | { |
536 | | #ifdef SAFETY |
537 | | if (ctx_mt->mainpid != getpid()) { /* we're a fork */ |
538 | | /* |
539 | | * TODO: this is EXTREMELY RARE, may never happen at all (only |
540 | | * if the fork calls crypt), so we should tell the compiler. |
541 | | */ |
542 | | /* The worker threads don't exist, we could spawn them? */ |
543 | | debug_f("Fork called crypt without workers!"); |
544 | | chachapoly_free_mt(ctx_mt); |
545 | | return SSH_ERR_INTERNAL_ERROR; |
546 | | } |
547 | | #endif |
548 | |
|
549 | 0 | pthread_t * manager_tid = &(ctx_mt->manager_tid[ctx_mt->batchID % 2]); |
550 | 0 | if (unlikely(*manager_tid != ctx_mt->self_tid)) { |
551 | 0 | int ret = join_manager_thread(*manager_tid); |
552 | 0 | *manager_tid = ctx_mt->self_tid; |
553 | 0 | if (ret != 0) |
554 | 0 | return SSH_ERR_INTERNAL_ERROR; |
555 | 0 | } |
556 | | |
557 | 0 | struct mt_keystream_batch * batch = |
558 | 0 | &(ctx_mt->batches[ctx_mt->batchID % 2]); |
559 | |
|
560 | 0 | struct mt_keystream * ks = &(batch->streams[seqnr % NUMSTREAMS]); |
561 | |
|
562 | 0 | int r = SSH_ERR_INTERNAL_ERROR; |
563 | |
|
564 | | #ifdef SAFETY |
565 | | if (batch->batchID == ctx_mt->batchID) { /* Safety check */ |
566 | | #endif |
567 | | /* check tag before anything else */ |
568 | 0 | if (!do_encrypt) { |
569 | 0 | const u_char *tag = src + aadlen + len; |
570 | 0 | u_char expected_tag[POLY1305_TAGLEN]; |
571 | 0 | #if !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305) |
572 | 0 | if ((EVP_PKEY_CTX_ctrl(ctx_mt->poly_ctx, -1, |
573 | 0 | EVP_PKEY_OP_SIGNCTX, EVP_PKEY_CTRL_SET_MAC_KEY, |
574 | 0 | POLY1305_KEYLEN, ks->poly_key) <= 0) || |
575 | 0 | (EVP_DigestSignUpdate(ctx_mt->md_ctx, src, aadlen + len) == 0)) { |
576 | 0 | debug_f("SSL error while decrypting poly1305 tag"); |
577 | 0 | return SSH_ERR_INTERNAL_ERROR; |
578 | 0 | } |
579 | 0 | ctx_mt->ptaglen = POLY1305_TAGLEN; |
580 | 0 | if (EVP_DigestSignFinal(ctx_mt->md_ctx, expected_tag, |
581 | 0 | &ctx_mt->ptaglen) == 0) { |
582 | 0 | debug_f("SSL error while finalizing decyrpted poly1305"); |
583 | 0 | return SSH_ERR_INTERNAL_ERROR; |
584 | 0 | } |
585 | | #else |
586 | | poly1305_auth(ctx_mt->poly_ctx, expected_tag, src, |
587 | | aadlen + len, ks->poly_key); |
588 | | #endif |
589 | 0 | if (timingsafe_bcmp(expected_tag, tag, POLY1305_TAGLEN) |
590 | 0 | != 0) |
591 | 0 | r = SSH_ERR_MAC_INVALID; |
592 | 0 | explicit_bzero(expected_tag, sizeof(expected_tag)); |
593 | 0 | } |
594 | 0 | if (r != SSH_ERR_MAC_INVALID) { |
595 | | /* Crypt additional data (i.e., packet length) */ |
596 | | /* TODO: is aadlen always four bytes? */ |
597 | | /* TODO: do we always have an aadlen? */ |
598 | 0 | if (aadlen) |
599 | 0 | for (u_int i=0; i<aadlen; i++) |
600 | 0 | dest[i] = ks->headerStream[i] ^ src[i]; |
601 | | /* Crypt payload */ |
602 | 0 | fastXOR2(dest+aadlen,src+aadlen,ks->mainStream,len); |
603 | | /* calculate and append tag */ |
604 | 0 | #if !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305) |
605 | 0 | if (do_encrypt) { |
606 | 0 | if ((EVP_PKEY_CTX_ctrl(ctx_mt->poly_ctx, -1, |
607 | 0 | EVP_PKEY_OP_SIGNCTX, EVP_PKEY_CTRL_SET_MAC_KEY, |
608 | 0 | POLY1305_KEYLEN, ks->poly_key) <=0) || |
609 | 0 | (EVP_DigestSignUpdate(ctx_mt->md_ctx, dest, aadlen + len) == 0)) { |
610 | 0 | debug_f ("SSL error while encrypting poly1305 tag"); |
611 | 0 | return SSH_ERR_INTERNAL_ERROR; |
612 | 0 | } |
613 | 0 | ctx_mt->ptaglen = POLY1305_TAGLEN; |
614 | 0 | if (EVP_DigestSignFinal(ctx_mt->md_ctx, dest+aadlen+len, |
615 | 0 | &ctx_mt->ptaglen) == 0) { |
616 | 0 | debug_f("SSL error while finalizing decyrpted poly1305"); |
617 | 0 | return SSH_ERR_INTERNAL_ERROR; |
618 | 0 | } |
619 | 0 | } |
620 | | #else |
621 | | if (do_encrypt) |
622 | | poly1305_auth(ctx_mt->poly_ctx, dest+aadlen+len, |
623 | | dest, aadlen+len, ks->poly_key); |
624 | | #endif |
625 | 0 | r=0; /* Success! */ |
626 | 0 | } |
627 | 0 | if (r) /* Anything nonzero is an error. */ |
628 | 0 | return r; |
629 | | |
630 | 0 | ctx_mt->seqnr = seqnr + 1; |
631 | |
|
632 | 0 | if (unlikely(ctx_mt->seqnr / NUMSTREAMS > ctx_mt->batchID)) { |
633 | 0 | struct manager_thread_args * args = |
634 | 0 | malloc(sizeof(*args)); |
635 | 0 | if (args == NULL) { |
636 | 0 | return SSH_ERR_INTERNAL_ERROR; |
637 | 0 | } |
638 | 0 | args->ctx_mt = ctx_mt; |
639 | 0 | args->oldBatchID = ctx_mt->batchID; |
640 | 0 | if (pthread_create(&(ctx_mt->manager_tid[ctx_mt->batchID |
641 | 0 | % 2]), NULL, (void *) manager_thread, args) != 0) { |
642 | 0 | free(args); |
643 | 0 | return SSH_ERR_INTERNAL_ERROR; |
644 | 0 | } |
645 | 0 | ctx_mt->batchID = ctx_mt->seqnr / NUMSTREAMS; |
646 | 0 | } |
647 | | |
648 | | /* TODO: Nothing we need to sanitize here? */ |
649 | | |
650 | 0 | return 0; |
651 | | #ifdef SAFETY |
652 | | } else { /* Bad, it's the wrong batch. */ |
653 | | debug_f( "Pre-crypt batch miss! Seeking %u, found %u. Failing.", |
654 | | ctx_mt->batchID, batch->batchID); |
655 | | return SSH_ERR_INTERNAL_ERROR; |
656 | | } |
657 | | #endif |
658 | 0 | } |
659 | | |
660 | | int |
661 | | chachapoly_get_length_mt(struct chachapoly_ctx_mt *ctx_mt, u_int *plenp, |
662 | | u_int seqnr, const u_char *cp, u_int len) |
663 | 0 | { |
664 | | /* TODO: add compiler hints */ |
665 | | #ifdef SAFETY |
666 | | if (ctx_mt->mainpid != getpid()) { /* Use serial mode if we're a fork */ |
667 | | debug_f("We're a fork. Failing."); |
668 | | return SSH_ERR_INTERNAL_ERROR; |
669 | | } |
670 | | #endif |
671 | |
|
672 | 0 | if (len < 4) |
673 | 0 | return SSH_ERR_MESSAGE_INCOMPLETE; |
674 | | |
675 | 0 | pthread_t * manager_tid = &(ctx_mt->manager_tid[ctx_mt->batchID % 2]); |
676 | 0 | if (unlikely(*manager_tid != ctx_mt->self_tid)) { |
677 | 0 | int ret = join_manager_thread(*manager_tid); |
678 | 0 | *manager_tid = ctx_mt->self_tid; |
679 | 0 | if (ret != 0) |
680 | 0 | return SSH_ERR_INTERNAL_ERROR; |
681 | 0 | } |
682 | | |
683 | 0 | u_char buf[4]; |
684 | | #ifdef SAFETY |
685 | | u_int sought_batchID = seqnr / NUMSTREAMS; |
686 | | #endif |
687 | 0 | struct mt_keystream_batch * batch = |
688 | 0 | &(ctx_mt->batches[ctx_mt->batchID % 2]); |
689 | 0 | struct mt_keystream * ks = &(batch->streams[seqnr % NUMSTREAMS]); |
690 | | #ifdef SAFETY |
691 | | if (batch->batchID == sought_batchID) { |
692 | | #endif |
693 | 0 | for (u_int i=0; i < sizeof(buf); i++) |
694 | 0 | buf[i]=ks->headerStream[i] ^ cp[i]; |
695 | 0 | *plenp = PEEK_U32(buf); |
696 | 0 | return 0; |
697 | | #ifdef SAFETY |
698 | | } else { |
699 | | debug_f("Batch miss! Seeking %u, found %u. Failing.", |
700 | | sought_batchID, batch->batchID); |
701 | | return SSH_ERR_INTERNAL_ERROR; |
702 | | } |
703 | | #endif |
704 | 0 | } |
705 | | #endif /* defined(HAVE_EVP_CHACHA20) && !defined(HAVE_BROKEN_CHACHA20) */ |