/src/hpn-ssh/cipher-chachapoly-libcrypto-mt.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Copyright (c) 2023 The Board of Trustees of Carnegie Mellon University.  | 
3  |  |  *  | 
4  |  |  *  Author: Mitchell Dorrell <mwd@psc.edu>  | 
5  |  |  *  Author: Chris Rapier  <rapier@psc.edu>  | 
6  |  |  *  | 
7  |  |  * This library is free software; you can redistribute it and/or modify it  | 
8  |  |  * under the terms of the MIT License.  | 
9  |  |  *  | 
10  |  |  * This library is distributed in the hope that it will be useful, but WITHOUT  | 
11  |  |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or  | 
12  |  |  * FITNESS FOR A PARTICULAR PURPOSE.  See the MIT License for more details.  | 
13  |  |  *  | 
14  |  |  * You should have received a copy of the MIT License along with this library;  | 
15  |  |  * if not, see http://opensource.org/licenses/MIT.  | 
16  |  |  *  | 
17  |  |  */  | 
18  |  |  | 
19  |  | /* TODO: audit includes */  | 
20  |  |  | 
21  |  | #include "includes.h"  | 
22  |  | #ifdef WITH_OPENSSL  | 
23  |  | #include "openbsd-compat/openssl-compat.h"  | 
24  |  | #endif  | 
25  |  |  | 
26  |  | #if defined(HAVE_EVP_CHACHA20) && !defined(HAVE_BROKEN_CHACHA20)  | 
27  |  |  | 
28  |  | #include <sys/types.h>  | 
29  |  | #include <unistd.h> /* needed for getpid under C99 */  | 
30  |  | #include <stdarg.h> /* needed for log.h */  | 
31  |  | #include <string.h>  | 
32  |  | #include <stdio.h>  /* needed for misc.h */  | 
33  |  | #include <pthread.h>  | 
34  |  |  | 
35  |  | #include <openssl/evp.h>  | 
36  |  |  | 
37  |  | #include "defines.h"  | 
38  |  | #include "log.h"  | 
39  |  | #include "sshbuf.h"  | 
40  |  | #include "ssherr.h"  | 
41  |  |  | 
42  |  | #include "xmalloc.h"  | 
43  |  | #include "cipher-chachapoly.h"  | 
44  |  | #include "cipher-chachapoly-libcrypto-mt.h"  | 
45  |  |  | 
/* Branch-prediction hints, defined here only when nothing earlier has
 * already provided them. */
#ifndef likely
# define likely(x)   __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
# define unlikely(x) __builtin_expect(!!(x), 0)
#endif

/* Size, in bytes, of the keystream pregenerated for each packet:
 * SSH_IOBUFSZ plus 128 bytes of overhead, rounded up to a whole number
 * of ChaCha blocks. */
#define ROUND_UP(x,y) (((((x)-1)/(y))+1)*(y))
#define KEYSTREAMLEN (ROUND_UP((SSH_IOBUFSZ) + 128, (CHACHA_BLOCKLEN)))

/* BEGIN TUNABLES */

/* Number of worker threads to spawn for each batch refill.
 * The goal is to ensure that main is never waiting on the worker
 * threads for keystream data. */
#define NUMTHREADS 1

/* Number of keystreams held in one batch.
 * 64 seems to be a pretty good balance between memory and performance;
 * 128 is another option with somewhat higher memory consumption. */
#define NUMSTREAMS 64

/* END TUNABLES */
71  |  |  | 
72  |  | struct mt_keystream { | 
73  |  |   u_char poly_key[POLY1305_KEYLEN];     /* POLY1305_KEYLEN == 32 */  | 
74  |  |   u_char headerStream[CHACHA_BLOCKLEN]; /* CHACHA_BLOCKLEN == 64 */  | 
75  |  |   u_char mainStream[KEYSTREAMLEN];      /* KEYSTREAMLEN == 32768 */  | 
76  |  | };  | 
77  |  |  | 
78  |  | struct threadData { | 
79  |  |   EVP_CIPHER_CTX * main_evp;  | 
80  |  |   EVP_CIPHER_CTX * header_evp;  | 
81  |  |   u_char seqbuf[16];  | 
82  |  | };  | 
83  |  |  | 
84  |  | struct mt_keystream_batch { | 
85  |  |   u_int batchID;  | 
86  |  |   struct threadData tds[NUMTHREADS];  | 
87  |  |   struct mt_keystream streams[NUMSTREAMS];  | 
88  |  | };  | 
89  |  |  | 
90  |  | struct chachapoly_ctx_mt { | 
91  |  |   u_int seqnr;  | 
92  |  |   u_int batchID;  | 
93  |  |  | 
94  |  |   struct mt_keystream_batch batches[2];  | 
95  |  |  | 
96  |  |   pthread_t manager_tid[2];  | 
97  |  |   pthread_t self_tid;  | 
98  |  |  | 
99  |  |   pid_t mainpid;  | 
100  |  |   u_char zeros[KEYSTREAMLEN]; /* KEYSTREAMLEN == 32768 */  | 
101  |  |  | 
102  |  |   /* if OpenSSL has support for Poly1305 in the MAC EVPs  | 
103  |  |    * use that (OSSL >= 3.0) if not then it's OSSL 1.1 so  | 
104  |  |    * use the Poly1305 digest methods. Failing that use the  | 
105  |  |    * internal poly1305 methods */  | 
106  |  | #ifdef OPENSSL_HAVE_POLY_EVP  | 
107  |  |   EVP_MAC_CTX    *poly_ctx;  | 
108  |  | #elif !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305)  | 
109  |  |   EVP_PKEY_CTX   *poly_ctx;  | 
110  |  |   EVP_MD_CTX     *md_ctx;  | 
111  |  |   EVP_PKEY       *pkey;  | 
112  |  |   size_t         ptaglen;  | 
113  |  | #else  | 
114  |  |   char           *poly_ctx;  | 
115  |  | #endif  | 
116  |  | };  | 
117  |  |  | 
118  |  | struct manager_thread_args { | 
119  |  |   struct chachapoly_ctx_mt * ctx_mt;  | 
120  |  |   u_int oldBatchID;  | 
121  |  |   int retval;  | 
122  |  | };  | 
123  |  |  | 
124  |  | struct worker_thread_args { | 
125  |  |   u_int batchID;  | 
126  |  |   struct mt_keystream_batch * batch;  | 
127  |  |   int threadIndex;  | 
128  |  |   u_char * zeros;  | 
129  |  |   int retval;  | 
130  |  | };  | 
131  |  |  | 
132  |  | /* generate the keystream and header  | 
133  |  |  * we use nulls for the "data" (the zeros variable) in order to  | 
134  |  |  * get the raw keystream  | 
135  |  |  * Returns 0 on success and -1 on failure */  | 
136  |  | int  | 
137  |  | generate_keystream(struct mt_keystream * ks, u_int seqnr,  | 
138  |  |     struct threadData * td, u_char * zeros)  | 
139  | 0  | { | 
140  |  |   /* generate poly1305 key */  | 
141  | 0  |   memset(td->seqbuf, 0, sizeof(td->seqbuf));  | 
142  | 0  |   POKE_U64(td->seqbuf + 8, seqnr);  | 
143  | 0  |   memset(ks->poly_key , 0, sizeof(ks->poly_key));  | 
144  | 0  |   if (!EVP_CipherInit(td->main_evp, NULL, NULL, td->seqbuf, 1) ||  | 
145  | 0  |       EVP_Cipher(td->main_evp, ks->poly_key, ks->poly_key,  | 
146  | 0  |       sizeof(ks->poly_key)) < 0)  | 
147  | 0  |     return -1;  | 
148  |  |  | 
149  |  |   /* generate header keystream for encrypting payload length */  | 
150  | 0  |   if (!EVP_CipherInit(td->header_evp, NULL, NULL, td->seqbuf, 1) ||  | 
151  | 0  |       EVP_Cipher(td->header_evp, ks->headerStream, zeros, CHACHA_BLOCKLEN)  | 
152  | 0  |       < 0 )  | 
153  | 0  |     return -1;  | 
154  |  |  | 
155  |  |   /* generate main keystream for encrypting payload */  | 
156  | 0  |   td->seqbuf[0] = 1;  | 
157  | 0  |   if (!EVP_CipherInit(td->main_evp, NULL, NULL, td->seqbuf, 1) ||  | 
158  | 0  |       EVP_Cipher(td->main_evp, ks->mainStream, zeros, KEYSTREAMLEN) < 0)  | 
159  | 0  |     return -1;  | 
160  |  |  | 
161  | 0  |   return 0;  | 
162  | 0  | }  | 
163  |  |  | 
164  |  | /* free the EVP contexts associated with the give thread */  | 
165  |  | void  | 
166  |  | free_threadData(struct threadData * td)  | 
167  | 0  | { | 
168  | 0  |   if (td == NULL)  | 
169  | 0  |     return;  | 
170  | 0  |   if (td->main_evp) /* false if initialization didn't get this far */  | 
171  | 0  |     EVP_CIPHER_CTX_free(td->main_evp);  | 
172  | 0  |   if (td->header_evp) /* false if initialization didn't get this far */  | 
173  | 0  |     EVP_CIPHER_CTX_free(td->header_evp);  | 
174  | 0  |   explicit_bzero(td, sizeof(*td));  | 
175  | 0  | }  | 
176  |  |  | 
177  |  | /* initialize the EVPs used by the worker thread  | 
178  |  |    Returns 0 on success and -1 on failure */  | 
179  |  | int  | 
180  |  | initialize_threadData(struct threadData * td, const u_char *key)  | 
181  | 0  | { | 
182  | 0  |   memset(td,0,sizeof(*td));  | 
183  | 0  |   if ((td->main_evp = EVP_CIPHER_CTX_new()) == NULL ||  | 
184  | 0  |       (td->header_evp = EVP_CIPHER_CTX_new()) == NULL)  | 
185  | 0  |     goto fail;  | 
186  | 0  |   if (!EVP_CipherInit(td->main_evp, EVP_chacha20(), key, NULL, 1))  | 
187  | 0  |     goto fail;  | 
188  | 0  |   if (!EVP_CipherInit(td->header_evp, EVP_chacha20(), key + 32, NULL, 1))  | 
189  | 0  |     goto fail;  | 
190  | 0  |   if (EVP_CIPHER_CTX_iv_length(td->header_evp) != 16)  | 
191  | 0  |     goto fail;  | 
192  | 0  |   return 0;  | 
193  | 0  |  fail:  | 
194  | 0  |   free_threadData(td);  | 
195  | 0  |   return -1;  | 
196  | 0  | }  | 
197  |  |  | 
198  |  | struct worker_thread_args *  | 
199  |  | worker_thread(struct worker_thread_args * args)  | 
200  | 0  | { | 
201  |  |   /* check first */  | 
202  | 0  |   if (args == NULL)  | 
203  | 0  |     return NULL;  | 
204  | 0  |   if (args->batch == NULL || args->zeros == NULL) { | 
205  | 0  |     args->retval = 1;  | 
206  | 0  |     return args;  | 
207  | 0  |   }  | 
208  |  |  | 
209  | 0  |   int threadIndex = args->threadIndex;  | 
210  | 0  |   struct threadData * td = &(args->batch->tds[threadIndex]);  | 
211  | 0  |   u_int refseqnr = args->batchID * NUMSTREAMS;  | 
212  |  | 
  | 
213  | 0  |   for (int i = threadIndex; i < NUMSTREAMS; i += NUMTHREADS) { | 
214  | 0  |     if (generate_keystream(&(args->batch->streams[i]), refseqnr + i,  | 
215  | 0  |         td, args->zeros) == -1) { | 
216  | 0  |       args->retval = 1;  | 
217  | 0  |       return args;  | 
218  | 0  |     }  | 
219  | 0  |   }  | 
220  |  |  | 
221  | 0  |   args->retval = 0;  | 
222  | 0  |   return args;  | 
223  | 0  | }  | 
224  |  |  | 
225  |  | int  | 
226  |  | join_manager_thread(pthread_t manager_tid)  | 
227  | 0  | { | 
228  | 0  |   struct manager_thread_args * args;  | 
229  | 0  |   if (pthread_join(manager_tid, (void **) &args) == 0) { | 
230  | 0  |     if (args == NULL) { | 
231  | 0  |       debug_f("Manager thread returned NULL!"); | 
232  | 0  |       return 1;  | 
233  | 0  |     } else if (args == PTHREAD_CANCELED) { | 
234  | 0  |       debug_f("Manager thread canceled!"); | 
235  | 0  |       return 1;  | 
236  | 0  |     } else if (args->retval != 0) { | 
237  | 0  |       debug_f("Manager thread error (%d)", args->retval); | 
238  | 0  |       free(args);  | 
239  | 0  |       return 1;  | 
240  | 0  |     } else { | 
241  | 0  |       free(args);  | 
242  | 0  |       return 0;  | 
243  | 0  |     }  | 
244  | 0  |   } else { | 
245  | 0  |     debug_f("pthread_join error!"); | 
246  | 0  |     return 1;  | 
247  | 0  |   }  | 
248  | 0  | }  | 
249  |  |  | 
250  |  | void  | 
251  |  | chachapoly_free_mt(struct chachapoly_ctx_mt * ctx_mt)  | 
252  | 0  | { | 
253  | 0  |   if (ctx_mt == NULL)  | 
254  | 0  |     return;  | 
255  |  |  | 
256  |  | #ifdef OPENSSL_HAVE_POLY_EVP  | 
257  |  |   if (ctx_mt->poly_ctx != NULL) { | 
258  |  |     EVP_MAC_CTX_free(ctx_mt->poly_ctx);  | 
259  |  |     ctx_mt->poly_ctx = NULL;  | 
260  |  |   }  | 
261  |  | #elif !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305)  | 
262  | 0  |   if (ctx_mt->md_ctx != NULL) { | 
263  | 0  |     EVP_MD_CTX_free(ctx_mt->md_ctx);  | 
264  | 0  |     ctx_mt->md_ctx = NULL;  | 
265  | 0  |   }  | 
266  | 0  |   if (ctx_mt->pkey != NULL) { | 
267  | 0  |     EVP_PKEY_free(ctx_mt->pkey);  | 
268  | 0  |     ctx_mt->pkey = NULL;  | 
269  | 0  |   }  | 
270  | 0  | #endif  | 
271  |  |  | 
272  |  |   /*  | 
273  |  |    * Only cleanup the manager threads if we are the PID that initialized  | 
274  |  |    * them! If we're a fork, the threads don't really exist.  | 
275  |  |    */  | 
276  |  | 
  | 
277  | 0  |   if (getpid() == ctx_mt->mainpid) { | 
278  | 0  |     if (ctx_mt->manager_tid[0] != ctx_mt->self_tid) { | 
279  | 0  |       join_manager_thread(ctx_mt->manager_tid[0]);  | 
280  | 0  |       ctx_mt->manager_tid[0] = ctx_mt->self_tid;  | 
281  | 0  |     }  | 
282  | 0  |     if (ctx_mt->manager_tid[1] != ctx_mt->self_tid) { | 
283  | 0  |       join_manager_thread(ctx_mt->manager_tid[1]);  | 
284  | 0  |       ctx_mt->manager_tid[1] = ctx_mt->self_tid;  | 
285  | 0  |     }  | 
286  | 0  |   }  | 
287  |  |  | 
288  |  |   /* Cleanup thread data structures. */  | 
289  | 0  |   for (int i=0; i<2; i++)  | 
290  | 0  |     for (int j=0; j<NUMTHREADS; j++)  | 
291  | 0  |       free_threadData(&(ctx_mt->batches[i].tds[j]));  | 
292  |  |  | 
293  |  |   /* Zero and free the whole multithreaded cipher context. */  | 
294  | 0  |   freezero(ctx_mt, sizeof(*ctx_mt));  | 
295  |  | 
  | 
296  | 0  |   return;  | 
297  | 0  | }  | 
298  |  |  | 
299  |  | struct chachapoly_ctx_mt *  | 
300  |  | chachapoly_new_mt(u_int startseqnr, const u_char * key, u_int keylen)  | 
301  | 0  | { | 
302  | 0  |   struct chachapoly_ctx_mt * ctx_mt = xmalloc(sizeof(*ctx_mt));  | 
303  | 0  |   memset(ctx_mt, 0, sizeof(*ctx_mt));  | 
304  |  |   /* Initialize the sequence number. When rekeying, this won't be zero. */  | 
305  | 0  |   ctx_mt->seqnr = startseqnr;  | 
306  | 0  |   ctx_mt->batchID = startseqnr / NUMSTREAMS;  | 
307  | 0  |   struct threadData mainData;  | 
308  | 0  |   int tDataI;  | 
309  | 0  |   int genKSfailed = 0;  | 
310  |  | 
  | 
311  |  | #ifdef OPENSSL_HAVE_POLY_EVP  | 
312  |  |   EVP_MAC *mac = NULL;  | 
313  |  |   if ((mac = EVP_MAC_fetch(NULL, "POLY1305", NULL)) == NULL)  | 
314  |  |     goto fail;  | 
315  |  |   if ((ctx_mt->poly_ctx = EVP_MAC_CTX_new(mac)) == NULL)  | 
316  |  |     goto fail;  | 
317  |  | #elif !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305)  | 
318  | 0  |   if ((ctx_mt->md_ctx = EVP_MD_CTX_new()) == NULL)  | 
319  | 0  |     goto fail;  | 
320  | 0  |   if ((ctx_mt->pkey = EVP_PKEY_new_mac_key(EVP_PKEY_POLY1305, NULL,  | 
321  | 0  |       ctx_mt->zeros, POLY1305_KEYLEN)) == NULL)  | 
322  | 0  |     goto fail;  | 
323  | 0  |   if (EVP_DigestSignInit(ctx_mt->md_ctx, &ctx_mt->poly_ctx, NULL, NULL,  | 
324  | 0  |       ctx_mt->pkey) == 0)  | 
325  | 0  |     goto fail;  | 
326  |  | #else  | 
327  |  |   ctx_mt->poly_ctx = NULL;  | 
328  |  | #endif  | 
329  |  |  | 
330  | 0  |   ctx_mt->batches[ctx_mt->batchID % 2].batchID = ctx_mt->batchID;  | 
331  | 0  |   ctx_mt->batches[(ctx_mt->batchID + 1) % 2].batchID =  | 
332  | 0  |       ctx_mt->batchID + 1;  | 
333  |  |  | 
334  |  |   /* initialize batches[0] tds */  | 
335  | 0  |   for (tDataI = 0; tDataI < NUMTHREADS; tDataI++) { | 
336  | 0  |     if (initialize_threadData(&(ctx_mt->batches[0].tds[tDataI]),  | 
337  | 0  |         key) != 0)  | 
338  | 0  |       break;  | 
339  | 0  |   }  | 
340  | 0  |   if (tDataI < NUMTHREADS) { | 
341  |  |     /* Backtrack starting with 'tDataI - 1' */  | 
342  | 0  |     for (tDataI--; tDataI >= 0; tDataI--)  | 
343  | 0  |       free_threadData(&(ctx_mt->batches[0].tds[tDataI]));  | 
344  | 0  |     goto fail;  | 
345  | 0  |   }  | 
346  |  |   /* initialize batches[1] tds */  | 
347  | 0  |   for (tDataI = 0; tDataI < NUMTHREADS; tDataI++) { | 
348  | 0  |     if (initialize_threadData(&(ctx_mt->batches[1].tds[tDataI]),  | 
349  | 0  |         key) != 0)  | 
350  | 0  |       break;  | 
351  | 0  |   }  | 
352  | 0  |   if (tDataI < NUMTHREADS) { | 
353  |  |     /* Backtrack starting with 'tDataI - 1' */  | 
354  | 0  |     for (tDataI--; tDataI >= 0; tDataI--)  | 
355  | 0  |       free_threadData(&(ctx_mt->batches[1].tds[tDataI]));  | 
356  |  |     /* Free the batches[0] tds too */  | 
357  | 0  |     for (tDataI = NUMTHREADS; tDataI >= 0; tDataI--)  | 
358  | 0  |       free_threadData(&(ctx_mt->batches[0].tds[tDataI]));  | 
359  | 0  |     goto fail;  | 
360  | 0  |   }  | 
361  |  |  | 
362  | 0  |   if (initialize_threadData(&mainData, key) != 0) { | 
363  | 0  |     chachapoly_free_mt(ctx_mt);  | 
364  | 0  |     explicit_bzero(&startseqnr, sizeof(startseqnr));  | 
365  | 0  |     return NULL;  | 
366  | 0  |   }  | 
367  |  |  | 
368  | 0  |   for (int i=0; i<2; i++) { | 
369  | 0  |     u_int refseqnr = ctx_mt->batches[i].batchID * NUMSTREAMS;  | 
370  | 0  |     for (int j = startseqnr > refseqnr ? startseqnr - refseqnr : 0;  | 
371  | 0  |          j<NUMSTREAMS; j++) { | 
372  | 0  |       if (generate_keystream(&(ctx_mt->batches[i].streams[j]),  | 
373  | 0  |           refseqnr + j, &mainData, ctx_mt->zeros) == -1) { | 
374  | 0  |         debug_f("generate_keystream failed in " | 
375  | 0  |             "chacha20-poly1305@hpnssh.org");  | 
376  | 0  |         genKSfailed = 1;  | 
377  | 0  |         break; /* imperfect, but it helps */  | 
378  | 0  |       }  | 
379  | 0  |     }  | 
380  | 0  |   }  | 
381  |  | 
  | 
382  | 0  |   free_threadData(&mainData);  | 
383  |  | 
  | 
384  | 0  |   if (genKSfailed != 0) { | 
385  | 0  |     chachapoly_free_mt(ctx_mt);  | 
386  | 0  |     explicit_bzero(&startseqnr, sizeof(startseqnr));  | 
387  | 0  |     return NULL;  | 
388  | 0  |   }  | 
389  |  |  | 
390  |  |   /* Store the PID so that in the future, we can tell if we're a fork */  | 
391  | 0  |   ctx_mt->mainpid = getpid();  | 
392  | 0  |   ctx_mt->self_tid = pthread_self();  | 
393  | 0  |   ctx_mt->manager_tid[0] = ctx_mt->self_tid;  | 
394  | 0  |   ctx_mt->manager_tid[1] = ctx_mt->self_tid;  | 
395  |  |   /* was reporting the TID using gettid() but it's not portable */  | 
396  | 0  |   debug2_f("<main thread: pid=%u, ptid=0x%lx>", getpid(), pthread_self()); | 
397  |  |  | 
398  |  |   /* Success! */  | 
399  | 0  |   explicit_bzero(&startseqnr, sizeof(startseqnr));  | 
400  | 0  |   return ctx_mt;  | 
401  |  |  | 
402  | 0  |  fail:  | 
403  |  | #ifdef OPENSSL_HAVE_POLY_EVP  | 
404  |  |   if (ctx_mt->poly_ctx != NULL) { | 
405  |  |     EVP_MAC_CTX_free(ctx_mt->poly_ctx);  | 
406  |  |     ctx_mt->poly_ctx = NULL;  | 
407  |  |   }  | 
408  |  | #elif !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305)  | 
409  | 0  |   if (ctx_mt->md_ctx != NULL) { | 
410  | 0  |     EVP_MD_CTX_free(ctx_mt->md_ctx);  | 
411  | 0  |     ctx_mt->md_ctx = NULL;  | 
412  | 0  |   }  | 
413  | 0  |   if (ctx_mt->pkey != NULL) { | 
414  | 0  |     EVP_PKEY_free(ctx_mt->pkey);  | 
415  | 0  |     ctx_mt->pkey = NULL;  | 
416  | 0  |   }  | 
417  | 0  | #endif  | 
418  | 0  |   freezero(ctx_mt, sizeof(*ctx_mt));  | 
419  | 0  |   explicit_bzero(&startseqnr, sizeof(startseqnr));  | 
420  | 0  |   return NULL;  | 
421  | 0  | }  | 
422  |  |  | 
/*
 * A fast method to XOR the keystream against the data:
 * dest[i] = src[i] ^ keystream[i] for every i < len.
 *
 * The bulk of the work is done a machine word at a time. Words are moved
 * with memcpy() rather than by casting the byte pointers, so none of the
 * three buffers needs any particular alignment and no object is accessed
 * through an incompatible pointer type. dest may be the same buffer as src.
 */
static inline void
fastXOR(u_char *dest, const u_char *src, const u_char *keystream, u_int len)
{
	typedef uint64_t chunk;
	size_t i = 0;

	/* whole words first */
	for (; i + sizeof(chunk) <= len; i += sizeof(chunk)) {
		chunk data, key;

		memcpy(&data, src + i, sizeof(data));
		memcpy(&key, keystream + i, sizeof(key));
		data ^= key;
		memcpy(dest + i, &data, sizeof(data));
	}
	/* then the remaining tail, one byte at a time */
	for (; i < len; i++)
		dest[i] = src[i] ^ keystream[i];
}
438  |  |  | 
439  |  | struct manager_thread_args *  | 
440  | 0  | manager_thread(struct manager_thread_args * margs) { | 
441  |  |   /* make sure we have valid data before proceeding */  | 
442  | 0  |   if (margs == NULL)  | 
443  | 0  |     return NULL;  | 
444  |  |  | 
445  | 0  |   struct chachapoly_ctx_mt * ctx_mt = margs->ctx_mt;  | 
446  | 0  |   if (ctx_mt == NULL) { | 
447  | 0  |     margs->retval = 1;  | 
448  | 0  |     return margs;  | 
449  | 0  |   }  | 
450  |  |  | 
451  | 0  |   u_int oldBatchID = margs->oldBatchID;  | 
452  |  | 
  | 
453  | 0  |   struct mt_keystream_batch * batch = &(ctx_mt->batches[oldBatchID % 2]);  | 
454  | 0  |   if (batch->batchID != oldBatchID) { | 
455  | 0  |     debug_f("Post-crypt batch miss! Seeking %u, found %u. Failing.", | 
456  | 0  |         oldBatchID, batch->batchID);  | 
457  | 0  |     margs->retval = 1;  | 
458  | 0  |     return margs;  | 
459  | 0  |   }  | 
460  |  |  | 
461  | 0  |   margs->retval = 0;  | 
462  | 0  |   u_int batchID = oldBatchID + 2;  | 
463  |  | 
  | 
464  | 0  |   pthread_t tid[NUMTHREADS];  | 
465  | 0  |   struct worker_thread_args * wargs = malloc(NUMTHREADS * sizeof(*wargs));  | 
466  | 0  |   int ti;  | 
467  |  | 
  | 
468  | 0  |   for (ti = 0; ti < NUMTHREADS; ti++) { | 
469  | 0  |     wargs[ti].batchID = batchID;  | 
470  | 0  |     wargs[ti].batch = batch;  | 
471  | 0  |     wargs[ti].threadIndex = ti;  | 
472  | 0  |     wargs[ti].zeros = ctx_mt->zeros;  | 
473  | 0  |     if (pthread_create(&(tid[ti]), NULL, (void *) worker_thread,  | 
474  | 0  |         &(wargs[ti])) != 0) { | 
475  | 0  |       margs->retval = 1;  | 
476  | 0  |       break;  | 
477  | 0  |     }  | 
478  | 0  |   }  | 
479  | 0  |   for (; ti < NUMTHREADS; ti++) /* for error condition */  | 
480  | 0  |     tid[ti] = pthread_self();  | 
481  |  | 
  | 
482  | 0  |   struct worker_thread_args * retwargs;  | 
483  |  | 
  | 
484  | 0  |   for (ti = 0; ti < NUMTHREADS; ti++) { | 
485  | 0  |     if (tid[ti] == pthread_self()) { | 
486  | 0  |       margs->retval = 1; /* redundant, but harmless */  | 
487  | 0  |       continue;  | 
488  | 0  |     }  | 
489  | 0  |     if (pthread_join(tid[ti], (void **) &retwargs) == 0) { | 
490  | 0  |       if (retwargs == NULL) { | 
491  | 0  |         debug_f("Worker thread returned NULL!"); | 
492  | 0  |         margs->retval = 1;  | 
493  | 0  |       } else if (retwargs == PTHREAD_CANCELED) { | 
494  | 0  |         debug_f("Worker thread canceled!"); | 
495  | 0  |         margs->retval = 1;  | 
496  | 0  |       } else { | 
497  | 0  |         if (retwargs->retval != 0) { | 
498  | 0  |           debug_f("Worker thread error (%d)", | 
499  | 0  |               retwargs->retval);  | 
500  | 0  |           margs->retval = 1;  | 
501  | 0  |         }  | 
502  | 0  |         if (retwargs != &(wargs[ti])) { | 
503  | 0  |           debug_f("Worker thread didn't return " | 
504  | 0  |               "expected structure!");  | 
505  | 0  |           margs->retval = 1;  | 
506  | 0  |         }  | 
507  | 0  |       }  | 
508  | 0  |     } else { | 
509  | 0  |       debug_f("pthread_join error!"); | 
510  | 0  |       margs->retval = 1;  | 
511  | 0  |     }  | 
512  | 0  |   }  | 
513  | 0  |   free(wargs);  | 
514  |  | 
  | 
515  | 0  |   if (margs->retval == 0) { | 
516  | 0  |     batch->batchID = batchID;  | 
517  | 0  |   }  | 
518  |  | 
  | 
519  | 0  |   return margs;  | 
520  | 0  | }  | 
521  |  |  | 
522  |  | int  | 
523  |  | chachapoly_crypt_mt(struct chachapoly_ctx_mt *ctx_mt, u_int seqnr, u_char *dest,  | 
524  |  |     const u_char *src, u_int len, u_int aadlen, u_int authlen, int do_encrypt)  | 
525  | 0  | { | 
526  |  | #ifdef SAFETY  | 
527  |  |   if (ctx_mt->mainpid != getpid()) { /* we're a fork */ | 
528  |  |     /*  | 
529  |  |      * TODO: this is EXTREMELY RARE, may never happen at all (only  | 
530  |  |      * if the fork calls crypt), so we should tell the compiler.  | 
531  |  |      */  | 
532  |  |     /* The worker threads don't exist, we could spawn them? */  | 
533  |  |     debug_f("Fork called crypt without workers!"); | 
534  |  |     chachapoly_free_mt(ctx_mt);  | 
535  |  |     return SSH_ERR_INTERNAL_ERROR;  | 
536  |  |   }  | 
537  |  | #endif  | 
538  |  | 
  | 
539  | 0  |   pthread_t * manager_tid = &(ctx_mt->manager_tid[ctx_mt->batchID % 2]);  | 
540  | 0  |   if (unlikely(*manager_tid != ctx_mt->self_tid)) { | 
541  | 0  |     int ret = join_manager_thread(*manager_tid);  | 
542  | 0  |     *manager_tid = ctx_mt->self_tid;  | 
543  | 0  |     if (ret != 0)  | 
544  | 0  |       return SSH_ERR_INTERNAL_ERROR;  | 
545  | 0  |   }  | 
546  |  |  | 
547  | 0  |   struct mt_keystream_batch * batch =  | 
548  | 0  |       &(ctx_mt->batches[ctx_mt->batchID % 2]);  | 
549  |  | 
  | 
550  | 0  |   struct mt_keystream * ks = &(batch->streams[seqnr % NUMSTREAMS]);  | 
551  |  | 
  | 
552  | 0  |   int r = SSH_ERR_INTERNAL_ERROR;  | 
553  |  | 
  | 
554  |  | #ifdef SAFETY  | 
555  |  |   if (batch->batchID == ctx_mt->batchID) { /* Safety check */ | 
556  |  | #endif  | 
557  |  |     /* check tag before anything else */  | 
558  | 0  |     if (!do_encrypt) { | 
559  | 0  |       const u_char *tag = src + aadlen + len;  | 
560  | 0  |       u_char expected_tag[POLY1305_TAGLEN];  | 
561  | 0  | #if !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305)  | 
562  | 0  |       if ((EVP_PKEY_CTX_ctrl(ctx_mt->poly_ctx, -1,  | 
563  | 0  |           EVP_PKEY_OP_SIGNCTX, EVP_PKEY_CTRL_SET_MAC_KEY,  | 
564  | 0  |           POLY1305_KEYLEN, ks->poly_key) <= 0) ||  | 
565  | 0  |           (EVP_DigestSignUpdate(ctx_mt->md_ctx, src, aadlen + len) == 0)) { | 
566  | 0  |         debug_f("SSL error while decrypting poly1305 tag"); | 
567  | 0  |         return SSH_ERR_INTERNAL_ERROR;  | 
568  | 0  |       }  | 
569  | 0  |       ctx_mt->ptaglen = POLY1305_TAGLEN;  | 
570  | 0  |       if (EVP_DigestSignFinal(ctx_mt->md_ctx, expected_tag,  | 
571  | 0  |           &ctx_mt->ptaglen) == 0) { | 
572  | 0  |         debug_f("SSL error while finalizing decyrpted poly1305"); | 
573  | 0  |         return SSH_ERR_INTERNAL_ERROR;  | 
574  | 0  |       }  | 
575  |  | #else  | 
576  |  |       poly1305_auth(ctx_mt->poly_ctx, expected_tag, src,  | 
577  |  |           aadlen + len, ks->poly_key);  | 
578  |  | #endif  | 
579  | 0  |       if (timingsafe_bcmp(expected_tag, tag, POLY1305_TAGLEN)  | 
580  | 0  |           != 0)  | 
581  | 0  |         r = SSH_ERR_MAC_INVALID;  | 
582  | 0  |       explicit_bzero(expected_tag, sizeof(expected_tag));  | 
583  | 0  |     }  | 
584  | 0  |     if (r != SSH_ERR_MAC_INVALID) { | 
585  |  |       /* Crypt additional data (i.e., packet length) */  | 
586  |  |       /* TODO: is aadlen always four bytes? */  | 
587  |  |       /* TODO: do we always have an aadlen? */  | 
588  | 0  |       if (aadlen)  | 
589  | 0  |         for (u_int i=0; i<aadlen; i++)  | 
590  | 0  |           dest[i] = ks->headerStream[i] ^ src[i];  | 
591  |  |       /* Crypt payload */  | 
592  | 0  |       fastXOR(dest+aadlen,src+aadlen,ks->mainStream,len);  | 
593  |  |       /* calculate and append tag */  | 
594  | 0  | #if !defined(WITH_OPENSSL3) && defined(EVP_PKEY_POLY1305)  | 
595  | 0  |       if (do_encrypt) { | 
596  | 0  |         if ((EVP_PKEY_CTX_ctrl(ctx_mt->poly_ctx, -1,  | 
597  | 0  |             EVP_PKEY_OP_SIGNCTX, EVP_PKEY_CTRL_SET_MAC_KEY,  | 
598  | 0  |             POLY1305_KEYLEN, ks->poly_key) <=0) ||  | 
599  | 0  |             (EVP_DigestSignUpdate(ctx_mt->md_ctx, dest, aadlen + len) == 0)) { | 
600  | 0  |           debug_f ("SSL error while encrypting poly1305 tag"); | 
601  | 0  |           return SSH_ERR_INTERNAL_ERROR;  | 
602  | 0  |         }  | 
603  | 0  |         ctx_mt->ptaglen = POLY1305_TAGLEN;  | 
604  | 0  |         if (EVP_DigestSignFinal(ctx_mt->md_ctx, dest+aadlen+len,  | 
605  | 0  |             &ctx_mt->ptaglen) == 0) { | 
606  | 0  |           debug_f("SSL error while finalizing decyrpted poly1305"); | 
607  | 0  |           return SSH_ERR_INTERNAL_ERROR;  | 
608  | 0  |         }  | 
609  | 0  |       }  | 
610  |  | #else  | 
611  |  |       if (do_encrypt)  | 
612  |  |         poly1305_auth(ctx_mt->poly_ctx, dest+aadlen+len,  | 
613  |  |             dest, aadlen+len, ks->poly_key);  | 
614  |  | #endif  | 
615  | 0  |       r=0; /* Success! */  | 
616  | 0  |     }  | 
617  | 0  |     if (r) /* Anything nonzero is an error. */  | 
618  | 0  |       return r;  | 
619  |  |  | 
620  | 0  |     ctx_mt->seqnr = seqnr + 1;  | 
621  |  | 
  | 
622  | 0  |     if (unlikely(ctx_mt->seqnr / NUMSTREAMS > ctx_mt->batchID)) { | 
623  | 0  |       struct manager_thread_args * args =  | 
624  | 0  |           malloc(sizeof(*args));  | 
625  | 0  |       if (args == NULL) { | 
626  | 0  |         return SSH_ERR_INTERNAL_ERROR;  | 
627  | 0  |       }  | 
628  | 0  |       args->ctx_mt = ctx_mt;  | 
629  | 0  |       args->oldBatchID = ctx_mt->batchID;  | 
630  | 0  |       if (pthread_create(&(ctx_mt->manager_tid[ctx_mt->batchID  | 
631  | 0  |           % 2]), NULL, (void *) manager_thread, args) != 0) { | 
632  | 0  |         free(args);  | 
633  | 0  |         return SSH_ERR_INTERNAL_ERROR;  | 
634  | 0  |       }  | 
635  | 0  |       ctx_mt->batchID = ctx_mt->seqnr / NUMSTREAMS;  | 
636  | 0  |     }  | 
637  |  |  | 
638  |  |     /* TODO: Nothing we need to sanitize here? */  | 
639  |  |  | 
640  | 0  |     return 0;  | 
641  |  | #ifdef SAFETY  | 
642  |  |   } else { /* Bad, it's the wrong batch. */ | 
643  |  |     debug_f( "Pre-crypt batch miss! Seeking %u, found %u. Failing.",  | 
644  |  |         ctx_mt->batchID, batch->batchID);  | 
645  |  |     return SSH_ERR_INTERNAL_ERROR;  | 
646  |  |   }  | 
647  |  | #endif  | 
648  | 0  | }  | 
649  |  |  | 
650  |  | int  | 
651  |  | chachapoly_get_length_mt(struct chachapoly_ctx_mt *ctx_mt, u_int *plenp,  | 
652  |  |     u_int seqnr, const u_char *cp, u_int len)  | 
653  | 0  | { | 
654  |  |   /* TODO: add compiler hints */  | 
655  |  | #ifdef SAFETY  | 
656  |  |   if (ctx_mt->mainpid != getpid()) { /* Use serial mode if we're a fork */ | 
657  |  |     debug_f("We're a fork. Failing."); | 
658  |  |     return SSH_ERR_INTERNAL_ERROR;  | 
659  |  |   }  | 
660  |  | #endif  | 
661  |  | 
  | 
662  | 0  |   if (len < 4)  | 
663  | 0  |     return SSH_ERR_MESSAGE_INCOMPLETE;  | 
664  |  |  | 
665  | 0  |   pthread_t * manager_tid = &(ctx_mt->manager_tid[ctx_mt->batchID % 2]);  | 
666  | 0  |   if (unlikely(*manager_tid != ctx_mt->self_tid)) { | 
667  | 0  |     int ret = join_manager_thread(*manager_tid);  | 
668  | 0  |     *manager_tid = ctx_mt->self_tid;  | 
669  | 0  |     if (ret != 0)  | 
670  | 0  |       return SSH_ERR_INTERNAL_ERROR;  | 
671  | 0  |   }  | 
672  |  |  | 
673  | 0  |   u_char buf[4];  | 
674  |  | #ifdef SAFETY  | 
675  |  |   u_int sought_batchID = seqnr / NUMSTREAMS;  | 
676  |  | #endif  | 
677  | 0  |   struct mt_keystream_batch * batch =  | 
678  | 0  |       &(ctx_mt->batches[ctx_mt->batchID % 2]);  | 
679  | 0  |   struct mt_keystream * ks = &(batch->streams[seqnr % NUMSTREAMS]);  | 
680  |  | #ifdef SAFETY  | 
681  |  |   if (batch->batchID == sought_batchID) { | 
682  |  | #endif  | 
683  | 0  |     for (u_int i=0; i < sizeof(buf); i++)  | 
684  | 0  |       buf[i]=ks->headerStream[i] ^ cp[i];  | 
685  | 0  |     *plenp = PEEK_U32(buf);  | 
686  | 0  |     return 0;  | 
687  |  | #ifdef SAFETY  | 
688  |  |   } else { | 
689  |  |     debug_f("Batch miss! Seeking %u, found %u. Failing.", | 
690  |  |         sought_batchID, batch->batchID);  | 
691  |  |     return SSH_ERR_INTERNAL_ERROR;  | 
692  |  |   }  | 
693  |  | #endif  | 
694  | 0  | }  | 
695  |  | #endif /* defined(HAVE_EVP_CHACHA20) && !defined(HAVE_BROKEN_CHACHA20) */  |