/src/hpn-ssh/cipher-ctr-mt.c
Line | Count | Source |
1 | | /* |
2 | | * OpenSSH Multi-threaded AES-CTR Cipher |
3 | | * |
4 | | * Author: Benjamin Bennett <ben@psc.edu> |
5 | | * Author: Mike Tasota <tasota@gmail.com> |
6 | | * Author: Chris Rapier <rapier@psc.edu> |
7 | | * Copyright (c) 2008-2021 Pittsburgh Supercomputing Center. All rights reserved. |
8 | | * |
9 | | * Based on original OpenSSH AES-CTR cipher. Small portions remain unchanged, |
10 | | * Copyright (c) 2003 Markus Friedl <markus@openbsd.org> |
11 | | * |
12 | | * Permission to use, copy, modify, and distribute this software for any |
13 | | * purpose with or without fee is hereby granted, provided that the above |
14 | | * copyright notice and this permission notice appear in all copies. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
17 | | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
18 | | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
19 | | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
20 | | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
21 | | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
22 | | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
23 | | */ |
24 | | #include "includes.h" |
25 | | |
26 | | #if defined(WITH_OPENSSL) && !defined(WITH_OPENSSL3) |
27 | | #include <sys/types.h> |
28 | | |
29 | | #include <stdarg.h> |
30 | | #include <string.h> |
31 | | |
32 | | #include <openssl/evp.h> |
33 | | |
34 | | #include "xmalloc.h" |
35 | | #include "log.h" |
36 | | #include <unistd.h> |
37 | | #include "uthash.h" |
38 | | |
39 | | /* compatibility with old or broken OpenSSL versions */ |
40 | | #include "openbsd-compat/openssl-compat.h" |
41 | | |
42 | | #ifndef USE_BUILTIN_RIJNDAEL |
43 | | #include <openssl/aes.h> |
44 | | #endif |
45 | | |
46 | | #include <pthread.h> |
47 | | |
48 | | #ifdef __APPLE__ |
49 | | #include <sys/types.h> |
50 | | #include <sys/sysctl.h> |
51 | | #endif |
52 | | |
53 | | /* note regarding threads and queues */ |
54 | | /* initially this cipher was written in a way that |
55 | | * the key stream was generated in a per cipher block |
56 | | * loop. For example, if the key stream queue length was |
57 | | * 16k and the cipher block size was 16 bytes it would |
58 | | * fill the queue 16 bytes at a time. Mitch Dorrell pointed |
59 | | * out that we could fill the queue in one call, eliminating the
60 | | * loop and the repeated calls to EVP_EncryptUpdate. Doing so
61 | | * dramatically reduced CPU load in the threads and indicated
62 | | * that we could also eliminate most of the threads and queues,
63 | | * as it takes far less time for a queue to enter the KQFULL
64 | | * state. As such, we've reduced the default number of threads
65 | | * and queues from 2 and 8 (respectively) to 1 and 2. We've also
66 | | * eliminated the need to determine the physical number of cores on
67 | | * the system and, if the user desires, more threads can be spun up
68 | | * using an environment variable. Additionally, the number of queues
69 | | * is now fixed at thread_count + 1.
70 | | * cjr 10/19/2022 */ |
71 | | |
72 | | /*-------------------- TUNABLES --------------------*/ |
73 | | /* maximum number of threads and queues */ |
74 | 0 | #define MAX_THREADS 4 |
75 | 0 | #define MAX_NUMKQ (MAX_THREADS + 1) |
76 | | |
77 | | /* Number of pregen threads to use */ |
78 | | /* this is a default value. The actual number is
79 | | * determined during init and may be overridden via the
80 | | * SSH_CIPHER_THREADS environment variable */
81 | | int cipher_threads = 1; |
82 | | |
83 | | /* Number of keystream queues */ |
84 | | /* ideally this should be large enough so that there is |
85 | | * always a key queue for a thread to work on |
86 | | * so maybe double the number of threads. Again this
87 | | * is a default and the actual value is determined in init*/ |
88 | | int numkq = 2; |
89 | | |
90 | | /* Length of a keystream queue */ |
91 | | /* one queue holds 512KB (1024 * 32 * 16) of key data |
92 | | * since the queues are destroyed after a rekey
93 | | * and at least one has to be fully filled prior to
94 | | * enciphering data, we don't want this to be too large */
95 | 0 | #define KQLEN (1024 * 32) |
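/* Added note (illustrative, not part of the original source): a quick
 * compile-time check of the arithmetic described above. KQLEN blocks of
 * AES_BLOCK_SIZE (16) bytes each is 512 KiB of keystream per queue. */
#if 0
_Static_assert(KQLEN * AES_BLOCK_SIZE == 512 * 1024,
    "one keystream queue holds 512 KiB of key data");
#endif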
96 | | |
97 | | /* Processor cacheline length */ |
98 | | #define CACHELINE_LEN 64 |
99 | | |
100 | | /* Can the system do unaligned loads natively? */ |
101 | | #if defined(__aarch64__) || \ |
102 | | defined(__i386__) || \ |
103 | | defined(__powerpc__) || \ |
104 | | defined(__x86_64__) |
105 | | # define CIPHER_UNALIGNED_OK |
106 | | #endif |
107 | | #if defined(__SIZEOF_INT128__) |
108 | | # define CIPHER_INT128_OK |
109 | | #endif |
110 | | /*-------------------- END TUNABLES --------------------*/ |
111 | | |
112 | 0 | #define HAVE_NONE 0 |
113 | 0 | #define HAVE_KEY 1 |
114 | 0 | #define HAVE_IV 2 |
115 | | int X = 0; |
116 | | |
117 | | const EVP_CIPHER *evp_aes_ctr_mt(void); |
118 | | |
119 | | /* Keystream Queue state */ |
120 | | enum { |
121 | | KQINIT, |
122 | | KQEMPTY, |
123 | | KQFILLING, |
124 | | KQFULL, |
125 | | KQDRAINING |
126 | | }; |
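/* Added summary (derived from the code below, not part of the original
 * source) of how a queue moves between these states:
 *   KQINIT     -> KQDRAINING   the first pregen thread fills q[0] at startup
 *   KQEMPTY    -> KQFILLING    a pregen thread claims an empty queue
 *   KQFILLING  -> KQFULL       that thread finishes generating keystream
 *   KQFULL     -> KQDRAINING   the consumer (ssh_aes_ctr) starts reading it
 *   KQDRAINING -> KQEMPTY      the consumer exhausts it; producers may refill
 */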
127 | | |
128 | | /* Keystream Queue struct */ |
129 | | struct kq { |
130 | | u_char keys[KQLEN][AES_BLOCK_SIZE]; /* [32768][16B] */ |
131 | | u_char ctr[AES_BLOCK_SIZE]; /* 16B */ |
132 | | u_char pad0[CACHELINE_LEN]; |
133 | | pthread_mutex_t lock; |
134 | | pthread_cond_t cond; |
135 | | int qstate; |
136 | | u_char pad1[CACHELINE_LEN]; |
137 | | }; |
138 | | |
139 | | /* Context struct */ |
140 | | struct ssh_aes_ctr_ctx_mt |
141 | | { |
142 | | long unsigned int struct_id; |
143 | | int keylen; |
144 | | int state; |
145 | | int qidx; |
146 | | int ridx; |
147 | | int id[MAX_THREADS]; /* 32 */ |
148 | | AES_KEY aes_key; |
149 | | const u_char *orig_key; |
150 | | u_char aes_counter[AES_BLOCK_SIZE]; /* 16B */ |
151 | | pthread_t tid[MAX_THREADS]; /* 32 */ |
152 | | pthread_rwlock_t tid_lock; |
153 | | struct kq q[MAX_NUMKQ]; /* 33 */ |
154 | | #ifdef __APPLE__ |
155 | | pthread_rwlock_t stop_lock; |
156 | | int exit_flag; |
157 | | #endif /* __APPLE__ */ |
158 | | }; |
159 | | |
160 | | /* this defines the hash and elements of evp context pointers |
161 | | * that are created in thread_loop. We use this to clear and |
162 | | * free the contexts in stop_and_join_pregen_threads
163 | | */ |
164 | | struct aes_mt_ctx_ptrs { |
165 | | pthread_t tid; |
166 | | EVP_CIPHER_CTX *pointer; /* 32 */ |
167 | | UT_hash_handle hh; |
168 | | }; |
169 | | |
170 | | /* globals */ |
171 | | /* how we increment the id of the structs we create */
172 | | long unsigned int global_struct_id = 0; |
173 | | |
174 | | /* keep a copy of the pointers created in thread_loop to free later */ |
175 | | struct aes_mt_ctx_ptrs *evp_ptrs = NULL; |
176 | | |
177 | | /* |
178 | | * Add num to counter 'ctr' |
179 | | */ |
180 | | static void |
181 | | ssh_ctr_add(u_char *ctr, uint32_t num, u_int len) |
182 | 0 | { |
183 | 0 | int i; |
184 | 0 | uint16_t n; |
185 | |
186 | 0 | for (n = 0, i = len - 1; i >= 0 && (num || n); i--) { |
187 | 0 | n = ctr[i] + (num & 0xff) + n; |
188 | 0 | num >>= 8; |
189 | 0 | ctr[i] = n & 0xff; |
190 | 0 | n >>= 8; |
191 | 0 | } |
192 | 0 | } |
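/* Added worked example (illustrative, not part of the original source):
 * ssh_ctr_add() treats ctr as a big-endian integer and carries across
 * byte boundaries, e.g. with len = 4:
 *   ctr = {0x00, 0x00, 0xff, 0xff}, num = 1   ->  {0x00, 0x01, 0x00, 0x00}
 *   ctr = {0x00, 0x00, 0xff, 0xff}, num = 257 ->  {0x00, 0x01, 0x01, 0x00}
 */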
193 | | |
194 | | /* |
195 | | * Threads may be cancelled in a pthread_cond_wait, so we must unlock the mutex
196 | | */ |
197 | | static void |
198 | | thread_loop_cleanup(void *x) |
199 | 0 | { |
200 | 0 | pthread_mutex_unlock((pthread_mutex_t *)x); |
201 | 0 | } |
202 | | |
203 | | #ifdef __APPLE__ |
204 | | /* Check if we should exit. We use both cancellation and an exit flag
205 | | * since on OSX threads seem to occasionally fail to notice when they have |
206 | | * been cancelled. We want to have a backup to make sure that we won't hang |
207 | | * when the main process join()-s the cancelled thread. |
208 | | */ |
209 | | static void |
210 | | thread_loop_check_exit(struct ssh_aes_ctr_ctx_mt *c) |
211 | | { |
212 | | int exit_flag; |
213 | | |
214 | | pthread_rwlock_rdlock(&c->stop_lock); |
215 | | exit_flag = c->exit_flag; |
216 | | pthread_rwlock_unlock(&c->stop_lock); |
217 | | |
218 | | if (exit_flag) |
219 | | pthread_exit(NULL); |
220 | | } |
221 | | #else |
222 | | # define thread_loop_check_exit(s) |
223 | | #endif /* __APPLE__ */ |
224 | | |
225 | | /* |
226 | | * Helper function to terminate the helper threads |
227 | | */ |
228 | | static void |
229 | | stop_and_join_pregen_threads(struct ssh_aes_ctr_ctx_mt *c) |
230 | 0 | { |
231 | 0 | int i; |
232 | |
233 | | #ifdef __APPLE__ |
234 | | /* notify threads that they should exit */ |
235 | | pthread_rwlock_wrlock(&c->stop_lock); |
236 | | c->exit_flag = TRUE; |
237 | | pthread_rwlock_unlock(&c->stop_lock); |
238 | | #endif /* __APPLE__ */ |
239 | | |
240 | | /* Cancel pregen threads */ |
241 | 0 | for (i = 0; i < cipher_threads; i++) { |
242 | 0 | debug ("Canceled %lu (%lu,%d)", c->tid[i], c->struct_id, c->id[i]); |
243 | 0 | pthread_cancel(c->tid[i]); |
244 | 0 | } |
245 | 0 | for (i = 0; i < numkq; i++) { |
246 | 0 | pthread_mutex_lock(&c->q[i].lock); |
247 | 0 | pthread_cond_broadcast(&c->q[i].cond); |
248 | 0 | pthread_mutex_unlock(&c->q[i].lock); |
249 | 0 | } |
250 | 0 | for (i = 0; i < cipher_threads; i++) { |
251 | 0 | if (pthread_kill(c->tid[i], 0) != 0) |
252 | 0 | debug3("AES-CTR MT pthread_join failure: Invalid thread id %lu in %s", |
253 | 0 | c->tid[i], __FUNCTION__); |
254 | 0 | else { |
255 | 0 | debug ("Joining %lu (%lu, %d)", c->tid[i], c->struct_id, c->id[i]); |
256 | 0 | pthread_mutex_destroy(&c->q[i].lock); |
257 | 0 | pthread_cond_destroy(&c->q[i].cond); |
258 | 0 | pthread_join(c->tid[i], NULL); |
259 | | /* this finds the entry in the hash that corresponds to the
260 | | * thread id. That's used to find the pointer to the cipher struct |
261 | | * created in thread_loop. */ |
262 | 0 | struct aes_mt_ctx_ptrs *ptr; |
263 | 0 | HASH_FIND_INT(evp_ptrs, &c->tid[i], ptr); |
264 | 0 | EVP_CIPHER_CTX_free(ptr->pointer); |
265 | 0 | HASH_DEL(evp_ptrs, ptr); |
266 | 0 | free(ptr); } |
267 | 0 | } |
268 | 0 | pthread_rwlock_destroy(&c->tid_lock); |
269 | 0 | } |
270 | | |
271 | | /* |
272 | | * The life of a pregen thread: |
273 | | * Find empty keystream queues and fill them using their counter. |
274 | | * When done, update counter for the next fill. |
275 | | */ |
276 | | /* previously this used the low level interface which is, sadly, |
277 | | * slower than the EVP interface by a long shot. The original ctx (from the |
278 | | * body of the code) isn't passed in here but we have the key and the counter |
279 | | * which means we should be able to create the exact same ctx and use that to |
280 | | * fill the keystream queues. I'm concerned about additional overhead but the |
281 | | * additional speed from AESNI should make up for it. */ |
282 | | /* The above comment was made when I thought I needed to do a new EVP init for |
283 | | * each counter increment. Turns out not to be the case -cjr 10/15/21*/ |
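/* Added sketch (illustrative only, not part of the original source) of the
 * trick thread_loop uses below: in CTR mode, encrypting a buffer of zeros
 * yields the raw keystream, since XOR with zero is the identity. The
 * function and parameter names here are hypothetical. */
#if 0
static void
keystream_fill_sketch(const u_char *key, const u_char *iv, u_char *out)
{
	u_char zeros[16 * AES_BLOCK_SIZE] = {0};	/* 16 blocks of zeros */
	int outlen = 0;
	EVP_CIPHER_CTX *evp = EVP_CIPHER_CTX_new();

	/* one init sets the key and the starting counter; EVP increments
	 * the counter internally for every block it processes */
	EVP_EncryptInit_ex(evp, EVP_aes_128_ctr(), NULL, key, iv);
	/* out now holds sizeof(zeros) bytes of keystream */
	EVP_EncryptUpdate(evp, out, &outlen, zeros, sizeof(zeros));
	EVP_CIPHER_CTX_free(evp);
}
#endif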
284 | | |
285 | | static void * |
286 | | thread_loop(void *x) |
287 | 0 | { |
288 | 0 | EVP_CIPHER_CTX *aesni_ctx; |
289 | 0 | struct ssh_aes_ctr_ctx_mt *c = x; |
290 | 0 | struct kq *q; |
291 | 0 | struct aes_mt_ctx_ptrs *ptr; |
292 | 0 | int qidx; |
293 | 0 | pthread_t first_tid; |
294 | 0 | int outlen; |
295 | 0 | u_char mynull[KQLEN * AES_BLOCK_SIZE]; |
296 | 0 | memset(&mynull, 0, KQLEN * AES_BLOCK_SIZE); |
297 | | |
298 | | /* get the thread id to see if this is the first one */ |
299 | 0 | pthread_rwlock_rdlock(&c->tid_lock); |
300 | 0 | first_tid = c->tid[0]; |
301 | 0 | pthread_rwlock_unlock(&c->tid_lock); |
302 | | |
303 | | /* create the context for this thread */ |
304 | 0 | aesni_ctx = EVP_CIPHER_CTX_new(); |
305 | | |
306 | | /* keep track of the pointer for the evp in this struct |
307 | | * so we can free it later. So we place it in a hash indexed on the |
308 | | * thread id, which is available to us in the free function. |
309 | | * Note, the thread id isn't necessarily unique across rekeys but
310 | | * that's okay as they are unique during a key. */ |
311 | 0 | ptr = malloc(sizeof *ptr); /* freed in stop_and_join_pregen_threads */
312 | 0 | ptr->tid = pthread_self(); /* index for hash */ |
313 | 0 | ptr->pointer = aesni_ctx; |
314 | 0 | HASH_ADD_INT(evp_ptrs, tid, ptr); |
315 | | |
316 | | /* initialize the cipher ctx with the key provided |
317 | | * determine which cipher to use based on the key size */ |
318 | 0 | if (c->keylen == 256) |
319 | 0 | EVP_EncryptInit_ex(aesni_ctx, EVP_aes_256_ctr(), NULL, c->orig_key, NULL); |
320 | 0 | else if (c->keylen == 128) |
321 | 0 | EVP_EncryptInit_ex(aesni_ctx, EVP_aes_128_ctr(), NULL, c->orig_key, NULL); |
322 | 0 | else if (c->keylen == 192) |
323 | 0 | EVP_EncryptInit_ex(aesni_ctx, EVP_aes_192_ctr(), NULL, c->orig_key, NULL); |
324 | 0 | else { |
325 | 0 | logit("Invalid key length of %d in AES CTR MT. Exiting", c->keylen); |
326 | 0 | exit(1); |
327 | 0 | } |
328 | | |
329 | | /* |
330 | | * Handle the special case of startup, one thread must fill |
331 | | * the first KQ then mark it as draining. Lock held throughout. |
332 | | */ |
333 | | |
334 | 0 | if (pthread_equal(pthread_self(), first_tid)) { |
335 | | /* get the first element of the key queue struct */
336 | 0 | q = &c->q[0]; |
337 | 0 | pthread_mutex_lock(&q->lock); |
338 | | /* if we are in the INIT state then fill the queue */ |
339 | 0 | if (q->qstate == KQINIT) { |
340 | | /* set the initial counter */ |
341 | 0 | EVP_EncryptInit_ex(aesni_ctx, NULL, NULL, NULL, q->ctr); |
342 | | |
343 | | /* encipher a block-sized null string (mynull) with the key. This
344 | | * yields the raw keystream, because XORing the keystream
345 | | * against zeros leaves it unchanged. Store that in the appropriate queue */
346 | 0 | EVP_EncryptUpdate(aesni_ctx, q->keys[0], &outlen, mynull, KQLEN * AES_BLOCK_SIZE); |
347 | | |
348 | | /* add the number of blocks created to the aes counter */
349 | 0 | ssh_ctr_add(q->ctr, KQLEN * numkq, AES_BLOCK_SIZE); |
350 | 0 | q->qstate = KQDRAINING; |
351 | 0 | pthread_cond_broadcast(&q->cond); |
352 | 0 | } |
353 | 0 | pthread_mutex_unlock(&q->lock); |
354 | 0 | } |
355 | | |
356 | | /* |
357 | | * Normal case is to find empty queues and fill them, skipping over |
358 | | * queues already filled by other threads and stopping to wait for |
359 | | * a draining queue to become empty. |
360 | | * |
361 | | * Multiple threads may be waiting on a draining queue and awoken |
362 | | * when empty. The first thread to wake will mark it as filling, |
363 | | * others will move on to fill, skip, or wait on the next queue. |
364 | | */ |
365 | 0 | for (qidx = 1;; qidx = (qidx + 1) % numkq) { |
366 | | /* Check if I was cancelled, also checked in cond_wait */ |
367 | 0 | pthread_testcancel(); |
368 | | |
369 | | /* Check if we should exit as well */ |
370 | 0 | thread_loop_check_exit(c); |
371 | | |
372 | | /* Lock queue and block if it's draining */
373 | 0 | q = &c->q[qidx]; |
374 | 0 | pthread_mutex_lock(&q->lock); |
375 | 0 | pthread_cleanup_push(thread_loop_cleanup, &q->lock); |
376 | 0 | while (q->qstate == KQDRAINING || q->qstate == KQINIT) { |
377 | 0 | thread_loop_check_exit(c); |
378 | 0 | pthread_cond_wait(&q->cond, &q->lock); |
379 | 0 | } |
380 | 0 | pthread_cleanup_pop(0); |
381 | | |
382 | | /* If filling or full, somebody else got it, skip */ |
383 | 0 | if (q->qstate != KQEMPTY) { |
384 | 0 | pthread_mutex_unlock(&q->lock); |
385 | 0 | continue; |
386 | 0 | } |
387 | | |
388 | | /* |
389 | | * Empty, let's fill it. |
390 | | * Queue lock is relinquished while we do this so others |
391 | | * can see that it's being filled. |
392 | | */ |
393 | 0 | q->qstate = KQFILLING; |
394 | 0 | pthread_cond_broadcast(&q->cond); |
395 | 0 | pthread_mutex_unlock(&q->lock); |
396 | | |
397 | | /* set the initial counter */ |
398 | 0 | EVP_EncryptInit_ex(aesni_ctx, NULL, NULL, NULL, q->ctr); |
399 | | |
400 | | /* see corresponding block above for useful comments */
401 | 0 | EVP_EncryptUpdate(aesni_ctx, q->keys[0], &outlen, mynull, KQLEN * AES_BLOCK_SIZE); |
402 | | |
403 | | /* Re-lock, mark full and signal consumer */ |
404 | 0 | pthread_mutex_lock(&q->lock); |
405 | 0 | ssh_ctr_add(q->ctr, KQLEN * numkq, AES_BLOCK_SIZE); |
406 | 0 | q->qstate = KQFULL; |
407 | 0 | pthread_cond_broadcast(&q->cond); |
408 | 0 | pthread_mutex_unlock(&q->lock); |
409 | 0 | } |
410 | | |
411 | 0 | return NULL; |
412 | 0 | } |
413 | | |
414 | | /* this is where the data is actually enciphered and deciphered */ |
415 | | /* this may also benefit from upgrading to the EVP API */ |
416 | | static int |
417 | | ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src, |
418 | | size_t len) |
419 | 0 | { |
420 | 0 | typedef union { |
421 | 0 | #ifdef CIPHER_INT128_OK |
422 | 0 | __uint128_t *u128; |
423 | 0 | #endif |
424 | 0 | uint64_t *u64; |
425 | 0 | uint32_t *u32; |
426 | 0 | uint8_t *u8; |
427 | 0 | const uint8_t *cu8; |
428 | 0 | uintptr_t u; |
429 | 0 | } ptrs_t; |
430 | 0 | ptrs_t destp, srcp, bufp; |
431 | 0 | uintptr_t align; |
432 | 0 | struct ssh_aes_ctr_ctx_mt *c; |
433 | 0 | struct kq *q, *oldq; |
434 | 0 | int ridx; |
435 | 0 | u_char *buf; |
436 | |
437 | 0 | if (len == 0) |
438 | 0 | return 1; |
439 | 0 | if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) |
440 | 0 | return 0; |
441 | | |
442 | 0 | q = &c->q[c->qidx]; |
443 | 0 | ridx = c->ridx; |
444 | | |
445 | | /* src already padded to block multiple */ |
446 | 0 | srcp.cu8 = src; |
447 | 0 | destp.u8 = dest; |
448 | 0 | do { /* do until len is 0 */ |
449 | 0 | buf = q->keys[ridx]; |
450 | 0 | bufp.u8 = buf; |
451 | | |
452 | | /* figure out the alignment on the fly */ |
453 | 0 | #ifdef CIPHER_UNALIGNED_OK |
454 | 0 | align = 0; |
455 | | #else |
456 | | align = destp.u | srcp.u | bufp.u; |
457 | | #endif |
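/* Added worked example (illustrative, for the !CIPHER_UNALIGNED_OK case):
 * with hypothetical addresses dest = 0x1000, src = 0x2004, buf = 0x3000,
 * align is 0x3004; (align & 0x7) != 0 so the 64-bit path below is skipped,
 * but (align & 0x3) == 0 so the 32-bit XOR path is taken. */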
458 | | |
459 | | /* xor the src against the key (buf).
460 | | * different systems can do all 16 bytes at once or
461 | | * may need to do it in 8- or 4-byte chunks;
462 | | * worst case is doing it one byte at a time in a loop */
463 | 0 | #ifdef CIPHER_INT128_OK |
464 | | /* with GCC 13 we have been seeing consistent segfaults
465 | | * in this section of code. Since this is a critical |
466 | | * code path we are removing this until we have a solution |
467 | | * in place -cjr 02/22/24 |
468 | | * TODO: FIX THIS |
469 | | */ |
470 | | /* if ((align & 0xf) == 0) { */ |
471 | | /* destp.u128[0] = srcp.u128[0] ^ bufp.u128[0]; */ |
472 | | /* } else */ |
473 | 0 | #endif |
474 | | /* 64 bits */ |
475 | 0 | if ((align & 0x7) == 0) { |
476 | 0 | destp.u64[0] = srcp.u64[0] ^ bufp.u64[0]; |
477 | 0 | destp.u64[1] = srcp.u64[1] ^ bufp.u64[1]; |
478 | | /* 32 bits */ |
479 | 0 | } else if ((align & 0x3) == 0) { |
480 | 0 | destp.u32[0] = srcp.u32[0] ^ bufp.u32[0]; |
481 | 0 | destp.u32[1] = srcp.u32[1] ^ bufp.u32[1]; |
482 | 0 | destp.u32[2] = srcp.u32[2] ^ bufp.u32[2]; |
483 | 0 | destp.u32[3] = srcp.u32[3] ^ bufp.u32[3]; |
484 | 0 | } else { |
485 | | /*1 byte at a time*/ |
486 | 0 | size_t i; |
487 | 0 | for (i = 0; i < AES_BLOCK_SIZE; ++i) |
488 | 0 | dest[i] = src[i] ^ buf[i]; |
489 | 0 | } |
490 | | |
491 | | /* advance the pointers by the block size (16) */
492 | 0 | destp.u += AES_BLOCK_SIZE; |
493 | 0 | srcp.u += AES_BLOCK_SIZE; |
494 | | |
495 | | /* Increment read index, switch queues on rollover */ |
496 | 0 | if ((ridx = (ridx + 1) % KQLEN) == 0) { |
497 | 0 | oldq = q; |
498 | | |
499 | | /* Mark next queue draining, may need to wait */ |
500 | 0 | c->qidx = (c->qidx + 1) % numkq; |
501 | 0 | q = &c->q[c->qidx]; |
502 | 0 | pthread_mutex_lock(&q->lock); |
503 | 0 | while (q->qstate != KQFULL) { |
504 | 0 | pthread_cond_wait(&q->cond, &q->lock); |
505 | 0 | } |
506 | 0 | q->qstate = KQDRAINING; |
507 | 0 | pthread_cond_broadcast(&q->cond); |
508 | 0 | pthread_mutex_unlock(&q->lock); |
509 | | |
510 | | /* Mark consumed queue empty and signal producers */ |
511 | 0 | pthread_mutex_lock(&oldq->lock); |
512 | 0 | oldq->qstate = KQEMPTY; |
513 | 0 | pthread_cond_broadcast(&oldq->cond); |
514 | 0 | pthread_mutex_unlock(&oldq->lock); |
515 | 0 | } |
516 | 0 | } while (len -= AES_BLOCK_SIZE); |
517 | 0 | c->ridx = ridx; |
518 | 0 | return 1; |
519 | 0 | } |
520 | | |
521 | | static int |
522 | | ssh_aes_ctr_init(EVP_CIPHER_CTX *ctx, const u_char *key, const u_char *iv, |
523 | | int enc) |
524 | 0 | { |
525 | 0 | struct ssh_aes_ctr_ctx_mt *c; |
526 | 0 | int i; |
527 | |
528 | 0 | char *aes_threads = getenv("SSH_CIPHER_THREADS"); |
529 | 0 | if (aes_threads != NULL && strlen(aes_threads) != 0) |
530 | 0 | cipher_threads = atoi(aes_threads); |
531 | 0 | else |
532 | 0 | cipher_threads = 1; |
533 | |
534 | 0 | if (cipher_threads < 1) |
535 | 0 | cipher_threads = 1; |
536 | |
537 | 0 | if (cipher_threads > MAX_THREADS) |
538 | 0 | cipher_threads = MAX_THREADS; |
539 | |
540 | 0 | numkq = cipher_threads + 1; |
541 | |
542 | 0 | if (numkq > MAX_NUMKQ) |
543 | 0 | numkq = MAX_NUMKQ; |
544 | |
545 | 0 | debug("Starting %d threads and %d queues\n", cipher_threads, numkq); |
546 | | |
547 | | /* set up the initial state of c (our cipher stream struct) */ |
548 | 0 | if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) { |
549 | 0 | c = xmalloc(sizeof(*c)); |
550 | 0 | pthread_rwlock_init(&c->tid_lock, NULL); |
551 | | #ifdef __APPLE__ |
552 | | pthread_rwlock_init(&c->stop_lock, NULL); |
553 | | c->exit_flag = FALSE; |
554 | | #endif /* __APPLE__ */ |
555 | |
556 | 0 | c->state = HAVE_NONE; |
557 | | |
558 | | /* initialize the mutexes and condition variables for each queue in our struct */
559 | 0 | for (i = 0; i < numkq; i++) { |
560 | 0 | pthread_mutex_init(&c->q[i].lock, NULL); |
561 | 0 | pthread_cond_init(&c->q[i].cond, NULL); |
562 | 0 | } |
563 | | |
564 | | /* attach our struct to the context */ |
565 | 0 | EVP_CIPHER_CTX_set_app_data(ctx, c); |
566 | 0 | } |
567 | | |
568 | | /* we are initializing but the current structure already
569 | | has an IV and key, so we want to discard the existing key data
570 | | and start over. This is important when we need to rekey the data stream */
571 | 0 | if (c->state == (HAVE_KEY | HAVE_IV)) { |
572 | | /* tell the pregen threads to exit */ |
573 | 0 | stop_and_join_pregen_threads(c); |
574 | |
575 | | #ifdef __APPLE__ |
576 | | /* reset the exit flag */ |
577 | | c->exit_flag = FALSE; |
578 | | #endif /* __APPLE__ */ |
579 | | |
580 | | /* Start over getting key & iv */ |
581 | 0 | c->state = HAVE_NONE; |
582 | 0 | } |
583 | | |
584 | | /* set the initial key for this key stream queue */ |
585 | 0 | if (key != NULL) { |
586 | 0 | AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8, |
587 | 0 | &c->aes_key); |
588 | 0 | c->orig_key = key; |
589 | 0 | c->keylen = EVP_CIPHER_CTX_key_length(ctx) * 8; |
590 | 0 | c->state |= HAVE_KEY; |
591 | 0 | } |
592 | | |
593 | | /* set the IV */ |
594 | 0 | if (iv != NULL) { |
595 | | /* init the counter; this is just a 16-byte uchar array */
596 | 0 | memcpy(c->aes_counter, iv, AES_BLOCK_SIZE); |
597 | 0 | c->state |= HAVE_IV; |
598 | 0 | } |
599 | |
600 | 0 | if (c->state == (HAVE_KEY | HAVE_IV)) { |
601 | | /* Clear queues */ |
602 | | /* set the counter of the first key queue to the current counter */
603 | 0 | memcpy(c->q[0].ctr, c->aes_counter, AES_BLOCK_SIZE); |
604 | | /* indicate that it needs to be initialized */ |
605 | 0 | c->q[0].qstate = KQINIT; |
606 | | /* for each of the remaining queues set its counter to the base
607 | | * counter advanced by that queue's offset (i * KQLEN blocks) */
608 | 0 | for (i = 1; i < numkq; i++) { |
609 | 0 | memcpy(c->q[i].ctr, c->aes_counter, AES_BLOCK_SIZE); |
610 | 0 | ssh_ctr_add(c->q[i].ctr, i * KQLEN, AES_BLOCK_SIZE); |
611 | 0 | c->q[i].qstate = KQEMPTY; |
612 | 0 | } |
613 | 0 | c->qidx = 0; |
614 | 0 | c->ridx = 0; |
615 | 0 | c->struct_id = global_struct_id++; |
616 | | |
617 | | |
618 | | /* Start threads */ |
619 | 0 | #define STACK_SIZE (1024 * 1024) |
620 | 0 | pthread_attr_t attr; |
621 | 0 | pthread_attr_init(&attr); |
622 | 0 | pthread_attr_setstacksize(&attr, STACK_SIZE); |
623 | 0 | for (i = 0; i < cipher_threads; i++) { |
624 | 0 | pthread_rwlock_wrlock(&c->tid_lock); |
625 | 0 | if (pthread_create(&c->tid[i], &attr, thread_loop, c) != 0) |
626 | 0 | fatal ("AES-CTR MT Could not create thread in %s", __FUNCTION__); |
627 | | /*should die here */ |
628 | 0 | else { |
629 | 0 | c->id[i] = i; |
630 | 0 | debug ("AES-CTR MT spawned a thread with id %lu in %s (%lu, %d)", |
631 | 0 | c->tid[i], __FUNCTION__, c->struct_id, c->id[i]); |
632 | 0 | } |
633 | 0 | pthread_rwlock_unlock(&c->tid_lock); |
634 | 0 | } |
635 | 0 | pthread_mutex_lock(&c->q[0].lock); |
636 | | /* wait here until the first thread has filled the first key queue */
637 | 0 | while (c->q[0].qstate == KQINIT) |
638 | 0 | pthread_cond_wait(&c->q[0].cond, &c->q[0].lock); |
639 | 0 | pthread_mutex_unlock(&c->q[0].lock); |
640 | 0 | } |
641 | 0 | return 1; |
642 | 0 | } |
643 | | |
644 | | static int |
645 | | ssh_aes_ctr_cleanup(EVP_CIPHER_CTX *ctx) |
646 | 0 | { |
647 | 0 | struct ssh_aes_ctr_ctx_mt *c; |
648 | |
649 | 0 | if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) != NULL) { |
650 | 0 | stop_and_join_pregen_threads(c); |
651 | |
652 | 0 | memset(c, 0, sizeof(*c)); |
653 | 0 | free(c); |
654 | 0 | EVP_CIPHER_CTX_set_app_data(ctx, NULL); |
655 | 0 | } |
656 | 0 | return 1; |
657 | 0 | } |
658 | | |
659 | | /* <friedl> */ |
660 | | const EVP_CIPHER * |
661 | | evp_aes_ctr_mt(void) |
662 | 0 | { |
663 | 0 | static EVP_CIPHER *aes_ctr; |
664 | 0 | aes_ctr = EVP_CIPHER_meth_new(NID_undef, 16/*block*/, 16/*key*/); |
665 | 0 | EVP_CIPHER_meth_set_iv_length(aes_ctr, AES_BLOCK_SIZE); |
666 | 0 | EVP_CIPHER_meth_set_init(aes_ctr, ssh_aes_ctr_init); |
667 | 0 | EVP_CIPHER_meth_set_cleanup(aes_ctr, ssh_aes_ctr_cleanup); |
668 | 0 | EVP_CIPHER_meth_set_do_cipher(aes_ctr, ssh_aes_ctr); |
669 | 0 | # ifndef SSH_OLD_EVP |
670 | 0 | EVP_CIPHER_meth_set_flags(aes_ctr, EVP_CIPH_CBC_MODE |
671 | 0 | | EVP_CIPH_VARIABLE_LENGTH |
672 | 0 | | EVP_CIPH_ALWAYS_CALL_INIT |
673 | 0 | | EVP_CIPH_CUSTOM_IV); |
674 | 0 | # endif /*SSH_OLD_EVP*/ |
675 | 0 | return aes_ctr; |
676 | 0 | } |
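/* Added usage sketch (illustrative only, not part of the original source):
 * how a caller might drive the cipher returned above through the standard
 * EVP interface. Key, iv and buffer names are hypothetical; len is assumed
 * to be a multiple of AES_BLOCK_SIZE since ssh_aes_ctr() expects
 * block-aligned input. */
#if 0
static void
aes_ctr_mt_usage_sketch(const u_char *key, const u_char *iv,
    const u_char *in, u_char *out, size_t len)
{
	EVP_CIPHER_CTX *evp = EVP_CIPHER_CTX_new();

	/* enc = 1; in CTR mode encryption and decryption are the same XOR */
	EVP_CipherInit(evp, evp_aes_ctr_mt(), key, iv, 1);
	/* dispatches to ssh_aes_ctr() via the do_cipher hook set above */
	EVP_Cipher(evp, out, in, len);
	EVP_CIPHER_CTX_free(evp);	/* runs the cleanup hook set above */
}
#endif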
677 | | #endif /* OSSL Check */ |