Coverage Report

Created: 2024-06-18 06:23

/src/hpn-ssh/cipher-ctr-mt.c
Line
Count
Source
1
/*
2
 * OpenSSH Multi-threaded AES-CTR Cipher
3
 *
4
 * Author: Benjamin Bennett <ben@psc.edu>
5
 * Author: Mike Tasota <tasota@gmail.com>
6
 * Author: Chris Rapier <rapier@psc.edu>
7
 * Copyright (c) 2008-2021 Pittsburgh Supercomputing Center. All rights reserved.
8
 *
9
 * Based on original OpenSSH AES-CTR cipher. Small portions remain unchanged,
10
 * Copyright (c) 2003 Markus Friedl <markus@openbsd.org>
11
 *
12
 * Permission to use, copy, modify, and distribute this software for any
13
 * purpose with or without fee is hereby granted, provided that the above
14
 * copyright notice and this permission notice appear in all copies.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
17
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
18
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
19
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
21
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
22
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23
 */
24
#include "includes.h"
25
26
#if defined(WITH_OPENSSL) && !defined(WITH_OPENSSL3)
27
#include <sys/types.h>
28
29
#include <stdarg.h>
30
#include <string.h>
31
32
#include <openssl/evp.h>
33
34
#include "xmalloc.h"
35
#include "log.h"
36
#include <unistd.h>
37
#include "uthash.h"
38
39
/* compatibility with old or broken OpenSSL versions */
40
#include "openbsd-compat/openssl-compat.h"
41
42
#ifndef USE_BUILTIN_RIJNDAEL
43
#include <openssl/aes.h>
44
#endif
45
46
#include <pthread.h>
47
48
#ifdef __APPLE__
49
#include <sys/types.h>
50
#include <sys/sysctl.h>
51
#endif
52
53
/* note regarding threads and queues */
54
/* initially this cipher was written in a way that
55
 * the key stream was generated in a per cipher block
56
 * loop. For example, if the key stream queue length was
57
 * 16k and the cipher block size was 16 bytes it would
58
 * fill the queue 16 bytes at a time. Mitch Dorrell pointed
59
 * out that we could fill the queue in one call, eliminating the
60
 * loop and multiple calls to EVP_EncryptUpdate. Doing so
61
 * dramatically reduced CPU load in the threads and indicated
62
 * that we could also eliminate most of the threads and queues
63
 * as it would take far less time for a queue to enter KQ_FULL
64
 * state. As such, we've reduced the default number of threads
65
 * and queues from 2 and 8 (respectively) to 1 and 2. We've also
66
 * eliminated the need to determine the physical number of cores on
67
 * the system and, if the user desires, can spin up more threads
68
 * using an environment variable. Additionally, the number of queues is now fixed
69
 * at thread_count + 1.
70
 * cjr 10/19/2022 */
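/* A minimal illustrative sketch of the one-call fill described above
 * (not part of this file; fill_queue_sketch, keystream, zeros and
 * queue_bytes are placeholder names). Enciphering a queue-sized buffer
 * of zeros with a single EVP_EncryptUpdate() yields the whole keystream
 * for one queue, replacing the old loop that called into the cipher once
 * per 16-byte block. */
#if 0
static void
fill_queue_sketch(EVP_CIPHER_CTX *evp, u_char *keystream, u_char *ctr,
    const u_char *zeros, int queue_bytes)
{
  int outlen = 0;

  /* reposition the counter, then generate queue_bytes of keystream */
  EVP_EncryptInit_ex(evp, NULL, NULL, NULL, ctr);
  EVP_EncryptUpdate(evp, keystream, &outlen, zeros, queue_bytes);
}
#endif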
71
72
/*-------------------- TUNABLES --------------------*/
73
/* maximum number of threads and queues */
74
0
#define MAX_THREADS      4
75
0
#define MAX_NUMKQ        (MAX_THREADS + 1)
76
77
/* Number of pregen threads to use */
78
/* this is a default value. The actual number is
79
 * determined during init from the SSH_CIPHER_THREADS
80
 * environment variable */
81
int cipher_threads = 1;
82
83
/* Number of keystream queues */
84
/* ideally this should be large enough so that there is
85
 * always a key queue for a thread to work on
86
 * so perhaps double the number of threads. Again, this
87
 * is a default and the actual value is determined in init */
88
int numkq = 2;
89
90
/* Length of a keystream queue */
91
/* one queue holds 512KB (1024 * 32 * 16) of key data.
92
 * Since the queues are destroyed after a rekey
93
 * and at least one has to be fully filled prior to
94
 * enciphering data, we don't want this to be too large */
95
0
#define KQLEN (1024 * 32)
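/* Sizing, for reference: KQLEN (32768) blocks * AES_BLOCK_SIZE (16) bytes
 * = 524288 bytes = 512KB of keystream per queue, matching the comment
 * above; with the default numkq of 2 the pregenerated pool is 1MB. */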
96
97
/* Processor cacheline length */
98
#define CACHELINE_LEN 64
99
100
/* Can the system do unaligned loads natively? */
101
#if defined(__aarch64__) || \
102
    defined(__i386__)    || \
103
    defined(__powerpc__) || \
104
    defined(__x86_64__)
105
# define CIPHER_UNALIGNED_OK
106
#endif
107
#if defined(__SIZEOF_INT128__)
108
# define CIPHER_INT128_OK
109
#endif
110
/*-------------------- END TUNABLES --------------------*/
111
112
0
#define HAVE_NONE       0
113
0
#define HAVE_KEY        1
114
0
#define HAVE_IV         2
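/* c->state is a bitmask of the flags above; the pregen threads are only
 * started once both the key and the IV have been supplied
 * (see ssh_aes_ctr_init) */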
115
int X = 0;
116
117
const EVP_CIPHER *evp_aes_ctr_mt(void);
118
119
/* Keystream Queue state */
120
enum {
121
  KQINIT,
122
  KQEMPTY,
123
  KQFILLING,
124
  KQFULL,
125
  KQDRAINING
126
};
127
128
/* Keystream Queue struct */
129
struct kq {
130
  u_char    keys[KQLEN][AES_BLOCK_SIZE]; /* [32768][16B] */
131
  u_char    ctr[AES_BLOCK_SIZE]; /* 16B */
132
  u_char          pad0[CACHELINE_LEN];
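  /* pad0/pad1 presumably keep the lock, cond, and qstate fields on their
   * own cache line (CACHELINE_LEN) to limit false sharing with the
   * keystream buffer above */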
133
  pthread_mutex_t lock;
134
  pthread_cond_t  cond;
135
  int             qstate;
136
  u_char          pad1[CACHELINE_LEN];
137
};
138
139
/* Context struct */
140
struct ssh_aes_ctr_ctx_mt
141
{
142
  long unsigned int struct_id;
143
  int               keylen;
144
  int     state;
145
  int     qidx;
146
  int     ridx;
147
  int               id[MAX_THREADS]; /* 32 */
148
  AES_KEY           aes_key;
149
  const u_char     *orig_key;
150
  u_char      aes_counter[AES_BLOCK_SIZE]; /* 16B */
151
  pthread_t   tid[MAX_THREADS]; /* 32 */
152
  pthread_rwlock_t  tid_lock;
153
  struct kq   q[MAX_NUMKQ]; /* 33 */
154
#ifdef __APPLE__
155
  pthread_rwlock_t  stop_lock;
156
  int     exit_flag;
157
#endif /* __APPLE__ */
158
};
159
160
/* this defines the hash and elements of evp context pointers
161
 * that are created in thread_loop. We use this to clear and
162
 * free the contexts in stop_and_join_pregen_threads
163
 */
164
struct aes_mt_ctx_ptrs {
165
  pthread_t       tid;
166
  EVP_CIPHER_CTX *pointer; /* 32 */
167
  UT_hash_handle hh;
168
};
169
170
/* globals */
171
/* how we increment the id of the structs we create */
172
long unsigned int global_struct_id = 0;
173
174
/* keep a copy of the pointers created in thread_loop to free later */
175
struct aes_mt_ctx_ptrs *evp_ptrs = NULL;
176
177
/*
178
 * Add num to counter 'ctr'
179
 */
180
static void
181
ssh_ctr_add(u_char *ctr, uint32_t num, u_int len)
182
0
{
183
0
  int i;
184
0
  uint16_t n;
185
186
0
  for (n = 0, i = len - 1; i >= 0 && (num || n); i--) {
187
0
    n = ctr[i] + (num & 0xff) + n;
188
0
    num >>= 8;
189
0
    ctr[i] = n & 0xff;
190
0
    n >>= 8;
191
0
  }
192
0
}
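/* Worked example of the big-endian add above (illustrative values):
 * with a 16-byte counter ending in ... 00 ff ff ff ff,
 * ssh_ctr_add(ctr, 1, AES_BLOCK_SIZE) rolls the last four bytes over to
 * 00 and carries 1 into the fifth byte from the end. The pregen threads
 * use it to advance a queue's counter by KQLEN * numkq blocks
 * (65536 with the defaults) after each fill. */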
193
194
/*
195
 * Threads may be cancelled in a pthread_cond_wait, we must free the mutex
196
 */
197
static void
198
thread_loop_cleanup(void *x)
199
0
{
200
0
  pthread_mutex_unlock((pthread_mutex_t *)x);
201
0
}
202
203
#ifdef __APPLE__
204
/* Check if we should exit, we are doing both cancel and exit condition
205
 * since on OSX threads seem to occasionally fail to notice when they have
206
 * been cancelled. We want to have a backup to make sure that we won't hang
207
 * when the main process join()-s the cancelled thread.
208
 */
209
static void
210
thread_loop_check_exit(struct ssh_aes_ctr_ctx_mt *c)
211
{
212
  int exit_flag;
213
214
  pthread_rwlock_rdlock(&c->stop_lock);
215
  exit_flag = c->exit_flag;
216
  pthread_rwlock_unlock(&c->stop_lock);
217
218
  if (exit_flag)
219
    pthread_exit(NULL);
220
}
221
#else
222
# define thread_loop_check_exit(s)
223
#endif /* __APPLE__ */
224
225
/*
226
 * Helper function to terminate the helper threads
227
 */
228
static void
229
stop_and_join_pregen_threads(struct ssh_aes_ctr_ctx_mt *c)
230
0
{
231
0
  int i;
232
233
#ifdef __APPLE__
234
  /* notify threads that they should exit */
235
  pthread_rwlock_wrlock(&c->stop_lock);
236
  c->exit_flag = TRUE;
237
  pthread_rwlock_unlock(&c->stop_lock);
238
#endif /* __APPLE__ */
239
240
  /* Cancel pregen threads */
241
0
  for (i = 0; i < cipher_threads; i++) {
242
0
    debug ("Canceled %lu (%lu,%d)", c->tid[i], c->struct_id, c->id[i]);
243
0
    pthread_cancel(c->tid[i]);
244
0
  }
245
0
  for (i = 0; i < numkq; i++) {
246
0
    pthread_mutex_lock(&c->q[i].lock);
247
0
    pthread_cond_broadcast(&c->q[i].cond);
248
0
    pthread_mutex_unlock(&c->q[i].lock);
249
0
  }
250
0
  for (i = 0; i < cipher_threads; i++) {
251
0
    if (pthread_kill(c->tid[i], 0) != 0)
252
0
      debug3("AES-CTR MT pthread_join failure: Invalid thread id %lu in %s",
253
0
             c->tid[i], __FUNCTION__);
254
0
    else {
255
0
      debug ("Joining %lu (%lu, %d)", c->tid[i], c->struct_id, c->id[i]);
256
0
      pthread_mutex_destroy(&c->q[i].lock);
257
0
      pthread_cond_destroy(&c->q[i].cond);
258
0
      pthread_join(c->tid[i], NULL);
259
      /* this finds the entry in the hash corresponding to the
260
       * thread id. That's used to find the pointer to the cipher struct
261
       * created in thread_loop. */
262
0
      struct aes_mt_ctx_ptrs *ptr;
263
0
      HASH_FIND_INT(evp_ptrs, &c->tid[i], ptr);
264
0
      EVP_CIPHER_CTX_free(ptr->pointer);
265
0
      HASH_DEL(evp_ptrs, ptr);
266
0
      free(ptr);
    }
267
0
  }
268
0
  pthread_rwlock_destroy(&c->tid_lock);
269
0
}
270
271
/*
272
 * The life of a pregen thread:
273
 *    Find empty keystream queues and fill them using their counter.
274
 *    When done, update counter for the next fill.
275
 */
276
/* previously this used the low level interface which is, sadly,
277
 * slower than the EVP interface by a long shot. The original ctx (from the
278
 * body of the code) isn't passed in here but we have the key and the counter
279
 * which means we should be able to create the exact same ctx and use that to
280
 * fill the keystream queues. I'm concerned about additional overhead but the
281
 * additional speed from AESNI should make up for it.  */
282
/* The above comment was made when I thought I needed to do a new EVP init for
283
 * each counter increment. Turns out not to be the case -cjr 10/15/21*/
284
285
static void *
286
thread_loop(void *x)
287
0
{
288
0
  EVP_CIPHER_CTX *aesni_ctx;
289
0
  struct ssh_aes_ctr_ctx_mt *c = x;
290
0
  struct kq *q;
291
0
  struct aes_mt_ctx_ptrs *ptr;
292
0
  int qidx;
293
0
  pthread_t first_tid;
294
0
  int outlen;
295
0
  u_char mynull[KQLEN * AES_BLOCK_SIZE];
296
0
  memset(&mynull, 0, KQLEN * AES_BLOCK_SIZE);
297
298
  /* get the thread id to see if this is the first one */
299
0
  pthread_rwlock_rdlock(&c->tid_lock);
300
0
  first_tid = c->tid[0];
301
0
  pthread_rwlock_unlock(&c->tid_lock);
302
303
  /* create the context for this thread */
304
0
  aesni_ctx = EVP_CIPHER_CTX_new();
305
306
  /* keep track of the pointer for the evp in this struct
307
   * so we can free it later. We place it in a hash indexed on the
308
   * thread id, which is available to us in the free function.
309
   * Note, the thread id isn't necessarily unique across rekeys but
310
   * that's okay as they are unique during a key. */
311
0
  ptr = malloc(sizeof *ptr); /* freed in stop_and_join_pregen_threads */
312
0
  ptr->tid = pthread_self(); /* index for hash */
313
0
  ptr->pointer = aesni_ctx;
314
0
  HASH_ADD_INT(evp_ptrs, tid, ptr);
315
316
  /* initialize the cipher ctx with the key provided;
317
   * determine which cipher to use based on the key size */
318
0
  if (c->keylen == 256)
319
0
    EVP_EncryptInit_ex(aesni_ctx, EVP_aes_256_ctr(), NULL, c->orig_key, NULL);
320
0
  else if (c->keylen == 128)
321
0
    EVP_EncryptInit_ex(aesni_ctx, EVP_aes_128_ctr(), NULL, c->orig_key, NULL);
322
0
  else if (c->keylen == 192)
323
0
    EVP_EncryptInit_ex(aesni_ctx, EVP_aes_192_ctr(), NULL, c->orig_key, NULL);
324
0
  else {
325
0
    logit("Invalid key length of %d in AES CTR MT. Exiting", c->keylen);
326
0
    exit(1);
327
0
  }
328
329
  /*
330
   * Handle the special case of startup, one thread must fill
331
   * the first KQ then mark it as draining. Lock held throughout.
332
   */
333
334
0
  if (pthread_equal(pthread_self(), first_tid)) {
335
    /* get the first element of the key queue array */
336
0
    q = &c->q[0];
337
0
    pthread_mutex_lock(&q->lock);
338
    /* if we are in the INIT state then fill the queue */
339
0
    if (q->qstate == KQINIT) {
340
      /* set the initial counter */
341
0
      EVP_EncryptInit_ex(aesni_ctx, NULL, NULL, NULL, q->ctr);
342
343
      /* encipher a queue-sized buffer of zeros (mynull) with the key. This
344
       * yields the raw keystream, because xoring zeros
345
       * against the keystream returns the keystream. Store it in the appropriate queue */
346
0
      EVP_EncryptUpdate(aesni_ctx, q->keys[0], &outlen, mynull, KQLEN * AES_BLOCK_SIZE);
347
348
      /* add the number of blocks created to the aes counter */
349
0
      ssh_ctr_add(q->ctr, KQLEN * numkq, AES_BLOCK_SIZE);
350
0
      q->qstate = KQDRAINING;
351
0
      pthread_cond_broadcast(&q->cond);
352
0
    }
353
0
    pthread_mutex_unlock(&q->lock);
354
0
  }
355
356
  /*
357
   * Normal case is to find empty queues and fill them, skipping over
358
   * queues already filled by other threads and stopping to wait for
359
   * a draining queue to become empty.
360
   *
361
   * Multiple threads may be waiting on a draining queue and awoken
362
   * when empty.  The first thread to wake will mark it as filling,
363
   * others will move on to fill, skip, or wait on the next queue.
364
   */
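  /* Queue lifecycle implied by this loop and by ssh_aes_ctr():
   * KQEMPTY -> KQFILLING -> KQFULL (producer threads here), then
   * KQFULL -> KQDRAINING -> KQEMPTY (the consumer in ssh_aes_ctr);
   * KQINIT is only used for q[0] immediately after a (re)key. */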
365
0
  for (qidx = 1;; qidx = (qidx + 1) % numkq) {
366
    /* Check if I was cancelled, also checked in cond_wait */
367
0
    pthread_testcancel();
368
369
    /* Check if we should exit as well */
370
0
    thread_loop_check_exit(c);
371
372
    /* Lock queue and block if it's draining */
373
0
    q = &c->q[qidx];
374
0
    pthread_mutex_lock(&q->lock);
375
0
    pthread_cleanup_push(thread_loop_cleanup, &q->lock);
376
0
    while (q->qstate == KQDRAINING || q->qstate == KQINIT) {
377
0
      thread_loop_check_exit(c);
378
0
      pthread_cond_wait(&q->cond, &q->lock);
379
0
    }
380
0
    pthread_cleanup_pop(0);
381
382
    /* If filling or full, somebody else got it, skip */
383
0
    if (q->qstate != KQEMPTY) {
384
0
      pthread_mutex_unlock(&q->lock);
385
0
      continue;
386
0
    }
387
388
    /*
389
     * Empty, let's fill it.
390
     * Queue lock is relinquished while we do this so others
391
     * can see that it's being filled.
392
     */
393
0
    q->qstate = KQFILLING;
394
0
    pthread_cond_broadcast(&q->cond);
395
0
    pthread_mutex_unlock(&q->lock);
396
397
    /* set the initial counter */
398
0
    EVP_EncryptInit_ex(aesni_ctx, NULL, NULL, NULL, q->ctr);
399
400
    /* see corresponding block above for useful comments */
401
0
    EVP_EncryptUpdate(aesni_ctx, q->keys[0], &outlen, mynull, KQLEN * AES_BLOCK_SIZE);
402
403
    /* Re-lock, mark full and signal consumer */
404
0
    pthread_mutex_lock(&q->lock);
405
0
    ssh_ctr_add(q->ctr, KQLEN * numkq, AES_BLOCK_SIZE);
406
0
    q->qstate = KQFULL;
407
0
    pthread_cond_broadcast(&q->cond);
408
0
    pthread_mutex_unlock(&q->lock);
409
0
  }
410
411
0
  return NULL;
412
0
}
413
414
/* this is where the data is actually enciphered and deciphered */
415
/* this may also benefit from upgrading to the EVP API */
416
static int
417
ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src,
418
    size_t len)
419
0
{
420
0
  typedef union {
421
0
#ifdef CIPHER_INT128_OK
422
0
    __uint128_t *u128;
423
0
#endif
424
0
    uint64_t *u64;
425
0
    uint32_t *u32;
426
0
    uint8_t *u8;
427
0
    const uint8_t *cu8;
428
0
    uintptr_t u;
429
0
  } ptrs_t;
430
0
  ptrs_t destp, srcp, bufp;
431
0
  uintptr_t align;
432
0
  struct ssh_aes_ctr_ctx_mt *c;
433
0
  struct kq *q, *oldq;
434
0
  int ridx;
435
0
  u_char *buf;
436
437
0
  if (len == 0)
438
0
    return 1;
439
0
  if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL)
440
0
    return 0;
441
442
0
  q = &c->q[c->qidx];
443
0
  ridx = c->ridx;
444
445
  /* src already padded to block multiple */
446
0
  srcp.cu8 = src;
447
0
  destp.u8 = dest;
448
0
  do { /* do until len is 0 */
449
0
    buf = q->keys[ridx];
450
0
    bufp.u8 = buf;
451
452
    /* figure out the alignment on the fly */
453
0
#ifdef CIPHER_UNALIGNED_OK
454
0
    align = 0;
455
#else
456
    align = destp.u | srcp.u | bufp.u;
457
#endif
458
459
    /* xor the src against the key (buf)
460
     * different systems can do all 16 bytes at once or
461
     * may need to do it in 8- or 4-byte chunks
462
     * worst case is doing it as a loop */
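    /* Example with illustrative addresses (when CIPHER_UNALIGNED_OK is
     * not defined): dest = 0x1000, src = 0x2008, buf = 0x3010 gives
     * align = 0x3018, so (align & 0x7) == 0 and the 64-bit path runs;
     * if src were 0x2004, align & 0x7 would be 4 but align & 0x3 would
     * be 0, selecting the 32-bit path. Any pointer with a low bit set
     * forces a narrower path. */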
463
0
#ifdef CIPHER_INT128_OK
464
    /* with GCC 13 we were having consistent seg faults
465
     * in this section of code. Since this is a critical
466
     * code path we are removing this until we have a solution
467
     * in place -cjr 02/22/24
468
     * TODO: FIX THIS
469
     */
470
    /* if ((align & 0xf) == 0) { */
471
    /*  destp.u128[0] = srcp.u128[0] ^ bufp.u128[0]; */
472
    /* } else */
473
0
#endif
474
    /* 64 bits */
475
0
    if ((align & 0x7) == 0) {
476
0
      destp.u64[0] = srcp.u64[0] ^ bufp.u64[0];
477
0
      destp.u64[1] = srcp.u64[1] ^ bufp.u64[1];
478
    /* 32 bits */
479
0
    } else if ((align & 0x3) == 0) {
480
0
      destp.u32[0] = srcp.u32[0] ^ bufp.u32[0];
481
0
      destp.u32[1] = srcp.u32[1] ^ bufp.u32[1];
482
0
      destp.u32[2] = srcp.u32[2] ^ bufp.u32[2];
483
0
      destp.u32[3] = srcp.u32[3] ^ bufp.u32[3];
484
0
    } else {
485
      /* 1 byte at a time */
486
0
      size_t i;
487
0
      for (i = 0; i < AES_BLOCK_SIZE; ++i)
488
0
        dest[i] = src[i] ^ buf[i];
489
0
    }
490
491
    /* increment the pointers by the block size (16) */
492
0
    destp.u += AES_BLOCK_SIZE;
493
0
    srcp.u += AES_BLOCK_SIZE;
494
495
    /* Increment read index, switch queues on rollover */
496
0
    if ((ridx = (ridx + 1) % KQLEN) == 0) {
497
0
      oldq = q;
498
499
      /* Mark next queue draining, may need to wait */
500
0
      c->qidx = (c->qidx + 1) % numkq;
501
0
      q = &c->q[c->qidx];
502
0
      pthread_mutex_lock(&q->lock);
503
0
      while (q->qstate != KQFULL) {
504
0
        pthread_cond_wait(&q->cond, &q->lock);
505
0
      }
506
0
      q->qstate = KQDRAINING;
507
0
      pthread_cond_broadcast(&q->cond);
508
0
      pthread_mutex_unlock(&q->lock);
509
510
      /* Mark consumed queue empty and signal producers */
511
0
      pthread_mutex_lock(&oldq->lock);
512
0
      oldq->qstate = KQEMPTY;
513
0
      pthread_cond_broadcast(&oldq->cond);
514
0
      pthread_mutex_unlock(&oldq->lock);
515
0
    }
516
0
  } while (len -= AES_BLOCK_SIZE);
517
0
  c->ridx = ridx;
518
0
  return 1;
519
0
}
520
521
static int
522
ssh_aes_ctr_init(EVP_CIPHER_CTX *ctx, const u_char *key, const u_char *iv,
523
    int enc)
524
0
{
525
0
  struct ssh_aes_ctr_ctx_mt *c;
526
0
  int i;
527
528
0
  char *aes_threads = getenv("SSH_CIPHER_THREADS");
529
0
  if (aes_threads != NULL && strlen(aes_threads) != 0)
530
0
    cipher_threads = atoi(aes_threads);
531
0
  else
532
0
    cipher_threads = 1;
533
534
0
  if (cipher_threads < 1)
535
0
    cipher_threads = 1;
536
537
0
  if (cipher_threads > MAX_THREADS)
538
0
    cipher_threads = MAX_THREADS;
539
540
0
  numkq = cipher_threads + 1;
541
542
0
  if (numkq > MAX_NUMKQ)
543
0
    numkq = MAX_NUMKQ;
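  /* Example: starting the process with SSH_CIPHER_THREADS=3 in its
   * environment yields cipher_threads = 3 and numkq = 4; an unset, empty,
   * or non-numeric value falls back to 1 thread and 2 queues, and values
   * above MAX_THREADS are clamped to 4 threads and 5 queues. */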
544
545
0
  debug("Starting %d threads and %d queues\n", cipher_threads, numkq);
546
547
  /* set up the initial state of c (our cipher stream struct) */
548
0
  if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) {
549
0
    c = xmalloc(sizeof(*c));
550
0
    pthread_rwlock_init(&c->tid_lock, NULL);
551
#ifdef __APPLE__
552
    pthread_rwlock_init(&c->stop_lock, NULL);
553
    c->exit_flag = FALSE;
554
#endif /* __APPLE__ */
555
556
0
    c->state = HAVE_NONE;
557
558
    /* initialize the mutexes and condition variables for each queue in our struct */
559
0
    for (i = 0; i < numkq; i++) {
560
0
      pthread_mutex_init(&c->q[i].lock, NULL);
561
0
      pthread_cond_init(&c->q[i].cond, NULL);
562
0
    }
563
564
    /* attach our struct to the context */
565
0
    EVP_CIPHER_CTX_set_app_data(ctx, c);
566
0
  }
567
568
  /* we are initializing but the current structure already
569
     has an IV and key so we want to kill the existing key data
570
     and start over. This is important when we need to rekey the data stream */
571
0
  if (c->state == (HAVE_KEY | HAVE_IV)) {
572
    /* tell the pregen threads to exit */
573
0
    stop_and_join_pregen_threads(c);
574
575
#ifdef __APPLE__
576
    /* reset the exit flag */
577
    c->exit_flag = FALSE;
578
#endif /* __APPLE__ */
579
580
    /* Start over getting key & iv */
581
0
    c->state = HAVE_NONE;
582
0
  }
583
584
  /* set the initial key for this cipher context */
585
0
  if (key != NULL) {
586
0
    AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
587
0
       &c->aes_key);
588
0
    c->orig_key = key;
589
0
    c->keylen = EVP_CIPHER_CTX_key_length(ctx) * 8;
590
0
    c->state |= HAVE_KEY;
591
0
  }
592
593
  /* set the IV */
594
0
  if (iv != NULL) {
595
    /* init the counter; this is just a 16-byte uchar array */
596
0
    memcpy(c->aes_counter, iv, AES_BLOCK_SIZE);
597
0
    c->state |= HAVE_IV;
598
0
  }
599
600
0
  if (c->state == (HAVE_KEY | HAVE_IV)) {
601
    /* Clear queues */
602
    /* set the first key queue's counter to the current aes counter */
603
0
    memcpy(c->q[0].ctr, c->aes_counter, AES_BLOCK_SIZE);
604
    /* indicate that it needs to be initialized */
605
0
    c->q[0].qstate = KQINIT;
606
    /* for each of the remaining queues set its counter to the base
607
     * counter advanced by i * KQLEN blocks */
608
0
    for (i = 1; i < numkq; i++) {
609
0
      memcpy(c->q[i].ctr, c->aes_counter, AES_BLOCK_SIZE);
610
0
      ssh_ctr_add(c->q[i].ctr, i * KQLEN, AES_BLOCK_SIZE);
611
0
      c->q[i].qstate = KQEMPTY;
612
0
    }
613
0
    c->qidx = 0;
614
0
    c->ridx = 0;
615
0
    c->struct_id = global_struct_id++;
616
617
618
    /* Start threads */
619
0
#define STACK_SIZE (1024 * 1024)
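    /* 1MB of stack per pregen thread; note that thread_loop's mynull
     * buffer alone occupies KQLEN * AES_BLOCK_SIZE = 512KB of it */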
620
0
    pthread_attr_t attr;
621
0
    pthread_attr_init(&attr);
622
0
    pthread_attr_setstacksize(&attr, STACK_SIZE);
623
0
    for (i = 0; i < cipher_threads; i++) {
624
0
      pthread_rwlock_wrlock(&c->tid_lock);
625
0
      if (pthread_create(&c->tid[i], &attr, thread_loop, c) != 0)
626
0
        fatal ("AES-CTR MT Could not create thread in %s", __FUNCTION__);
627
        /* should die here */
628
0
      else {
629
0
        c->id[i] = i;
630
0
        debug ("AES-CTR MT spawned a thread with id %lu in %s (%lu, %d)",
631
0
               c->tid[i], __FUNCTION__, c->struct_id, c->id[i]);
632
0
      }
633
0
      pthread_rwlock_unlock(&c->tid_lock);
634
0
    }
635
0
    pthread_mutex_lock(&c->q[0].lock);
636
    /* wait until the first thread has filled the first queue */
637
0
    while (c->q[0].qstate == KQINIT)
638
0
      pthread_cond_wait(&c->q[0].cond, &c->q[0].lock);
639
0
    pthread_mutex_unlock(&c->q[0].lock);
640
0
  }
641
0
  return 1;
642
0
}
643
644
static int
645
ssh_aes_ctr_cleanup(EVP_CIPHER_CTX *ctx)
646
0
{
647
0
  struct ssh_aes_ctr_ctx_mt *c;
648
649
0
  if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) != NULL) {
650
0
    stop_and_join_pregen_threads(c);
651
652
0
    memset(c, 0, sizeof(*c));
653
0
    free(c);
654
0
    EVP_CIPHER_CTX_set_app_data(ctx, NULL);
655
0
  }
656
0
  return 1;
657
0
}
658
659
/* <friedl> */
660
const EVP_CIPHER *
661
evp_aes_ctr_mt(void)
662
0
{
663
0
  static EVP_CIPHER *aes_ctr;
664
0
  aes_ctr = EVP_CIPHER_meth_new(NID_undef, 16/*block*/, 16/*key*/);
665
0
  EVP_CIPHER_meth_set_iv_length(aes_ctr, AES_BLOCK_SIZE);
666
0
  EVP_CIPHER_meth_set_init(aes_ctr, ssh_aes_ctr_init);
667
0
  EVP_CIPHER_meth_set_cleanup(aes_ctr, ssh_aes_ctr_cleanup);
668
0
  EVP_CIPHER_meth_set_do_cipher(aes_ctr, ssh_aes_ctr);
669
0
#  ifndef SSH_OLD_EVP
670
0
  EVP_CIPHER_meth_set_flags(aes_ctr, EVP_CIPH_CBC_MODE
671
0
              | EVP_CIPH_VARIABLE_LENGTH
672
0
              | EVP_CIPH_ALWAYS_CALL_INIT
673
0
              | EVP_CIPH_CUSTOM_IV);
674
0
#  endif /*SSH_OLD_EVP*/
675
0
  return aes_ctr;
676
0
}
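/* A minimal usage sketch (illustrative only, not part of this file) of the
 * EVP_CIPHER built above: the key and IV are assumed to be supplied through
 * a second EVP init call, which lands in ssh_aes_ctr_init, and data, padded
 * to a multiple of AES_BLOCK_SIZE, is pushed through EVP_Cipher(), which
 * dispatches to ssh_aes_ctr(). example_usage, key32, iv16, inbuf, outbuf
 * and len are placeholder names. */
#if 0
static void
example_usage(const u_char *key32, const u_char *iv16,
    const u_char *inbuf, u_char *outbuf, u_int len)
{
  EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();

  EVP_EncryptInit_ex(ctx, evp_aes_ctr_mt(), NULL, NULL, NULL);
  EVP_CIPHER_CTX_set_key_length(ctx, 32);            /* AES-256 */
  EVP_EncryptInit_ex(ctx, NULL, NULL, key32, iv16);  /* -> ssh_aes_ctr_init */
  EVP_Cipher(ctx, outbuf, inbuf, len);               /* -> ssh_aes_ctr */
  EVP_CIPHER_CTX_free(ctx);                          /* -> ssh_aes_ctr_cleanup */
}
#endif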
677
#endif /* OSSL Check */