Coverage Report

Created: 2024-06-28 06:19

/src/wolfssl/wolfcrypt/src/chacha.c
Line
Count
Source (jump to first uncovered line)
1
/* chacha.c
2
 *
3
 * Copyright (C) 2006-2023 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
/*
22
23
DESCRIPTION
24
This library contains an implementation of the ChaCha20 stream cipher.
25
26
Based on chacha-ref.c version 20080118
27
D. J. Bernstein
28
Public domain.
29
30
*/
31
32
#ifdef HAVE_CONFIG_H
33
    #include <config.h>
34
#endif
35
36
#include <wolfssl/wolfcrypt/settings.h>
37
38
#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
39
    /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
40
41
#else
42
#if defined(HAVE_CHACHA)
43
44
#include <wolfssl/wolfcrypt/chacha.h>
45
#include <wolfssl/wolfcrypt/error-crypt.h>
46
#include <wolfssl/wolfcrypt/logging.h>
47
#include <wolfssl/wolfcrypt/cpuid.h>
48
#ifdef NO_INLINE
49
    #include <wolfssl/wolfcrypt/misc.h>
50
#else
51
    #define WOLFSSL_MISC_INCLUDED
52
    #include <wolfcrypt/src/misc.c>
53
#endif
54
55
#ifdef CHACHA_AEAD_TEST
56
    #include <stdio.h>
57
#endif
58
59
#if defined(USE_INTEL_CHACHA_SPEEDUP)
    #include <emmintrin.h>
    #include <immintrin.h>

    /* GCC 4.8 and older: force NO_AVX2_SUPPORT on. */
    #if defined(__GNUC__)
        #if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)
            #undef  NO_AVX2_SUPPORT
            #define NO_AVX2_SUPPORT
        #endif
    #endif

    /* clang 3.5 and older: force NO_AVX2_SUPPORT on.
     * Any other clang: clear NO_AVX2_SUPPORT, whether it was set by the GCC
     * version test above or was already defined before this point. */
    #if defined(__clang__)
        #if (__clang_major__ < 3) || \
            (__clang_major__ == 3 && __clang_minor__ <= 5)
            #undef  NO_AVX2_SUPPORT
            #define NO_AVX2_SUPPORT
        #elif defined(NO_AVX2_SUPPORT)
            #undef NO_AVX2_SUPPORT
        #endif
    #endif

    /* AVX2 code path is enabled unless it was ruled out above. */
    #if !defined(NO_AVX2_SUPPORT)
        #define HAVE_INTEL_AVX2
    #endif

    /* CPU feature flags, filled in on first use by wc_Chacha_Process(). */
    static int cpuidFlagsSet = 0;
    static word32 cpuidFlags = 0;
#endif
83
84
/* LITTLE32(x): convert between a native word32 and its little-endian form.
 * A byte swap on big-endian builds, the identity otherwise. */
#ifdef BIG_ENDIAN_ORDER
    #define LITTLE32(x) ByteReverseWord32(x)
#else
    #define LITTLE32(x) (x)
#endif

/* Number of rounds */
#define ROUNDS  20

/* U32C(v): append the unsigned suffix to an integer constant.
 * U32V(v): truncate a value to 32 bits. */
#define U32C(v) (v##U)
#define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF))

/* U8TO32_LITTLE(p): load four bytes at p as a little-endian 32-bit word.
 * The load is done byte by byte, so p does not need to be 4-byte aligned and
 * no byte buffer is accessed through a word32 pointer. The result is the
 * same on little- and big-endian builds. Note that p is evaluated more than
 * once, so it must not have side effects. */
#define U8TO32_LITTLE(p) \
    ( ((word32)(((const byte*)(p))[0])      ) | \
      ((word32)(((const byte*)(p))[1]) <<  8) | \
      ((word32)(((const byte*)(p))[2]) << 16) | \
      ((word32)(((const byte*)(p))[3]) << 24) )

/* 32-bit primitives used by the quarter round. */
#define ROTATE(v,c) rotlFixed(v, c)
#define XOR(v,w)    ((v) ^ (w))
#define PLUS(v,w)   (U32V((v) + (w)))
#define PLUSONE(v)  (PLUS((v),1))

/* One ChaCha quarter round on words a, b, c, d of a local array that must be
 * named x. The expansion is a sequence of statements ending in ';', so call
 * sites are written without a trailing semicolon. */
#define QUARTERROUND(a,b,c,d) \
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
107
108
109
/**
110
  * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
111
  * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
112
  */
113
int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
114
0
{
115
0
    word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
116
117
118
0
    if (ctx == NULL || inIv == NULL)
119
0
        return BAD_FUNC_ARG;
120
121
0
    XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
122
123
0
    ctx->left = 0; /* resets state */
124
0
    ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter;           /* block counter */
125
0
    ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
126
0
    ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]); /* counter from nonce */
127
0
    ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]); /* counter from nonce */
128
129
0
    return 0;
130
0
}
131
132
/* "expand 32-byte k" as unsigned 32 byte */
133
static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
134
/* "expand 16-byte k" as unsigned 16 byte */
135
static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
136
137
/**
138
  * Key setup. 8 word iv (nonce)
139
  */
140
int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
141
0
{
142
0
    const word32* constants;
143
0
    const byte*   k;
144
145
#ifdef XSTREAM_ALIGN
146
    word32 alignKey[8];
147
#endif
148
149
0
    if (ctx == NULL || key == NULL)
150
0
        return BAD_FUNC_ARG;
151
152
0
    if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
153
0
        return BAD_FUNC_ARG;
154
155
#ifdef XSTREAM_ALIGN
156
    if ((wc_ptr_t)key % 4) {
157
        WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
158
        XMEMCPY(alignKey, key, keySz);
159
        k = (byte*)alignKey;
160
    }
161
    else {
162
        k = key;
163
    }
164
#else
165
0
    k = key;
166
0
#endif /* XSTREAM_ALIGN */
167
168
#ifdef CHACHA_AEAD_TEST
169
    word32 i;
170
    printf("ChaCha key used :\n");
171
    for (i = 0; i < keySz; i++) {
172
        printf("%02x", key[i]);
173
        if ((i + 1) % 8 == 0)
174
           printf("\n");
175
    }
176
    printf("\n\n");
177
#endif
178
179
0
    ctx->X[4] = U8TO32_LITTLE(k +  0);
180
0
    ctx->X[5] = U8TO32_LITTLE(k +  4);
181
0
    ctx->X[6] = U8TO32_LITTLE(k +  8);
182
0
    ctx->X[7] = U8TO32_LITTLE(k + 12);
183
0
    if (keySz == CHACHA_MAX_KEY_SZ) {
184
0
        k += 16;
185
0
        constants = sigma;
186
0
    }
187
0
    else {
188
0
        constants = tau;
189
0
    }
190
0
    ctx->X[ 8] = U8TO32_LITTLE(k +  0);
191
0
    ctx->X[ 9] = U8TO32_LITTLE(k +  4);
192
0
    ctx->X[10] = U8TO32_LITTLE(k +  8);
193
0
    ctx->X[11] = U8TO32_LITTLE(k + 12);
194
0
    ctx->X[ 0] = constants[0];
195
0
    ctx->X[ 1] = constants[1];
196
0
    ctx->X[ 2] = constants[2];
197
0
    ctx->X[ 3] = constants[3];
198
0
    ctx->left = 0; /* resets state */
199
200
0
    return 0;
201
0
}
202
203
/**
204
  * Converts word into bytes with rotations having been done.
205
  */
206
static WC_INLINE void wc_Chacha_wordtobyte(word32 x[CHACHA_CHUNK_WORDS],
207
        word32 state[CHACHA_CHUNK_WORDS])
208
0
{
209
0
    word32 i;
210
211
0
    XMEMCPY(x, state, CHACHA_CHUNK_BYTES);
212
213
0
    for (i = (ROUNDS); i > 0; i -= 2) {
214
0
        QUARTERROUND(0, 4,  8, 12)
215
0
        QUARTERROUND(1, 5,  9, 13)
216
0
        QUARTERROUND(2, 6, 10, 14)
217
0
        QUARTERROUND(3, 7, 11, 15)
218
0
        QUARTERROUND(0, 5, 10, 15)
219
0
        QUARTERROUND(1, 6, 11, 12)
220
0
        QUARTERROUND(2, 7,  8, 13)
221
0
        QUARTERROUND(3, 4,  9, 14)
222
0
    }
223
224
0
    for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
225
0
        x[i] = PLUS(x[i], state[i]);
226
#ifdef BIG_ENDIAN_ORDER
227
        x[i] = LITTLE32(x[i]);
228
#endif
229
0
    }
230
0
}
231
232
233
#ifdef HAVE_XCHACHA
234
235
/*
236
 * wc_HChacha_block - half a ChaCha block, for XChaCha
237
 *
238
 * see https://tools.ietf.org/html/draft-arciszewski-xchacha-03
239
 */
240
static WC_INLINE void wc_HChacha_block(ChaCha* ctx, word32 stream[CHACHA_CHUNK_WORDS/2], word32 nrounds)
241
0
{
242
0
    word32 x[CHACHA_CHUNK_WORDS];
243
0
    word32 i;
244
245
0
    for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
246
0
        x[i] = ctx->X[i];
247
0
    }
248
249
0
    for (i = nrounds; i > 0; i -= 2) {
250
0
        QUARTERROUND(0, 4,  8, 12)
251
0
        QUARTERROUND(1, 5,  9, 13)
252
0
        QUARTERROUND(2, 6, 10, 14)
253
0
        QUARTERROUND(3, 7, 11, 15)
254
0
        QUARTERROUND(0, 5, 10, 15)
255
0
        QUARTERROUND(1, 6, 11, 12)
256
0
        QUARTERROUND(2, 7,  8, 13)
257
0
        QUARTERROUND(3, 4,  9, 14)
258
0
    }
259
260
0
    for (i = 0; i < CHACHA_CHUNK_WORDS/4; ++i)
261
0
        stream[i] = x[i];
262
0
    for (i = CHACHA_CHUNK_WORDS/4; i < CHACHA_CHUNK_WORDS/2; ++i)
263
0
        stream[i] = x[i + CHACHA_CHUNK_WORDS/2];
264
0
}
265
266
/* XChaCha -- https://tools.ietf.org/html/draft-arciszewski-xchacha-03 */
267
int wc_XChacha_SetKey(ChaCha *ctx,
268
                      const byte *key, word32 keySz,
269
                      const byte *nonce, word32 nonceSz,
270
0
                      word32 counter) {
271
0
    word32 k[CHACHA_MAX_KEY_SZ];
272
0
    byte iv[CHACHA_IV_BYTES];
273
0
    int ret;
274
275
0
    if (nonceSz != XCHACHA_NONCE_BYTES)
276
0
        return BAD_FUNC_ARG;
277
278
0
    if ((ret = wc_Chacha_SetKey(ctx, key, keySz)) < 0)
279
0
        return ret;
280
281
    /* form a first chacha IV from the first 16 bytes of the nonce.
282
     * the first word is supplied in the "counter" arg, and
283
     * the result is a full 128 bit nonceful IV for the one-time block
284
     * crypto op that follows.
285
     */
286
0
    if ((ret = wc_Chacha_SetIV(ctx, nonce + 4, U8TO32_LITTLE(nonce))) < 0)
287
0
        return ret;
288
289
0
    wc_HChacha_block(ctx, k, 20); /* 20 rounds, but keeping half the output. */
290
291
    /* the HChacha output is used as a 256 bit key for the main cipher. */
292
0
    XMEMCPY(&ctx->X[4], k, 8 * sizeof(word32));
293
294
    /* use 8 bytes from the end of the 24 byte nonce, padded up to 12 bytes,
295
     * to form the IV for the main cipher.
296
     */
297
0
    XMEMSET(iv, 0, 4);
298
0
    XMEMCPY(iv + 4, nonce + 16, 8);
299
300
0
    if ((ret = wc_Chacha_SetIV(ctx, iv, counter)) < 0)
301
0
        return ret;
302
303
0
    ForceZero(k, sizeof k);
304
0
    ForceZero(iv, sizeof iv);
305
306
0
    return 0;
307
0
}
308
309
#endif /* HAVE_XCHACHA */
310
311
312
#ifdef __cplusplus
313
    extern "C" {
314
#endif
315
316
extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
317
                               word32 bytes);
318
extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c,
319
                                word32 bytes);
320
extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
321
                                word32 bytes);
322
323
#ifdef __cplusplus
324
    }  /* extern "C" */
325
#endif
326
327
328
/**
329
  * Encrypt a stream of bytes
330
  */
331
static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
332
                                    word32 bytes)
333
0
{
334
0
    union {
335
0
        byte state[CHACHA_CHUNK_BYTES];
336
0
        word32 state32[CHACHA_CHUNK_WORDS];
337
0
        wolfssl_word align_word; /* align for xorbufout */
338
0
    } tmp;
339
340
    /* handle left overs */
341
0
    if (bytes > 0 && ctx->left > 0) {
342
0
        word32 processed = min(bytes, ctx->left);
343
0
        wc_Chacha_wordtobyte(tmp.state32, ctx->X); /* recreate the stream */
344
0
        xorbufout(c, m, tmp.state + CHACHA_CHUNK_BYTES - ctx->left, processed);
345
0
        ctx->left -= processed;
346
347
        /* Used up all of the stream that was left, increment the counter */
348
0
        if (ctx->left == 0) {
349
0
            ctx->X[CHACHA_MATRIX_CNT_IV] =
350
0
                                          PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
351
0
        }
352
0
        bytes -= processed;
353
0
        c += processed;
354
0
        m += processed;
355
0
    }
356
357
0
    while (bytes >= CHACHA_CHUNK_BYTES) {
358
0
        wc_Chacha_wordtobyte(tmp.state32, ctx->X);
359
0
        ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
360
0
        xorbufout(c, m, tmp.state, CHACHA_CHUNK_BYTES);
361
0
        bytes -= CHACHA_CHUNK_BYTES;
362
0
        c += CHACHA_CHUNK_BYTES;
363
0
        m += CHACHA_CHUNK_BYTES;
364
0
    }
365
366
0
    if (bytes) {
367
        /* in this case there will always be some left over since bytes is less
368
         * than CHACHA_CHUNK_BYTES, so do not increment counter after getting
369
         * stream in order for the stream to be recreated on next call */
370
0
        wc_Chacha_wordtobyte(tmp.state32, ctx->X);
371
0
        xorbufout(c, m, tmp.state, bytes);
372
0
        ctx->left = CHACHA_CHUNK_BYTES - bytes;
373
0
    }
374
0
}
375
376
/**
377
  * API to encrypt/decrypt a message of any size.
378
  */
379
int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
380
                      word32 msglen)
381
0
{
382
0
    if (ctx == NULL || input == NULL || output == NULL)
383
0
        return BAD_FUNC_ARG;
384
385
#ifdef USE_INTEL_CHACHA_SPEEDUP
386
    /* handle left overs */
387
    if (msglen > 0 && ctx->left > 0) {
388
        byte*  out;
389
        word32 processed = min(msglen, ctx->left);
390
391
        out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
392
        xorbufout(output, input, out, processed);
393
        ctx->left -= processed;
394
        msglen -= processed;
395
        output += processed;
396
        input += processed;
397
    }
398
399
    if (msglen == 0) {
400
        return 0;
401
    }
402
403
    if (!cpuidFlagsSet) {
404
        cpuidFlags = cpuid_get_flags();
405
        cpuidFlagsSet = 1;
406
    }
407
408
    #ifdef HAVE_INTEL_AVX2
409
    if (IS_INTEL_AVX2(cpuidFlags)) {
410
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
411
        chacha_encrypt_avx2(ctx, input, output, msglen);
412
        RESTORE_VECTOR_REGISTERS();
413
        return 0;
414
    }
415
    #endif
416
    if (IS_INTEL_AVX1(cpuidFlags)) {
417
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
418
        chacha_encrypt_avx1(ctx, input, output, msglen);
419
        RESTORE_VECTOR_REGISTERS();
420
        return 0;
421
    }
422
    else {
423
        chacha_encrypt_x64(ctx, input, output, msglen);
424
        return 0;
425
    }
426
#endif
427
0
    wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
428
429
0
    return 0;
430
0
}
431
432
0
/**
  * Discards whatever is still unused of the current keystream block, so
  * that the next call to wc_Chacha_Process() starts on a fresh block.
  *
  * ctx  cipher context; a NULL ctx or a context with no partially used
  *      block (ctx->left == 0) is left untouched
  */
void wc_Chacha_purge_current_block(ChaCha* ctx) {
    if (ctx != NULL && ctx->left > 0) {
        byte scratch[CHACHA_CHUNK_BYTES];
        XMEMSET(scratch, 0, sizeof(scratch));
        /* ctx->left is the number of keystream bytes of the current block
         * that have not been used yet; consuming exactly that many brings
         * ctx->left to zero */
        (void)wc_Chacha_Process(ctx, scratch, scratch, ctx->left);
        /* scratch was all zeros, so it now holds raw keystream: wipe it */
        ForceZero(scratch, sizeof(scratch));
    }
}
439
440
#endif /* HAVE_CHACHA */
441
442
#endif /* WOLFSSL_ARMASM && !WOLFSSL_ARMASM_NO_NEON */