Coverage Report

Created: 2025-07-23 06:59

/src/wolfssl-sp-math/wolfcrypt/src/chacha.c
Line
Count
Source (jump to first uncovered line)
1
/* chacha.c
2
 *
3
 * Copyright (C) 2006-2025 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
/*
22
23
DESCRIPTION
24
This library contains an implementation of the ChaCha20 stream cipher.
25
26
Based on chacha-ref.c version 20080118
27
D. J. Bernstein
28
Public domain.
29
30
*/
31
32
#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
33
34
#ifdef HAVE_CHACHA
35
    #include <wolfssl/wolfcrypt/chacha.h>
36
37
    #ifdef NO_INLINE
38
        #include <wolfssl/wolfcrypt/misc.h>
39
    #else
40
        #define WOLFSSL_MISC_INCLUDED
41
        #include <wolfcrypt/src/misc.c>
42
    #endif
43
44
    #ifdef BIG_ENDIAN_ORDER
45
        #define LITTLE32(x) ByteReverseWord32(x)
46
    #else
47
27.5k
        #define LITTLE32(x) (x)
48
    #endif
49
50
    /* Number of rounds */
51
12.6k
    #define ROUNDS  20
52
53
4.27M
    #define U32C(v) (v##U)
54
4.27M
    #define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF))
55
5.69k
    #define U8TO32_LITTLE(p) LITTLE32(((word32*)(p))[0])
56
57
4.06M
    #define ROTATE(v,c) rotlFixed(v, c)
58
    #define XOR(v,w)    ((v) ^ (w))
59
4.27M
    #define PLUS(v,w)   (U32V((v) + (w)))
60
8.14k
    #define PLUSONE(v)  (PLUS((v),1))
61
62
    #define QUARTERROUND(a,b,c,d) \
63
1.01M
        x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
64
1.01M
        x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
65
1.01M
        x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
66
1.01M
        x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
67
#endif /* HAVE_CHACHA */
68
69
70
#if defined(WOLFSSL_ARMASM) && !defined(NO_CHACHA_ASM)
71
    /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
72
73
#elif defined(WOLFSSL_RISCV_ASM) && !defined(NO_CHACHA_ASM)
74
    /* implementation located in wolfcrypt/src/port/riscv/riscv-64-chacha.c */
75
76
#else
77
78
/* BEGIN ChaCha C implementation */
79
#if defined(HAVE_CHACHA)
80
81
#include <wolfssl/wolfcrypt/cpuid.h>
82
83
#ifdef CHACHA_AEAD_TEST
84
    #include <stdio.h>
85
#endif
86
87
#ifdef USE_INTEL_CHACHA_SPEEDUP
88
    #include <emmintrin.h>
89
    #include <immintrin.h>
90
91
    #if defined(__GNUC__) && ((__GNUC__ < 4) || \
92
                              (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
93
        #undef  NO_AVX2_SUPPORT
94
        #define NO_AVX2_SUPPORT
95
    #endif
96
    #if defined(__clang__) && ((__clang_major__ < 3) || \
97
                               (__clang_major__ == 3 && __clang_minor__ <= 5))
98
        #undef  NO_AVX2_SUPPORT
99
        #define NO_AVX2_SUPPORT
100
    #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
101
        #undef NO_AVX2_SUPPORT
102
    #endif
103
    #if defined(_MSC_VER) && (_MSC_VER <= 1900)
104
        #undef  NO_AVX2_SUPPORT
105
        #define NO_AVX2_SUPPORT
106
    #endif
107
108
    #ifndef NO_AVX2_SUPPORT
109
        #define HAVE_INTEL_AVX2
110
    #endif
111
112
    static int cpuidFlagsSet = 0;
113
    static word32 cpuidFlags = 0;
114
#endif
115
116
/**
117
  * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
118
  * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
119
  */
120
int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
121
7.30k
{
122
7.30k
    word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
123
124
125
7.30k
    if (ctx == NULL || inIv == NULL)
126
0
        return BAD_FUNC_ARG;
127
128
7.30k
    XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
129
130
7.30k
    ctx->left = 0; /* resets state */
131
7.30k
    ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter;           /* block counter */
132
7.30k
    ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
133
7.30k
    ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]); /* counter from nonce */
134
7.30k
    ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]); /* counter from nonce */
135
136
7.30k
    return 0;
137
7.30k
}
138
139
/* "expand 32-byte k" as four little-endian 32-bit words */
140
static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
141
/* "expand 16-byte k" as four little-endian 32-bit words */
142
static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
143
144
/**
145
  * Key setup. 8 word iv (nonce)
146
  */
147
int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
148
712
{
149
712
    const word32* constants;
150
712
    const byte*   k;
151
152
#ifdef XSTREAM_ALIGN
153
    word32 alignKey[8];
154
#endif
155
156
712
    if (ctx == NULL || key == NULL)
157
0
        return BAD_FUNC_ARG;
158
159
712
    if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
160
0
        return BAD_FUNC_ARG;
161
162
#ifdef XSTREAM_ALIGN
163
    if ((wc_ptr_t)key % 4) {
164
        WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
165
        XMEMCPY(alignKey, key, keySz);
166
        k = (byte*)alignKey;
167
    }
168
    else {
169
        k = key;
170
    }
171
#else
172
712
    k = key;
173
712
#endif /* XSTREAM_ALIGN */
174
175
#ifdef CHACHA_AEAD_TEST
176
    word32 i;
177
    printf("ChaCha key used :\n");
178
    for (i = 0; i < keySz; i++) {
179
        printf("%02x", key[i]);
180
        if ((i + 1) % 8 == 0)
181
           printf("\n");
182
    }
183
    printf("\n\n");
184
#endif
185
186
712
    ctx->X[4] = U8TO32_LITTLE(k +  0);
187
712
    ctx->X[5] = U8TO32_LITTLE(k +  4);
188
712
    ctx->X[6] = U8TO32_LITTLE(k +  8);
189
712
    ctx->X[7] = U8TO32_LITTLE(k + 12);
190
712
    if (keySz == CHACHA_MAX_KEY_SZ) {
191
712
        k += 16;
192
712
        constants = sigma;
193
712
    }
194
0
    else {
195
0
        constants = tau;
196
0
    }
197
712
    ctx->X[ 8] = U8TO32_LITTLE(k +  0);
198
712
    ctx->X[ 9] = U8TO32_LITTLE(k +  4);
199
712
    ctx->X[10] = U8TO32_LITTLE(k +  8);
200
712
    ctx->X[11] = U8TO32_LITTLE(k + 12);
201
712
    ctx->X[ 0] = constants[0];
202
712
    ctx->X[ 1] = constants[1];
203
712
    ctx->X[ 2] = constants[2];
204
712
    ctx->X[ 3] = constants[3];
205
712
    ctx->left = 0; /* resets state */
206
207
712
    return 0;
208
712
}
209
210
#ifndef USE_INTEL_CHACHA_SPEEDUP
211
/**
212
  * Converts word into bytes with rotations having been done.
213
  */
214
static WC_INLINE void wc_Chacha_wordtobyte(word32 x[CHACHA_CHUNK_WORDS],
215
        word32 state[CHACHA_CHUNK_WORDS])
216
12.6k
{
217
12.6k
    word32 i;
218
219
12.6k
    XMEMCPY(x, state, CHACHA_CHUNK_BYTES);
220
221
139k
    for (i = (ROUNDS); i > 0; i -= 2) {
222
126k
        QUARTERROUND(0, 4,  8, 12)
223
126k
        QUARTERROUND(1, 5,  9, 13)
224
126k
        QUARTERROUND(2, 6, 10, 14)
225
126k
        QUARTERROUND(3, 7, 11, 15)
226
126k
        QUARTERROUND(0, 5, 10, 15)
227
126k
        QUARTERROUND(1, 6, 11, 12)
228
126k
        QUARTERROUND(2, 7,  8, 13)
229
126k
        QUARTERROUND(3, 4,  9, 14)
230
126k
    }
231
232
215k
    for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
233
203k
        x[i] = PLUS(x[i], state[i]);
234
#ifdef BIG_ENDIAN_ORDER
235
        x[i] = LITTLE32(x[i]);
236
#endif
237
203k
    }
238
12.6k
}
239
#endif /* !USE_INTEL_CHACHA_SPEEDUP */
240
241
#ifdef __cplusplus
242
    extern "C" {
243
#endif
244
245
extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
246
                               word32 bytes);
247
extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c,
248
                                word32 bytes);
249
extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
250
                                word32 bytes);
251
252
#ifdef __cplusplus
253
    }  /* extern "C" */
254
#endif
255
256
257
#ifndef USE_INTEL_CHACHA_SPEEDUP
258
/**
259
  * Encrypt a stream of bytes
260
  */
261
static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
262
                                    word32 bytes)
263
4.54k
{
264
4.54k
    union {
265
4.54k
        byte state[CHACHA_CHUNK_BYTES];
266
4.54k
        word32 state32[CHACHA_CHUNK_WORDS];
267
4.54k
        wolfssl_word align_word; /* align for xorbufout */
268
4.54k
    } tmp;
269
270
    /* handle left overs */
271
4.54k
    if (bytes > 0 && ctx->left > 0) {
272
0
        word32 processed = min(bytes, ctx->left);
273
0
        wc_Chacha_wordtobyte(tmp.state32, ctx->X); /* recreate the stream */
274
0
        xorbufout(c, m, tmp.state + CHACHA_CHUNK_BYTES - ctx->left, processed);
275
0
        ctx->left -= processed;
276
277
        /* Used up all of the stream that was left, increment the counter */
278
0
        if (ctx->left == 0) {
279
0
            ctx->X[CHACHA_MATRIX_CNT_IV] =
280
0
                                          PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
281
0
        }
282
0
        bytes -= processed;
283
0
        c += processed;
284
0
        m += processed;
285
0
    }
286
287
12.6k
    while (bytes >= CHACHA_CHUNK_BYTES) {
288
8.14k
        wc_Chacha_wordtobyte(tmp.state32, ctx->X);
289
8.14k
        ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
290
8.14k
        xorbufout(c, m, tmp.state, CHACHA_CHUNK_BYTES);
291
8.14k
        bytes -= CHACHA_CHUNK_BYTES;
292
8.14k
        c += CHACHA_CHUNK_BYTES;
293
8.14k
        m += CHACHA_CHUNK_BYTES;
294
8.14k
    }
295
296
4.54k
    if (bytes) {
297
        /* in this case there will always be some left over since bytes is less
298
         * than CHACHA_CHUNK_BYTES, so do not increment counter after getting
299
         * stream in order for the stream to be recreated on next call */
300
4.54k
        wc_Chacha_wordtobyte(tmp.state32, ctx->X);
301
4.54k
        xorbufout(c, m, tmp.state, bytes);
302
4.54k
        ctx->left = CHACHA_CHUNK_BYTES - bytes;
303
4.54k
    }
304
4.54k
}
305
#endif /* !USE_INTEL_CHACHA_SPEEDUP */
306
307
308
/**
309
  * API to encrypt/decrypt a message of any size.
310
  */
311
int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
312
                      word32 msglen)
313
4.54k
{
314
4.54k
    if (ctx == NULL || input == NULL || output == NULL)
315
0
        return BAD_FUNC_ARG;
316
317
#ifdef USE_INTEL_CHACHA_SPEEDUP
318
    /* handle left overs */
319
    if (msglen > 0 && ctx->left > 0) {
320
        byte*  out;
321
        word32 processed = min(msglen, ctx->left);
322
323
        out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
324
        xorbufout(output, input, out, processed);
325
        ctx->left -= processed;
326
        msglen -= processed;
327
        output += processed;
328
        input += processed;
329
    }
330
331
    if (msglen == 0) {
332
        return 0;
333
    }
334
335
    if (!cpuidFlagsSet) {
336
        cpuidFlags = cpuid_get_flags();
337
        cpuidFlagsSet = 1;
338
    }
339
340
    #ifdef HAVE_INTEL_AVX2
341
    if (IS_INTEL_AVX2(cpuidFlags)) {
342
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
343
        chacha_encrypt_avx2(ctx, input, output, msglen);
344
        RESTORE_VECTOR_REGISTERS();
345
        return 0;
346
    }
347
    #endif
348
    if (IS_INTEL_AVX1(cpuidFlags)) {
349
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
350
        chacha_encrypt_avx1(ctx, input, output, msglen);
351
        RESTORE_VECTOR_REGISTERS();
352
        return 0;
353
    }
354
    else {
355
        chacha_encrypt_x64(ctx, input, output, msglen);
356
        return 0;
357
    }
358
#else
359
4.54k
    wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
360
4.54k
    return 0;
361
4.54k
#endif
362
4.54k
}
363
364
#endif /* HAVE_CHACHA */
365
#endif /* END ChaCha C implementation */
366
367
#if defined(HAVE_CHACHA) && defined(HAVE_XCHACHA)
368
369
/*
 * wc_Chacha_purge_current_block - discard the unused remainder of the current
 * keystream block so that the next wc_Chacha_Process() call starts at a block
 * boundary.
 *
 * ctx  cipher state; nothing is done when it is NULL or when no partial block
 *      is pending (ctx->left == 0)
 *
 * ctx->left is the count of keystream bytes still unused in the current
 * block, so exactly that many bytes are run through wc_Chacha_Process() into
 * a throw-away buffer.
 */
void wc_Chacha_purge_current_block(ChaCha* ctx)
{
    if (ctx == NULL)
        return;

    /* the upper bound keeps a corrupted context from overrunning scratch */
    if (ctx->left > 0 && ctx->left <= CHACHA_CHUNK_BYTES) {
        byte scratch[CHACHA_CHUNK_BYTES];
        XMEMSET(scratch, 0, sizeof(scratch));
        (void)wc_Chacha_Process(ctx, scratch, scratch, ctx->left);
    }
}
377
378
/*
379
 * wc_HChacha_block - half a ChaCha block, for XChaCha
380
 *
381
 * see https://tools.ietf.org/html/draft-arciszewski-xchacha-03
382
 */
383
static WC_INLINE void wc_HChacha_block(ChaCha* ctx,
384
    word32 stream[CHACHA_CHUNK_WORDS/2], word32 nrounds)
385
0
{
386
0
    word32 x[CHACHA_CHUNK_WORDS];
387
0
    word32 i;
388
389
0
    for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
390
0
        x[i] = ctx->X[i];
391
0
    }
392
393
0
    for (i = nrounds; i > 0; i -= 2) {
394
0
        QUARTERROUND(0, 4,  8, 12)
395
0
        QUARTERROUND(1, 5,  9, 13)
396
0
        QUARTERROUND(2, 6, 10, 14)
397
0
        QUARTERROUND(3, 7, 11, 15)
398
0
        QUARTERROUND(0, 5, 10, 15)
399
0
        QUARTERROUND(1, 6, 11, 12)
400
0
        QUARTERROUND(2, 7,  8, 13)
401
0
        QUARTERROUND(3, 4,  9, 14)
402
0
    }
403
404
0
    for (i = 0; i < CHACHA_CHUNK_WORDS/4; ++i)
405
0
        stream[i] = x[i];
406
0
    for (i = CHACHA_CHUNK_WORDS/4; i < CHACHA_CHUNK_WORDS/2; ++i)
407
0
        stream[i] = x[i + CHACHA_CHUNK_WORDS/2];
408
0
}
409
410
/* XChaCha -- https://tools.ietf.org/html/draft-arciszewski-xchacha-03 */
411
int wc_XChacha_SetKey(ChaCha *ctx,
412
                      const byte *key, word32 keySz,
413
                      const byte *nonce, word32 nonceSz,
414
                      word32 counter)
415
0
{
416
0
    int ret;
417
0
    word32 k[CHACHA_MAX_KEY_SZ];
418
0
    byte   iv[CHACHA_IV_BYTES];
419
420
0
    if (nonceSz != XCHACHA_NONCE_BYTES)
421
0
        return BAD_FUNC_ARG;
422
423
0
    if ((ret = wc_Chacha_SetKey(ctx, key, keySz)) < 0)
424
0
        return ret;
425
426
    /* form a first chacha IV from the first 16 bytes of the nonce.
427
     * the first word is supplied in the "counter" arg, and
428
     * the result is a full 128 bit nonceful IV for the one-time block
429
     * crypto op that follows.
430
     */
431
0
    if ((ret = wc_Chacha_SetIV(ctx, nonce + 4, U8TO32_LITTLE(nonce))) < 0)
432
0
        return ret;
433
434
0
    wc_HChacha_block(ctx, k, 20); /* 20 rounds, but keeping half the output. */
435
436
    /* the HChacha output is used as a 256 bit key for the main cipher. */
437
0
    XMEMCPY(&ctx->X[4], k, 8 * sizeof(word32));
438
439
    /* use 8 bytes from the end of the 24 byte nonce, padded up to 12 bytes,
440
     * to form the IV for the main cipher.
441
     */
442
0
    XMEMSET(iv, 0, 4);
443
0
    XMEMCPY(iv + 4, nonce + 16, 8);
444
445
0
    if ((ret = wc_Chacha_SetIV(ctx, iv, counter)) < 0)
446
0
        return ret;
447
448
0
    ForceZero(k, sizeof k);
449
0
    ForceZero(iv, sizeof iv);
450
451
0
    return 0;
452
0
}
453
454
#endif /* HAVE_CHACHA && HAVE_XCHACHA */