Coverage Report

Created: 2026-04-05 07:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wolfssl-fastmath/wolfcrypt/src/poly1305.c
Line
Count
Source
1
/* poly1305.c
2
 *
3
 * Copyright (C) 2006-2026 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
/*
22
23
DESCRIPTION
24
This library contains an implementation of the Poly1305 authenticator.
25
26
Based off the public domain implementations by Andrew Moon
27
and Daniel J. Bernstein
28
29
*/
30
31
32
/*
33
 * WOLFSSL_W64_WRAPPER Uses wrappers around word64 types for a system that does
34
 *                     not have word64 available. As expected it reduces
35
 *                     performance. Benchmarks collected July 2024 show
36
 *                     303.004 MiB/s with and 1874.194 MiB/s without.
37
 */
38
39
#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
40
41
/*
42
 * Poly1305 Build Options:
43
 *
44
 * HAVE_POLY1305:            Enable Poly1305 authenticator          default: off
45
 * POLY130564:               Use 64-bit Poly1305 implementation    default: auto
46
 * USE_INTEL_POLY1305_SPEEDUP: Intel AVX/AVX2 Poly1305 accel      default: off
47
 */
48
49
#ifdef HAVE_POLY1305
50
#include <wolfssl/wolfcrypt/poly1305.h>
51
#include <wolfssl/wolfcrypt/cpuid.h>
52
#ifdef NO_INLINE
53
    #include <wolfssl/wolfcrypt/misc.h>
54
#else
55
    #define WOLFSSL_MISC_INCLUDED
56
    #include <wolfcrypt/src/misc.c>
57
#endif
58
#ifdef CHACHA_AEAD_TEST
59
    #include <stdio.h>
60
#endif
61
62
#ifdef _MSC_VER
63
    /* 4127 warning constant while(1)  */
64
    #pragma warning(disable: 4127)
65
#endif
66
67
#ifdef USE_INTEL_POLY1305_SPEEDUP
68
    #include <emmintrin.h>
69
    #include <immintrin.h>
70
71
    #if defined(__GNUC__) && ((__GNUC__ < 4) || \
72
                              (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
73
        #undef  NO_AVX2_SUPPORT
74
        #define NO_AVX2_SUPPORT
75
    #endif
76
    #if defined(__clang__) && ((__clang_major__ < 3) || \
77
                               (__clang_major__ == 3 && __clang_minor__ <= 5))
78
        #define NO_AVX2_SUPPORT
79
    #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
80
        #undef NO_AVX2_SUPPORT
81
    #endif
82
    #if defined(_MSC_VER) && (_MSC_VER <= 1900)
83
        #undef  NO_AVX2_SUPPORT
84
        #define NO_AVX2_SUPPORT
85
    #endif
86
87
    #define HAVE_INTEL_AVX1
88
    #ifndef NO_AVX2_SUPPORT
89
        #define HAVE_INTEL_AVX2
90
    #endif
91
#endif
92
93
#ifdef USE_INTEL_POLY1305_SPEEDUP
94
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
95
#endif
96
97
#if defined(USE_INTEL_POLY1305_SPEEDUP) || defined(POLY130564)
    #if defined(__WATCOMC__)
        #error "POLY130564 || USE_INTEL_POLY1305_SPEEDUP Watcom not supported"
    #elif defined(_MSC_VER)
        #define POLY1305_NOINLINE __declspec(noinline)
    #elif defined(__GNUC__)
        #define POLY1305_NOINLINE __attribute__((noinline))
    #else
        #define POLY1305_NOINLINE
    #endif

    /*
     * 128-bit helpers used by the 64-bit implementation:
     *   MUL(out, x, y)    out  = x * y (full 128-bit product)
     *   ADD(out, in)      out += in    (both 128-bit)
     *   ADDLO(out, in)    out += in    (in is a 64-bit value)
     *   SHR(in, shift)    64-bit value of (in >> shift)
     *   LO(in)            low 64 bits of in
     * MUL, ADD and ADDLO are statements, not expressions.
     */
    #if defined(_MSC_VER) && !defined(__WATCOMC__)
        #include <intrin.h>

        /* 128-bit unsigned value kept as two 64-bit halves. */
        typedef struct poly1305_word128 {
            word64 lo;
            word64 hi;
        } poly1305_word128;

        #define MUL(out, x, y) (out).lo = _umul128((x), (y), &(out).hi)
        /* (out).lo < t detects the carry out of the low half. */
        #define ADD(out, in) do { word64 t = (out).lo; (out).lo += (in).lo; \
                                  (out).hi += ((out).lo < t) + (in).hi;     \
                             } while (0)
        #define ADDLO(out, in) do { word64 t = (out).lo; (out).lo += (in); \
                                    (out).hi += ((out).lo < t);            \
                               } while (0)
        #define SHR(in, shift) (__shiftright128((in).lo, (in).hi, (shift)))
        #define LO(in) ((in).lo)

    #elif defined(__GNUC__)
        #if defined(__SIZEOF_INT128__)
            PEDANTIC_EXTENSION typedef unsigned __int128 poly1305_word128;
        #else
            typedef unsigned poly1305_word128 __attribute__((mode(TI)));
        #endif

        #define MUL(out, x, y) (out) = ((poly1305_word128)(x) * (y))
        #define ADD(out, in) (out) += (in)
        #define ADDLO(out, in) (out) += (in)
        #define SHR(in, shift) ((word64)((in) >> (shift)))
        #define LO(in) ((word64)(in))
    #endif
#endif
138
139
#ifdef USE_INTEL_POLY1305_SPEEDUP
140
#ifdef __cplusplus
141
    extern "C" {
142
#endif
143
144
#ifdef HAVE_INTEL_AVX1
145
/* Process one block (16 bytes) of data.
146
 *
147
 * ctx  Poly1305 context.
148
 * m    One block of message data.
149
 */
150
WOLFSSL_LOCAL void poly1305_block_avx(Poly1305* ctx, const unsigned char *m);
151
/* Process multiple blocks (n * 16 bytes) of data.
152
 *
153
 * ctx    Poly1305 context.
154
 * m      Blocks of message data.
155
 * bytes  The number of bytes to process.
156
 */
157
WOLFSSL_LOCAL void poly1305_blocks_avx(Poly1305* ctx, const unsigned char* m,
158
                                size_t bytes);
159
/* Set the key to use when processing data.
160
 * Initialize the context.
161
 *
162
 * ctx  Poly1305 context.
163
 * key  The key data (16 bytes).
164
 */
165
WOLFSSL_LOCAL void poly1305_setkey_avx(Poly1305* ctx, const byte* key);
166
/* Calculate the final result - authentication data.
167
 * Zeros out the private data in the context.
168
 *
169
 * ctx  Poly1305 context.
170
 * mac  Buffer to hold 16 bytes.
171
 */
172
WOLFSSL_LOCAL void poly1305_final_avx(Poly1305* ctx, byte* mac);
173
#endif
174
175
#ifdef HAVE_INTEL_AVX2
176
/* Process multiple blocks (n * 16 bytes) of data.
177
 *
178
 * ctx    Poly1305 context.
179
 * m      Blocks of message data.
180
 * bytes  The number of bytes to process.
181
 */
182
WOLFSSL_LOCAL void poly1305_blocks_avx2(Poly1305* ctx, const unsigned char* m,
183
                                 size_t bytes);
184
/* Calculate R^1, R^2, R^3 and R^4 and store them in the context.
185
 *
186
 * ctx    Poly1305 context.
187
 */
188
WOLFSSL_LOCAL void poly1305_calc_powers_avx2(Poly1305* ctx);
189
/* Set the key to use when processing data.
190
 * Initialize the context.
191
 * Calls AVX set key function as final function calls AVX code.
192
 *
193
 * ctx  Poly1305 context.
194
 * key  The key data (16 bytes).
195
 */
196
WOLFSSL_LOCAL void poly1305_setkey_avx2(Poly1305* ctx, const byte* key);
197
/* Calculate the final result - authentication data.
198
 * Zeros out the private data in the context.
199
 * Calls AVX final function to quickly process last blocks.
200
 *
201
 * ctx  Poly1305 context.
202
 * mac  Buffer to hold 16 bytes - authentication data.
203
 */
204
WOLFSSL_LOCAL void poly1305_final_avx2(Poly1305* ctx, byte* mac);
205
#endif
206
207
#ifdef __cplusplus
208
    }  /* extern "C" */
209
#endif
210
211
#elif defined(POLY130564)
212
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
213
    static word64 U8TO64(const byte* p)
214
118k
    {
215
118k
        return
216
118k
            (((word64)(p[0] & 0xff)      ) |
217
118k
             ((word64)(p[1] & 0xff) <<  8) |
218
118k
             ((word64)(p[2] & 0xff) << 16) |
219
118k
             ((word64)(p[3] & 0xff) << 24) |
220
118k
             ((word64)(p[4] & 0xff) << 32) |
221
118k
             ((word64)(p[5] & 0xff) << 40) |
222
118k
             ((word64)(p[6] & 0xff) << 48) |
223
118k
             ((word64)(p[7] & 0xff) << 56));
224
118k
    }
225
226
6.60k
    /* Write the 64-bit word v to p[0..7], least-significant byte first. */
    static void U64TO8(byte* p, word64 v) {
        int idx;

        for (idx = 0; idx < 8; idx++) {
            p[idx] = (byte)(v >> (8 * idx));
        }
    }
236
#endif/* !WOLFSSL_ARMASM && !WOLFSSL_RISCV_ASM */
237
/* if not 64 bit then use 32 bit */
238
#elif !defined(WOLFSSL_ARMASM)
239
240
    static word32 U8TO32(const byte *p)
241
    {
242
        return
243
            (((word32)(p[0] & 0xff)      ) |
244
             ((word32)(p[1] & 0xff) <<  8) |
245
             ((word32)(p[2] & 0xff) << 16) |
246
             ((word32)(p[3] & 0xff) << 24));
247
    }
248
249
    /* Write the 32-bit word v to p[0..3], least-significant byte first. */
    static void U32TO8(byte *p, word32 v) {
        int idx;

        for (idx = 0; idx < 4; idx++) {
            p[idx] = (byte)((v >> (8 * idx)) & 0xff);
        }
    }
255
#endif
256
257
/* convert 32-bit unsigned to little endian 64 bit type as byte array */
258
static WC_INLINE void u32tole64(const word32 inLe32, byte outLe64[8])
259
6.57k
{
260
#ifndef WOLFSSL_X86_64_BUILD
261
    outLe64[0] = (byte)(inLe32  & 0x000000FF);
262
    outLe64[1] = (byte)((inLe32 & 0x0000FF00) >> 8);
263
    outLe64[2] = (byte)((inLe32 & 0x00FF0000) >> 16);
264
    outLe64[3] = (byte)((inLe32 & 0xFF000000) >> 24);
265
    outLe64[4] = 0;
266
    outLe64[5] = 0;
267
    outLe64[6] = 0;
268
    outLe64[7] = 0;
269
#else
270
6.57k
    *(word64*)outLe64 = inLe32;
271
6.57k
#endif
272
6.57k
}
273
274
275
#if !defined(WOLFSSL_RISCV_ASM)
276
/*
277
This local function operates on a message with a given number of bytes
278
with a given ctx pointer to a Poly1305 structure.
279
*/
280
static int poly1305_blocks(Poly1305* ctx, const unsigned char *m,
281
                     size_t bytes)
282
10.6k
{
283
#ifdef USE_INTEL_POLY1305_SPEEDUP
284
    /* AVX2 is handled in wc_Poly1305Update. */
285
    SAVE_VECTOR_REGISTERS(return _svr_ret;);
286
    poly1305_blocks_avx(ctx, m, bytes);
287
    RESTORE_VECTOR_REGISTERS();
288
    return 0;
289
#elif defined(WOLFSSL_ARMASM) && defined(__aarch64__)
290
    poly1305_arm64_blocks(ctx, m, bytes);
291
    return 0;
292
#elif defined(WOLFSSL_ARMASM) && defined(WOLFSSL_ARMASM_THUMB2)
293
    poly1305_blocks_thumb2_16(ctx, m, bytes, 1);
294
    return 0;
295
#elif defined(WOLFSSL_ARMASM)
296
#ifndef WOLFSSL_ARMASM_NO_NEON
297
    poly1305_arm32_blocks(ctx, m, bytes);
298
    return 0;
299
#else
300
    poly1305_arm32_blocks_16(ctx, m, bytes, 1);
301
    return 0;
302
#endif
303
#elif defined(POLY130564)
304
10.6k
    const word64 hibit = (ctx->finished) ? 0 : ((word64)1 << 40); /* 1 << 128 */
305
10.6k
    word64 r0,r1,r2;
306
10.6k
    word64 s1,s2;
307
10.6k
    word64 h0,h1,h2;
308
10.6k
    word64 c;
309
10.6k
    poly1305_word128 d0,d1,d2,d;
310
311
10.6k
    r0 = ctx->r[0];
312
10.6k
    r1 = ctx->r[1];
313
10.6k
    r2 = ctx->r[2];
314
315
10.6k
    h0 = ctx->h[0];
316
10.6k
    h1 = ctx->h[1];
317
10.6k
    h2 = ctx->h[2];
318
319
10.6k
    s1 = r1 * (5 << 2);
320
10.6k
    s2 = r2 * (5 << 2);
321
322
63.4k
    while (bytes >= POLY1305_BLOCK_SIZE) {
323
52.8k
        word64 t0,t1;
324
325
        /* h += m[i] */
326
52.8k
        t0 = U8TO64(&m[0]);
327
52.8k
        t1 = U8TO64(&m[8]);
328
329
52.8k
        h0 += (( t0                    ) & 0xfffffffffff);
330
52.8k
        h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
331
52.8k
        h2 += (((t1 >> 24)             ) & 0x3ffffffffff) | hibit;
332
333
        /* h *= r */
334
52.8k
        MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d);
335
52.8k
        MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d);
336
52.8k
        MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d);
337
338
        /* (partial) h %= p */
339
52.8k
                      c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff;
340
52.8k
        ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff;
341
52.8k
        ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff;
342
52.8k
        h0  += c * 5; c = (h0 >> 44);  h0 =    h0  & 0xfffffffffff;
343
52.8k
        h1  += c;
344
345
52.8k
        m += POLY1305_BLOCK_SIZE;
346
52.8k
        bytes -= POLY1305_BLOCK_SIZE;
347
52.8k
    }
348
349
10.6k
    ctx->h[0] = h0;
350
10.6k
    ctx->h[1] = h1;
351
10.6k
    ctx->h[2] = h2;
352
353
10.6k
    return 0;
354
355
#else /* if not 64 bit then use 32 bit */
356
    const word32 hibit = (ctx->finished) ? 0 : ((word32)1 << 24); /* 1 << 128 */
357
    word32 r0,r1,r2,r3,r4;
358
    word32 s1,s2,s3,s4;
359
    word32 h0,h1,h2,h3,h4;
360
    word32 c;
361
#ifdef WOLFSSL_W64_WRAPPER
362
    #ifdef WOLFSSL_SMALL_STACK
363
    w64wrapper* d;
364
365
    d = (w64wrapper*)XMALLOC(5 * sizeof(w64wrapper), NULL,
366
        DYNAMIC_TYPE_TMP_BUFFER);
367
    if (d == NULL) {
368
        return MEMORY_E;
369
    }
370
    #else
371
    w64wrapper d[5];
372
    #endif
373
#else
374
    word64 d0,d1,d2,d3,d4;
375
#endif
376
377
378
    r0 = ctx->r[0];
379
    r1 = ctx->r[1];
380
    r2 = ctx->r[2];
381
    r3 = ctx->r[3];
382
    r4 = ctx->r[4];
383
384
    s1 = r1 * 5;
385
    s2 = r2 * 5;
386
    s3 = r3 * 5;
387
    s4 = r4 * 5;
388
389
    h0 = ctx->h[0];
390
    h1 = ctx->h[1];
391
    h2 = ctx->h[2];
392
    h3 = ctx->h[3];
393
    h4 = ctx->h[4];
394
395
    while (bytes >= POLY1305_BLOCK_SIZE) {
396
        /* h += m[i] */
397
        h0 += (U8TO32(m+ 0)     ) & 0x3ffffff;
398
        h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff;
399
        h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff;
400
        h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff;
401
        h4 += (U8TO32(m+12) >> 8) | hibit;
402
403
        /* h *= r */
404
#ifdef WOLFSSL_W64_WRAPPER
405
        {
406
            w64wrapper tmp;
407
408
            d[0] = w64Mul(h0, r0); tmp = w64Mul(h1, s4);
409
            d[0] = w64Add(d[0], tmp, NULL); tmp = w64Mul(h2, s3);
410
            d[0] = w64Add(d[0], tmp, NULL); tmp = w64Mul(h3, s2);
411
            d[0] = w64Add(d[0], tmp, NULL); tmp = w64Mul(h4, s1);
412
            d[0] = w64Add(d[0], tmp, NULL);
413
414
            d[1] = w64Mul(h0, r1); tmp = w64Mul(h1, r0);
415
            d[1] = w64Add(d[1], tmp, NULL); tmp = w64Mul(h2, s4);
416
            d[1] = w64Add(d[1], tmp, NULL); tmp = w64Mul(h3, s3);
417
            d[1] = w64Add(d[1], tmp, NULL); tmp = w64Mul(h4, s2);
418
            d[1] = w64Add(d[1], tmp, NULL);
419
420
            d[2] = w64Mul(h0, r2); tmp = w64Mul(h1, r1);
421
            d[2] = w64Add(d[2], tmp, NULL); tmp = w64Mul(h2, r0);
422
            d[2] = w64Add(d[2], tmp, NULL); tmp = w64Mul(h3, s4);
423
            d[2] = w64Add(d[2], tmp, NULL); tmp = w64Mul(h4, s3);
424
            d[2] = w64Add(d[2], tmp, NULL);
425
426
            d[3] = w64Mul(h0, r3); tmp = w64Mul(h1, r2);
427
            d[3] = w64Add(d[3], tmp, NULL); tmp = w64Mul(h2, r1);
428
            d[3] = w64Add(d[3], tmp, NULL); tmp = w64Mul(h3, r0);
429
            d[3] = w64Add(d[3], tmp, NULL); tmp = w64Mul(h4, s4);
430
            d[3] = w64Add(d[3], tmp, NULL);
431
432
            d[4] = w64Mul(h0, r4); tmp = w64Mul(h1, r3);
433
            d[4] = w64Add(d[4], tmp, NULL); tmp = w64Mul(h2, r2);
434
            d[4] = w64Add(d[4], tmp, NULL); tmp = w64Mul(h3, r1);
435
            d[4] = w64Add(d[4], tmp, NULL); tmp = w64Mul(h4, r0);
436
            d[4] = w64Add(d[4], tmp, NULL);
437
        }
438
#else
439
        d0 = ((word64)h0 * r0) + ((word64)h1 * s4) + ((word64)h2 * s3) +
440
             ((word64)h3 * s2) + ((word64)h4 * s1);
441
        d1 = ((word64)h0 * r1) + ((word64)h1 * r0) + ((word64)h2 * s4) +
442
             ((word64)h3 * s3) + ((word64)h4 * s2);
443
        d2 = ((word64)h0 * r2) + ((word64)h1 * r1) + ((word64)h2 * r0) +
444
             ((word64)h3 * s4) + ((word64)h4 * s3);
445
        d3 = ((word64)h0 * r3) + ((word64)h1 * r2) + ((word64)h2 * r1) +
446
             ((word64)h3 * r0) + ((word64)h4 * s4);
447
        d4 = ((word64)h0 * r4) + ((word64)h1 * r3) + ((word64)h2 * r2) +
448
             ((word64)h3 * r1) + ((word64)h4 * r0);
449
#endif
450
451
        /* (partial) h %= p */
452
#ifdef WOLFSSL_W64_WRAPPER
453
        c = w64GetLow32(w64ShiftRight(d[0], 26));
454
        h0 = w64GetLow32(d[0]) & 0x3ffffff;
455
        d[1] = w64Add32(d[1], c, NULL);
456
        c = w64GetLow32(w64ShiftRight(d[1], 26));
457
        h1 = w64GetLow32(d[1]) & 0x3ffffff;
458
        d[2] = w64Add32(d[2], c, NULL);
459
        c = w64GetLow32(w64ShiftRight(d[2], 26));
460
        h2 = w64GetLow32(d[2]) & 0x3ffffff;
461
        d[3] = w64Add32(d[3], c, NULL);
462
        c = w64GetLow32(w64ShiftRight(d[3], 26));
463
        h3 = w64GetLow32(d[3]) & 0x3ffffff;
464
        d[4] = w64Add32(d[4], c, NULL);
465
        c = w64GetLow32(w64ShiftRight(d[4], 26));
466
        h4 = w64GetLow32(d[4]) & 0x3ffffff;
467
#else
468
                      c = (word32)(d0 >> 26); h0 = (word32)d0 & 0x3ffffff;
469
        d1 += c;      c = (word32)(d1 >> 26); h1 = (word32)d1 & 0x3ffffff;
470
        d2 += c;      c = (word32)(d2 >> 26); h2 = (word32)d2 & 0x3ffffff;
471
        d3 += c;      c = (word32)(d3 >> 26); h3 = (word32)d3 & 0x3ffffff;
472
        d4 += c;      c = (word32)(d4 >> 26); h4 = (word32)d4 & 0x3ffffff;
473
#endif
474
        h0 += c * 5;  c =  (h0 >> 26); h0 =                h0 & 0x3ffffff;
475
        h1 += c;
476
477
        m += POLY1305_BLOCK_SIZE;
478
        bytes -= POLY1305_BLOCK_SIZE;
479
    }
480
481
    ctx->h[0] = h0;
482
    ctx->h[1] = h1;
483
    ctx->h[2] = h2;
484
    ctx->h[3] = h3;
485
    ctx->h[4] = h4;
486
487
#if defined(WOLFSSL_W64_WRAPPER) && defined(WOLFSSL_SMALL_STACK)
488
    XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
489
#endif
490
491
    return 0;
492
493
#endif /* end of 64 bit cpu blocks or 32 bit cpu */
494
10.6k
}
495
496
/*
497
This local function is used for the last call when a message with a given
498
number of bytes is less than the block size.
499
*/
500
static WC_INLINE int poly1305_block(Poly1305* ctx, const unsigned char *m)
501
6.39k
{
502
#ifdef USE_INTEL_POLY1305_SPEEDUP
503
    /* No call to poly1305_block when AVX2, AVX2 does 4 blocks at a time. */
504
    SAVE_VECTOR_REGISTERS(return _svr_ret;);
505
    poly1305_block_avx(ctx, m);
506
    RESTORE_VECTOR_REGISTERS();
507
    return 0;
508
#elif defined(WOLFSSL_ARMASM) && defined(WOLFSSL_ARMASM_THUMB2)
509
    poly1305_blocks_thumb2_16(ctx, m, POLY1305_BLOCK_SIZE, !ctx->finished);
510
    return 0;
511
#elif defined(WOLFSSL_ARMASM) && !defined(__aarch64__)
512
    poly1305_arm32_blocks_16(ctx, m, POLY1305_BLOCK_SIZE, !ctx->finished);
513
    return 0;
514
#elif defined(WOLFSSL_ARMASM)
515
    /* Only called from finished. */
516
    poly1305_arm64_block_16(ctx, m);
517
    return 0;
518
#else
519
6.39k
    return poly1305_blocks(ctx, m, POLY1305_BLOCK_SIZE);
520
6.39k
#endif
521
6.39k
}
522
523
/* Load a key into a Poly1305 context and reset it for a new message.
 *
 * ctx    Poly1305 context to initialize.
 * key    Key data. In the portable C paths bytes 0..15 are clamped and stored
 *        as r, and bytes 16..31 are stored as the pad added in
 *        wc_Poly1305Final(). The assembly back ends receive the key pointer
 *        unchanged.
 * keySz  Length of key in bytes; must be 32.
 *
 * Returns 0 on success, BAD_FUNC_ARG when ctx or key is NULL or keySz is not
 * 32. The Intel path may also return early from SAVE_VECTOR_REGISTERS.
 */
int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz)
{
#if defined(POLY130564) && !defined(USE_INTEL_POLY1305_SPEEDUP) && \
    !defined(WOLFSSL_ARMASM)
    word64 t0,t1;
#endif

    /* Validate everything once, before any other use of the arguments. */
    if ((ctx == NULL) || (key == NULL) || (keySz != 32)) {
        return BAD_FUNC_ARG;
    }

#ifdef CHACHA_AEAD_TEST
    /* Debug dump of the key; own scope so the declaration stays at the top
     * of a block. */
    {
        word32 k;
        printf("Poly key used:\n");
        for (k = 0; k < keySz; k++) {
            printf("%02x", key[k]);
            if ((k+1) % 8 == 0)
                printf("\n");
        }
        printf("\n");
    }
#endif

#ifdef USE_INTEL_POLY1305_SPEEDUP
    cpuid_get_flags_ex(&intel_flags);
    SAVE_VECTOR_REGISTERS(return _svr_ret;);
    #ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags))
        poly1305_setkey_avx2(ctx, key);
    else
    #endif
        poly1305_setkey_avx(ctx, key);
    RESTORE_VECTOR_REGISTERS();
    /* Powers of r are computed lazily by wc_Poly1305Update(). */
    ctx->started = 0;
#elif defined(WOLFSSL_ARMASM)
    poly1305_set_key(ctx, key);
    ctx->finished = 0;
#elif defined(POLY130564)

    /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
    t0 = U8TO64(key + 0);
    t1 = U8TO64(key + 8);

    /* r is kept as 44/44/42-bit limbs. */
    ctx->r[0] = ( t0                    ) & 0xffc0fffffff;
    ctx->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
    ctx->r[2] = ((t1 >> 24)             ) & 0x00ffffffc0f;

    /* h (accumulator) = 0 */
    ctx->h[0] = 0;
    ctx->h[1] = 0;
    ctx->h[2] = 0;

    /* save pad for later */
    ctx->pad[0] = U8TO64(key + 16);
    ctx->pad[1] = U8TO64(key + 24);

    ctx->leftover = 0;
    ctx->finished = 0;

#else /* if not 64 bit then use 32 bit */

    /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
    /* r is kept as five 26-bit limbs. */
    ctx->r[0] = (U8TO32(key +  0)     ) & 0x3ffffff;
    ctx->r[1] = (U8TO32(key +  3) >> 2) & 0x3ffff03;
    ctx->r[2] = (U8TO32(key +  6) >> 4) & 0x3ffc0ff;
    ctx->r[3] = (U8TO32(key +  9) >> 6) & 0x3f03fff;
    ctx->r[4] = (U8TO32(key + 12) >> 8) & 0x00fffff;

    /* h = 0 */
    ctx->h[0] = 0;
    ctx->h[1] = 0;
    ctx->h[2] = 0;
    ctx->h[3] = 0;
    ctx->h[4] = 0;

    /* save pad for later */
    ctx->pad[0] = U8TO32(key + 16);
    ctx->pad[1] = U8TO32(key + 20);
    ctx->pad[2] = U8TO32(key + 24);
    ctx->pad[3] = U8TO32(key + 28);

    ctx->leftover = 0;
    ctx->finished = 0;

#endif

    return 0;
}
613
614
/* Finish the MAC computation and write the 16-byte authenticator.
 *
 * Any buffered partial block is padded with a single 1 byte followed by
 * zeros and absorbed with ctx->finished set. The portable C paths then fully
 * reduce the accumulator, add the pad saved by wc_Poly1305SetKey(), store the
 * low 128 bits to mac and clear h, r and pad in the context.
 *
 * ctx  Poly1305 context.
 * mac  Buffer receiving 16 bytes of authentication data.
 *
 * Returns 0 on success, BAD_FUNC_ARG when ctx or mac is NULL, or the error
 * reported by poly1305_block() while absorbing the final block (in which
 * case mac is not written). The Intel path may also return early from
 * SAVE_VECTOR_REGISTERS.
 */
int wc_Poly1305Final(Poly1305* ctx, byte* mac)
{
    int ret = 0;
#ifdef USE_INTEL_POLY1305_SPEEDUP
#elif defined(WOLFSSL_ARMASM)
#elif defined(POLY130564)

    word64 h0,h1,h2,c;
    word64 g0,g1,g2;
    word64 t0,t1;

#else

    word32 h0,h1,h2,h3,h4,c;
    word32 g0,g1,g2,g3,g4;
#ifdef WOLFSSL_W64_WRAPPER
    w64wrapper f;
#else
    word64 f;
#endif
    word32 mask;

#endif

    if (ctx == NULL || mac == NULL)
        return BAD_FUNC_ARG;

#ifdef USE_INTEL_POLY1305_SPEEDUP
    SAVE_VECTOR_REGISTERS(return _svr_ret;);
    #ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags))
        poly1305_final_avx2(ctx, mac);
    else
    #endif
        poly1305_final_avx(ctx, mac);
    RESTORE_VECTOR_REGISTERS();
#elif defined(WOLFSSL_ARMASM)
    #if !defined(WOLFSSL_ARMASM_THUMB2) && !defined(WOLFSSL_ARMASM_NO_NEON) && \
        !defined(__aarch64__)
    /* Flush whole blocks still sitting in the buffer, then move the tail to
     * the front. */
    if (ctx->leftover >= POLY1305_BLOCK_SIZE) {
        size_t len = ctx->leftover & (~(POLY1305_BLOCK_SIZE - 1));
        poly1305_arm32_blocks(ctx, ctx->buffer, len);
        ctx->leftover -= len;
        if (ctx->leftover) {
            XMEMCPY(ctx->buffer, ctx->buffer + len, ctx->leftover);
        }
    }
    #endif
    if (ctx->leftover) {
        size_t i = ctx->leftover;
        ctx->buffer[i++] = 1;
        for (; i < POLY1305_BLOCK_SIZE; i++) {
            ctx->buffer[i] = 0;
        }
        ctx->finished = 1;
        ret = poly1305_block(ctx, ctx->buffer);
        if (ret != 0)
            return ret;
    }

    poly1305_final(ctx, mac);
#elif defined(POLY130564)

    /* process the remaining block */
    if (ctx->leftover) {
        size_t i = ctx->leftover;
        ctx->buffer[i] = 1;
        for (i = i + 1; i < POLY1305_BLOCK_SIZE; i++)
            ctx->buffer[i] = 0;
        ctx->finished = 1;
        ret = poly1305_block(ctx, ctx->buffer);
        if (ret != 0)
            return ret;
    }

    /* fully carry h */
    h0 = ctx->h[0];
    h1 = ctx->h[1];
    h2 = ctx->h[2];

                 c = (h1 >> 44); h1 &= 0xfffffffffff;
    h2 += c;     c = (h2 >> 42); h2 &= 0x3ffffffffff;
    h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
    h1 += c;     c = (h1 >> 44); h1 &= 0xfffffffffff;
    h2 += c;     c = (h2 >> 42); h2 &= 0x3ffffffffff;
    h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
    h1 += c;

    /* compute h + -p */
    g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
    g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
    g2 = h2 + c - ((word64)1 << 42);

    /* select h if h < p, or h + -p if h >= p */
    /* c is all ones when the top bit of g2 is clear, zero otherwise; the
     * selection is done with masks instead of a branch. */
    c = (g2 >> ((sizeof(word64) * 8) - 1)) - 1;
    g0 &= c;
    g1 &= c;
    g2 &= c;
    c = ~c;
    h0 = (h0 & c) | g0;
    h1 = (h1 & c) | g1;
    h2 = (h2 & c) | g2;

    /* h = (h + pad) */
    t0 = ctx->pad[0];
    t1 = ctx->pad[1];

    h0 += (( t0                    ) & 0xfffffffffff)    ;
    c = (h0 >> 44); h0 &= 0xfffffffffff;
    h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c;
    c = (h1 >> 44); h1 &= 0xfffffffffff;
    h2 += (((t1 >> 24)             ) & 0x3ffffffffff) + c;
    h2 &= 0x3ffffffffff;

    /* mac = h % (2^128) */
    h0 = ((h0      ) | (h1 << 44));
    h1 = ((h1 >> 20) | (h2 << 24));

    U64TO8(mac + 0, h0);
    U64TO8(mac + 8, h1);

    /* zero out the state */
    ctx->h[0] = 0;
    ctx->h[1] = 0;
    ctx->h[2] = 0;
    ctx->r[0] = 0;
    ctx->r[1] = 0;
    ctx->r[2] = 0;
    ctx->pad[0] = 0;
    ctx->pad[1] = 0;

#else /* if not 64 bit then use 32 bit */

    /* process the remaining block */
    if (ctx->leftover) {
        size_t i = ctx->leftover;
        ctx->buffer[i++] = 1;
        for (; i < POLY1305_BLOCK_SIZE; i++)
            ctx->buffer[i] = 0;
        ctx->finished = 1;
        /* Can fail, e.g. with MEMORY_E when poly1305_blocks() allocates its
         * scratch array; do not emit a MAC in that case. */
        ret = poly1305_block(ctx, ctx->buffer);
        if (ret != 0)
            return ret;
    }

    /* fully carry h */
    h0 = ctx->h[0];
    h1 = ctx->h[1];
    h2 = ctx->h[2];
    h3 = ctx->h[3];
    h4 = ctx->h[4];

                 c = h1 >> 26; h1 = h1 & 0x3ffffff;
    h2 +=     c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
    h3 +=     c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
    h4 +=     c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
    h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
    h1 +=     c;

    /* compute h + -p */
    g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
    g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
    g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
    g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
    g4 = h4 + c - ((word32)1 << 26);

    /* select h if h < p, or h + -p if h >= p */
    /* mask is all ones when the top bit of g4 is clear, zero otherwise; the
     * selection is done with masks instead of a branch. */
    mask = ((word32)g4 >> ((sizeof(word32) * 8) - 1)) - 1;
    g0 &= mask;
    g1 &= mask;
    g2 &= mask;
    g3 &= mask;
    g4 &= mask;
    mask = ~mask;
    h0 = (h0 & mask) | g0;
    h1 = (h1 & mask) | g1;
    h2 = (h2 & mask) | g2;
    h3 = (h3 & mask) | g3;
    h4 = (h4 & mask) | g4;

    /* h = h % (2^128) */
    h0 = ((h0      ) | (h1 << 26)) & 0xffffffff;
    h1 = ((h1 >>  6) | (h2 << 20)) & 0xffffffff;
    h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
    h3 = ((h3 >> 18) | (h4 <<  8)) & 0xffffffff;

    /* mac = (h + pad) % (2^128) */
#ifdef WOLFSSL_W64_WRAPPER
    f = w64From32(0, h0);
    f = w64Add32(f, ctx->pad[0], NULL);
    h0 = w64GetLow32(f);

    f = w64ShiftRight(f, 32);
    f = w64Add32(f, h1, NULL);
    f = w64Add32(f, ctx->pad[1], NULL);
    h1 = w64GetLow32(f);

    f = w64ShiftRight(f, 32);
    f = w64Add32(f, h2, NULL);
    f = w64Add32(f, ctx->pad[2], NULL);
    h2 = w64GetLow32(f);

    f = w64ShiftRight(f, 32);
    f = w64Add32(f, h3, NULL);
    f = w64Add32(f, ctx->pad[3], NULL);
    h3 = w64GetLow32(f);
#else
    f = (word64)h0 + ctx->pad[0]            ; h0 = (word32)f;
    f = (word64)h1 + ctx->pad[1] + (f >> 32); h1 = (word32)f;
    f = (word64)h2 + ctx->pad[2] + (f >> 32); h2 = (word32)f;
    f = (word64)h3 + ctx->pad[3] + (f >> 32); h3 = (word32)f;
#endif

    U32TO8(mac + 0, h0);
    U32TO8(mac + 4, h1);
    U32TO8(mac + 8, h2);
    U32TO8(mac + 12, h3);

    /* zero out the state */
    ctx->h[0] = 0;
    ctx->h[1] = 0;
    ctx->h[2] = 0;
    ctx->h[3] = 0;
    ctx->h[4] = 0;
    ctx->r[0] = 0;
    ctx->r[1] = 0;
    ctx->r[2] = 0;
    ctx->r[3] = 0;
    ctx->r[4] = 0;
    ctx->pad[0] = 0;
    ctx->pad[1] = 0;
    ctx->pad[2] = 0;
    ctx->pad[3] = 0;

#endif

    return ret;
}
845
#endif /* !WOLFSSL_RISCV_ASM */
846
847
848
/* Absorb message bytes into a Poly1305 context.
 *
 * Input that does not fill a whole block is kept in ctx->buffer (with its
 * length in ctx->leftover) and joined with the data of the next call. The
 * ARM NEON and AVX2 paths buffer up to sizeof(ctx->buffer) bytes before
 * processing; the generic path works in POLY1305_BLOCK_SIZE units.
 *
 * ctx    Poly1305 context with a key loaded.
 * m      Message data; may be NULL only when bytes is 0.
 * bytes  Number of bytes at m. Zero is valid and does nothing.
 *
 * Returns 0 on success, BAD_FUNC_ARG when ctx is NULL or m is NULL with
 * bytes > 0, or the error returned by poly1305_block()/poly1305_blocks() in
 * the generic path. The AVX2 path may also return early from
 * SAVE_VECTOR_REGISTERS.
 */
int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
{
    size_t i;

    if (ctx == NULL || (m == NULL && bytes > 0))
        return BAD_FUNC_ARG;

    if (bytes == 0) {
        /* valid, but do nothing */
        return 0;
    }
#ifdef CHACHA_AEAD_TEST
    /* Debug dump of the input; own scope so the declaration stays at the top
     * of a block. */
    {
        word32 k;
        printf("Raw input to poly:\n");
        for (k = 0; k < bytes; k++) {
            printf("%02x", m[k]);
            if ((k+1) % 16 == 0)
                printf("\n");
        }
        printf("\n");
    }
#endif

#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_THUMB2) && \
    !defined(WOLFSSL_ARMASM_NO_NEON)
    /* handle leftover */
    if (ctx->leftover) {
        size_t want = sizeof(ctx->buffer) - ctx->leftover;
        if (want > bytes)
            want = bytes;

        for (i = 0; i < want; i++)
            ctx->buffer[ctx->leftover + i] = m[i];
        bytes -= (word32)want;
        m += want;
        ctx->leftover += want;
        if (ctx->leftover < sizeof(ctx->buffer)) {
            return 0;
        }

        poly1305_blocks(ctx, ctx->buffer, sizeof(ctx->buffer));
        ctx->leftover = 0;
    }

    /* process full blocks */
    if (bytes >= sizeof(ctx->buffer)) {
        size_t want = bytes & ~((size_t)POLY1305_BLOCK_SIZE - 1);

        poly1305_blocks(ctx, m, want);
        m += want;
        bytes -= (word32)want;
    }

    /* store leftover */
    if (bytes) {
        for (i = 0; i < bytes; i++)
            ctx->buffer[ctx->leftover + i] = m[i];
        ctx->leftover += bytes;
    }
#else
#ifdef USE_INTEL_POLY1305_SPEEDUP
    #ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags)) {
        SAVE_VECTOR_REGISTERS(return _svr_ret;);

        /* handle leftover */
        if (ctx->leftover) {
            size_t want = sizeof(ctx->buffer) - ctx->leftover;
            if (want > bytes)
                want = bytes;

            for (i = 0; i < want; i++)
                ctx->buffer[ctx->leftover + i] = m[i];
            bytes -= (word32)want;
            m += want;
            ctx->leftover += want;
            if (ctx->leftover < sizeof(ctx->buffer)) {
                RESTORE_VECTOR_REGISTERS();
                return 0;
            }

            /* The powers of r are computed once, on first use. */
            if (!ctx->started) {
                poly1305_calc_powers_avx2(ctx);
                ctx->started = 1;
            }
            poly1305_blocks_avx2(ctx, ctx->buffer, sizeof(ctx->buffer));
            ctx->leftover = 0;
        }

        /* process full blocks */
        if (bytes >= sizeof(ctx->buffer)) {
            size_t want = bytes & ~(sizeof(ctx->buffer) - 1);

            if (!ctx->started) {
                poly1305_calc_powers_avx2(ctx);
                ctx->started = 1;
            }
            poly1305_blocks_avx2(ctx, m, want);
            m += want;
            bytes -= (word32)want;
        }

        /* store leftover */
        if (bytes) {
            for (i = 0; i < bytes; i++)
                ctx->buffer[ctx->leftover + i] = m[i];
            ctx->leftover += bytes;
        }
        RESTORE_VECTOR_REGISTERS();
    }
    else
    #endif
#endif
    {
        /* handle leftover */
        if (ctx->leftover) {
            size_t want = (POLY1305_BLOCK_SIZE - ctx->leftover);
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
            int ret;
#endif
            if (want > bytes)
                want = bytes;
            for (i = 0; i < want; i++)
                ctx->buffer[ctx->leftover + i] = m[i];
            bytes -= (word32)want;
            m += want;
            ctx->leftover += want;
            if (ctx->leftover < POLY1305_BLOCK_SIZE)
                return 0;
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
            /* Same guard as the poly1305_blocks() call below: only check the
             * result where the block function is known to report errors. */
            ret = poly1305_block(ctx, ctx->buffer);
            if (ret != 0)
                return ret;
#else
            poly1305_block(ctx, ctx->buffer);
#endif
            ctx->leftover = 0;
        }

        /* process full blocks */
        if (bytes >= POLY1305_BLOCK_SIZE) {
            size_t want = ((size_t)bytes & ~((size_t)POLY1305_BLOCK_SIZE - 1));
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
            int ret;
            ret = poly1305_blocks(ctx, m, want);
            if (ret != 0)
                return ret;
#else
            poly1305_blocks(ctx, m, want);
#endif
            m += want;
            bytes -= (word32)want;
        }

        /* store leftover */
        if (bytes) {
            for (i = 0; i < bytes; i++)
                ctx->buffer[ctx->leftover + i] = m[i];
            ctx->leftover += bytes;
        }
    }
#endif

    return 0;
}
1003
1004
/*  Takes a Poly1305 struct that has a key loaded and pads the provided length
1005
    ctx        : Initialized Poly1305 struct to use
1006
    lenToPad   : Current number of bytes updated that needs padding to 16
1007
 */
1008
int wc_Poly1305_Pad(Poly1305* ctx, word32 lenToPad)
1009
6.57k
{
1010
6.57k
    int ret = 0;
1011
6.57k
    word32 paddingLen;
1012
6.57k
    byte padding[WC_POLY1305_PAD_SZ - 1];
1013
1014
6.57k
    if (ctx == NULL) {
1015
0
        return BAD_FUNC_ARG;
1016
0
    }
1017
6.57k
    if (lenToPad == 0) {
1018
5
        return 0; /* nothing needs to be done */
1019
5
    }
1020
1021
6.57k
    XMEMSET(padding, 0, sizeof(padding));
1022
1023
    /* Pad length to 16 bytes */
1024
6.57k
    paddingLen = (-(int)lenToPad) & (WC_POLY1305_PAD_SZ - 1);
1025
6.57k
    if ((paddingLen > 0) && (paddingLen < WC_POLY1305_PAD_SZ)) {
1026
6.36k
        ret = wc_Poly1305Update(ctx, padding, paddingLen);
1027
6.36k
    }
1028
6.57k
    return ret;
1029
6.57k
}
1030
1031
/*  Takes a Poly1305 struct that has a key loaded and adds the AEAD length
1032
    encoding in 64-bit little endian
1033
    aadSz      : Size of the additional authentication data
1034
    dataSz     : Size of the plaintext or ciphertext
1035
 */
1036
int wc_Poly1305_EncodeSizes(Poly1305* ctx, word32 aadSz, word32 dataSz)
1037
3.28k
{
1038
3.28k
    int ret;
1039
3.28k
    byte little64[16]; /* sizeof(word64) * 2 */
1040
1041
3.28k
    if (ctx == NULL) {
1042
0
        return BAD_FUNC_ARG;
1043
0
    }
1044
1045
3.28k
    XMEMSET(little64, 0, sizeof(little64));
1046
1047
    /* size of additional data and input data as little endian 64 bit types */
1048
3.28k
    u32tole64(aadSz,  little64);
1049
3.28k
    u32tole64(dataSz, little64 + 8);
1050
3.28k
    ret = wc_Poly1305Update(ctx, little64, sizeof(little64));
1051
1052
3.28k
    return ret;
1053
3.28k
}
1054
1055
#ifdef WORD64_AVAILABLE
1056
/*  64-bit variant of the AEAD length encoding: absorbs aadSz followed by
    dataSz into the Poly1305 state as two 64-bit words. On BIG_ENDIAN_ORDER
    builds each word is byte-reversed first.
    ctx        : Initialized Poly1305 struct to use
    aadSz      : Size of the additional authentication data
    dataSz     : Size of the plaintext or ciphertext
    returns BAD_FUNC_ARG when ctx is NULL, otherwise the wc_Poly1305Update()
    result
 */
int wc_Poly1305_EncodeSizes64(Poly1305* ctx, word64 aadSz, word64 dataSz)
{
    word64 lengths[2];

    if (ctx == NULL)
        return BAD_FUNC_ARG;

#ifdef BIG_ENDIAN_ORDER
    lengths[0] = ByteReverseWord64(aadSz);
    lengths[1] = ByteReverseWord64(dataSz);
#else
    lengths[0] = aadSz;
    lengths[1] = dataSz;
#endif

    /* hand the two words to the update routine as raw bytes */
    return wc_Poly1305Update(ctx, (byte *)lengths, sizeof(lengths));
}
1077
#endif
1078
1079
/*  Takes in an initialized Poly1305 struct that has a key loaded and creates
1080
    a MAC (tag) using recent TLS AEAD padding scheme.
1081
    ctx        : Initialized Poly1305 struct to use
1082
    additional : Additional data to use
1083
    addSz      : Size of additional buffer
1084
    input      : Input buffer to create tag from
1085
    sz         : Size of input buffer
1086
    tag        : Buffer to hold created tag
1087
    tagSz      : Size of input tag buffer (must be at least
1088
                 WC_POLY1305_MAC_SZ(16))
1089
 */
1090
int wc_Poly1305_MAC(Poly1305* ctx, const byte* additional, word32 addSz,
1091
                    const byte* input, word32 sz, byte* tag, word32 tagSz)
1092
3.28k
{
1093
3.28k
    int ret;
1094
1095
    /* sanity check on arguments */
1096
3.28k
    if (ctx == NULL || input == NULL || tag == NULL ||
1097
3.28k
                                                   tagSz < WC_POLY1305_MAC_SZ) {
1098
0
        return BAD_FUNC_ARG;
1099
0
    }
1100
1101
    /* additional allowed to be 0 */
1102
3.28k
    if (addSz > 0) {
1103
3.28k
        if (additional == NULL)
1104
0
            return BAD_FUNC_ARG;
1105
1106
        /* additional data plus padding */
1107
3.28k
        if ((ret = wc_Poly1305Update(ctx, additional, addSz)) != 0) {
1108
0
            return ret;
1109
0
        }
1110
        /* pad additional data */
1111
3.28k
        if ((ret = wc_Poly1305_Pad(ctx, addSz)) != 0) {
1112
0
            return ret;
1113
0
        }
1114
3.28k
    }
1115
1116
    /* input plus padding */
1117
3.28k
    if ((ret = wc_Poly1305Update(ctx, input, sz)) != 0) {
1118
0
        return ret;
1119
0
    }
1120
    /* pad input data */
1121
3.28k
    if ((ret = wc_Poly1305_Pad(ctx, sz)) != 0) {
1122
0
        return ret;
1123
0
    }
1124
1125
    /* encode size of AAD and input data as little endian 64 bit types */
1126
3.28k
    if ((ret = wc_Poly1305_EncodeSizes(ctx, addSz, sz)) != 0) {
1127
0
        return ret;
1128
0
    }
1129
1130
    /* Finalize the auth tag */
1131
3.28k
    ret = wc_Poly1305Final(ctx, tag);
1132
1133
3.28k
    return ret;
1134
1135
3.28k
}
1136
#endif /* HAVE_POLY1305 */