Coverage Report

Created: 2025-11-16 07:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wolfssl-sp-math-all/wolfcrypt/src/poly1305.c
Line
Count
Source
1
/* poly1305.c
2
 *
3
 * Copyright (C) 2006-2025 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
/*
22
23
DESCRIPTION
24
This library contains an implementation of the Poly1305 authenticator.
25
26
Based off the public domain implementations by Andrew Moon
27
and Daniel J. Bernstein
28
29
*/
30
31
32
/*
33
 * WOLFSSL_W64_WRAPPER Uses wrappers around word64 types for a system that does
34
 *                     not have word64 available. As expected it reduces
35
 *                     performance. Benchmarks collected July 2024 show
36
 *                     303.004 MiB/s with and 1874.194 MiB/s without.
37
 */
38
39
#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
40
41
#ifdef HAVE_POLY1305
42
#include <wolfssl/wolfcrypt/poly1305.h>
43
#include <wolfssl/wolfcrypt/cpuid.h>
44
#ifdef NO_INLINE
45
    #include <wolfssl/wolfcrypt/misc.h>
46
#else
47
    #define WOLFSSL_MISC_INCLUDED
48
    #include <wolfcrypt/src/misc.c>
49
#endif
50
#ifdef CHACHA_AEAD_TEST
51
    #include <stdio.h>
52
#endif
53
54
#ifdef _MSC_VER
55
    /* 4127 warning constant while(1)  */
56
    #pragma warning(disable: 4127)
57
#endif
58
59
#ifdef USE_INTEL_POLY1305_SPEEDUP
60
    #include <emmintrin.h>
61
    #include <immintrin.h>
62
63
    #if defined(__GNUC__) && ((__GNUC__ < 4) || \
64
                              (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
65
        #undef  NO_AVX2_SUPPORT
66
        #define NO_AVX2_SUPPORT
67
    #endif
68
    #if defined(__clang__) && ((__clang_major__ < 3) || \
69
                               (__clang_major__ == 3 && __clang_minor__ <= 5))
70
        #define NO_AVX2_SUPPORT
71
    #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
72
        #undef NO_AVX2_SUPPORT
73
    #endif
74
    #if defined(_MSC_VER) && (_MSC_VER <= 1900)
75
        #undef  NO_AVX2_SUPPORT
76
        #define NO_AVX2_SUPPORT
77
    #endif
78
79
    #define HAVE_INTEL_AVX1
80
    #ifndef NO_AVX2_SUPPORT
81
        #define HAVE_INTEL_AVX2
82
    #endif
83
#endif
84
85
#ifdef USE_INTEL_POLY1305_SPEEDUP
86
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
87
#endif
88
89
#if defined(USE_INTEL_POLY1305_SPEEDUP) || defined(POLY130564)
    /* POLY1305_NOINLINE: compiler-specific "do not inline" decoration.
     * Expands to nothing on compilers that are not recognised here. */
    #if defined(__WATCOMC__)
        #error "POLY130564 || USE_INTEL_POLY1305_SPEEDUP Watcom not supported"
    #elif defined(_MSC_VER)
        #define POLY1305_NOINLINE __declspec(noinline)
    #elif defined(__GNUC__)
        #define POLY1305_NOINLINE __attribute__((noinline))
    #else
        #define POLY1305_NOINLINE
    #endif

    /* 128-bit arithmetic helpers used by the 64-bit implementation:
     *   MUL(out, x, y)   out  = x * y        (64 x 64 -> 128 bit)
     *   ADD(out, in)     out += in           (128-bit operand)
     *   ADDLO(out, in)   out += in           (64-bit operand)
     *   SHR(in, shift)   low 64 bits of (in >> shift)
     *   LO(in)           low 64 bits of in
     */
    #if defined(_MSC_VER) && !defined(__WATCOMC__)
        #include <intrin.h>

        /* MSVC has no native 128-bit integer: emulate with two 64-bit halves
         * and compiler intrinsics. */
        typedef struct word128 {
            word64 lo;
            word64 hi;
        } word128;

        #define MUL(out, x, y) out.lo = _umul128((x), (y), &out.hi)
        /* do/while(0) so the macros behave as a single statement; the carry
         * out of the low half is detected by unsigned wrap-around. */
        #define ADD(out, in) do { word64 t = out.lo; out.lo += in.lo; \
                                  out.hi += (out.lo < t) + in.hi; } while (0)
        #define ADDLO(out, in) do { word64 t = out.lo; out.lo += in; \
                                    out.hi += (out.lo < t); } while (0)
        #define SHR(in, shift) (__shiftright128(in.lo, in.hi, (shift)))
        #define LO(in) (in.lo)

    #elif defined(__GNUC__)
        /* GCC-compatible compilers: use the built-in 128-bit integer type. */
        #if defined(__SIZEOF_INT128__)
            PEDANTIC_EXTENSION typedef unsigned __int128 word128;
        #else
            typedef unsigned word128 __attribute__((mode(TI)));
        #endif

        #define MUL(out, x, y) out = ((word128)(x) * (y))
        #define ADD(out, in) (out) += (in)
        #define ADDLO(out, in) (out) += (in)
        #define SHR(in, shift) (word64)((in) >> (shift))
        #define LO(in) (word64)(in)
    #endif
#endif
130
131
#ifdef USE_INTEL_POLY1305_SPEEDUP
132
#ifdef __cplusplus
133
    extern "C" {
134
#endif
135
136
#ifdef HAVE_INTEL_AVX1
137
/* Process one block (16 bytes) of data.
138
 *
139
 * ctx  Poly1305 context.
140
 * m    One block of message data.
141
 */
142
WOLFSSL_LOCAL void poly1305_block_avx(Poly1305* ctx, const unsigned char *m);
143
/* Process multiple blocks (n * 16 bytes) of data.
144
 *
145
 * ctx    Poly1305 context.
146
 * m      Blocks of message data.
147
 * bytes  The number of bytes to process.
148
 */
149
WOLFSSL_LOCAL void poly1305_blocks_avx(Poly1305* ctx, const unsigned char* m,
150
                                size_t bytes);
151
/* Set the key to use when processing data.
152
 * Initialize the context.
153
 *
154
 * ctx  Poly1305 context.
155
 * key  The key data (16 bytes).
156
 */
157
WOLFSSL_LOCAL void poly1305_setkey_avx(Poly1305* ctx, const byte* key);
158
/* Calculate the final result - authentication data.
159
 * Zeros out the private data in the context.
160
 *
161
 * ctx  Poly1305 context.
162
 * mac  Buffer to hold 16 bytes.
163
 */
164
WOLFSSL_LOCAL void poly1305_final_avx(Poly1305* ctx, byte* mac);
165
#endif
166
167
#ifdef HAVE_INTEL_AVX2
168
/* Process multiple blocks (n * 16 bytes) of data.
169
 *
170
 * ctx    Poly1305 context.
171
 * m      Blocks of message data.
172
 * bytes  The number of bytes to process.
173
 */
174
WOLFSSL_LOCAL void poly1305_blocks_avx2(Poly1305* ctx, const unsigned char* m,
175
                                 size_t bytes);
176
/* Calculate R^1, R^2, R^3 and R^4 and store them in the context.
177
 *
178
 * ctx    Poly1305 context.
179
 */
180
WOLFSSL_LOCAL void poly1305_calc_powers_avx2(Poly1305* ctx);
181
/* Set the key to use when processing data.
182
 * Initialize the context.
183
 * Calls AVX set key function as final function calls AVX code.
184
 *
185
 * ctx  Poly1305 context.
186
 * key  The key data (16 bytes).
187
 */
188
WOLFSSL_LOCAL void poly1305_setkey_avx2(Poly1305* ctx, const byte* key);
189
/* Calculate the final result - authentication data.
190
 * Zeros out the private data in the context.
191
 * Calls AVX final function to quickly process last blocks.
192
 *
193
 * ctx  Poly1305 context.
194
 * mac  Buffer to hold 16 bytes - authentication data.
195
 */
196
WOLFSSL_LOCAL void poly1305_final_avx2(Poly1305* ctx, byte* mac);
197
#endif
198
199
#ifdef __cplusplus
200
    }  /* extern "C" */
201
#endif
202
203
#elif defined(POLY130564)
204
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
205
    static word64 U8TO64(const byte* p)
206
108k
    {
207
108k
        return
208
108k
            (((word64)(p[0] & 0xff)      ) |
209
108k
             ((word64)(p[1] & 0xff) <<  8) |
210
108k
             ((word64)(p[2] & 0xff) << 16) |
211
108k
             ((word64)(p[3] & 0xff) << 24) |
212
108k
             ((word64)(p[4] & 0xff) << 32) |
213
108k
             ((word64)(p[5] & 0xff) << 40) |
214
108k
             ((word64)(p[6] & 0xff) << 48) |
215
108k
             ((word64)(p[7] & 0xff) << 56));
216
108k
    }
217
218
6.34k
    /* Store v into p[0..7] in little-endian order (least significant byte
     * first). */
    static void U64TO8(byte* p, word64 v) {
        int idx;

        for (idx = 0; idx < 8; idx++) {
            p[idx] = (byte)v;   /* truncating cast keeps the low byte */
            v >>= 8;
        }
    }
228
#endif/* !WOLFSSL_ARMASM && !WOLFSSL_RISCV_ASM */
229
/* if not 64 bit then use 32 bit */
230
#elif !defined(WOLFSSL_ARMASM)
231
232
    static word32 U8TO32(const byte *p)
233
    {
234
        return
235
            (((word32)(p[0] & 0xff)      ) |
236
             ((word32)(p[1] & 0xff) <<  8) |
237
             ((word32)(p[2] & 0xff) << 16) |
238
             ((word32)(p[3] & 0xff) << 24));
239
    }
240
241
    /* Store v into p[0..3] in little-endian order (least significant byte
     * first). */
    static void U32TO8(byte *p, word32 v) {
        int idx;

        for (idx = 0; idx < 4; idx++) {
            p[idx] = (byte)(v & 0xff);
            v >>= 8;
        }
    }
247
#endif
248
249
/* convert 32-bit unsigned to little endian 64 bit type as byte array */
250
static WC_INLINE void u32tole64(const word32 inLe32, byte outLe64[8])
251
6.31k
{
252
#ifndef WOLFSSL_X86_64_BUILD
253
    outLe64[0] = (byte)(inLe32  & 0x000000FF);
254
    outLe64[1] = (byte)((inLe32 & 0x0000FF00) >> 8);
255
    outLe64[2] = (byte)((inLe32 & 0x00FF0000) >> 16);
256
    outLe64[3] = (byte)((inLe32 & 0xFF000000) >> 24);
257
    outLe64[4] = 0;
258
    outLe64[5] = 0;
259
    outLe64[6] = 0;
260
    outLe64[7] = 0;
261
#else
262
6.31k
    *(word64*)outLe64 = inLe32;
263
6.31k
#endif
264
6.31k
}
265
266
267
#if !defined(WOLFSSL_RISCV_ASM)
268
/*
269
This local function operates on a message with a given number of bytes
270
with a given ctx pointer to a Poly1305 structure.
271
*/
272
static int poly1305_blocks(Poly1305* ctx, const unsigned char *m,
273
                     size_t bytes)
274
10.0k
{
275
#ifdef USE_INTEL_POLY1305_SPEEDUP
276
    /* AVX2 is handled in wc_Poly1305Update. */
277
    SAVE_VECTOR_REGISTERS(return _svr_ret;);
278
    poly1305_blocks_avx(ctx, m, bytes);
279
    RESTORE_VECTOR_REGISTERS();
280
    return 0;
281
#elif defined(WOLFSSL_ARMASM) && defined(__aarch64__)
282
    poly1305_arm64_blocks(ctx, m, bytes);
283
    return 0;
284
#elif defined(WOLFSSL_ARMASM) && defined(WOLFSSL_ARMASM_THUMB2)
285
    poly1305_blocks_thumb2_16(ctx, m, bytes, 1);
286
    return 0;
287
#elif defined(WOLFSSL_ARMASM)
288
#ifndef WOLFSSL_ARMASM_NO_NEON
289
    poly1305_arm32_blocks(ctx, m, bytes);
290
    return 0;
291
#else
292
    poly1305_arm32_blocks_16(ctx, m, bytes, 1);
293
    return 0;
294
#endif
295
#elif defined(POLY130564)
296
10.0k
    const word64 hibit = (ctx->finished) ? 0 : ((word64)1 << 40); /* 1 << 128 */
297
10.0k
    word64 r0,r1,r2;
298
10.0k
    word64 s1,s2;
299
10.0k
    word64 h0,h1,h2;
300
10.0k
    word64 c;
301
10.0k
    word128 d0,d1,d2,d;
302
303
10.0k
    r0 = ctx->r[0];
304
10.0k
    r1 = ctx->r[1];
305
10.0k
    r2 = ctx->r[2];
306
307
10.0k
    h0 = ctx->h[0];
308
10.0k
    h1 = ctx->h[1];
309
10.0k
    h2 = ctx->h[2];
310
311
10.0k
    s1 = r1 * (5 << 2);
312
10.0k
    s2 = r2 * (5 << 2);
313
314
57.7k
    while (bytes >= POLY1305_BLOCK_SIZE) {
315
47.7k
        word64 t0,t1;
316
317
        /* h += m[i] */
318
47.7k
        t0 = U8TO64(&m[0]);
319
47.7k
        t1 = U8TO64(&m[8]);
320
321
47.7k
        h0 += (( t0                    ) & 0xfffffffffff);
322
47.7k
        h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
323
47.7k
        h2 += (((t1 >> 24)             ) & 0x3ffffffffff) | hibit;
324
325
        /* h *= r */
326
47.7k
        MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d);
327
47.7k
        MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d);
328
47.7k
        MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d);
329
330
        /* (partial) h %= p */
331
47.7k
                      c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff;
332
47.7k
        ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff;
333
47.7k
        ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff;
334
47.7k
        h0  += c * 5; c = (h0 >> 44);  h0 =    h0  & 0xfffffffffff;
335
47.7k
        h1  += c;
336
337
47.7k
        m += POLY1305_BLOCK_SIZE;
338
47.7k
        bytes -= POLY1305_BLOCK_SIZE;
339
47.7k
    }
340
341
10.0k
    ctx->h[0] = h0;
342
10.0k
    ctx->h[1] = h1;
343
10.0k
    ctx->h[2] = h2;
344
345
10.0k
    return 0;
346
347
#else /* if not 64 bit then use 32 bit */
348
    const word32 hibit = (ctx->finished) ? 0 : ((word32)1 << 24); /* 1 << 128 */
349
    word32 r0,r1,r2,r3,r4;
350
    word32 s1,s2,s3,s4;
351
    word32 h0,h1,h2,h3,h4;
352
    word32 c;
353
#ifdef WOLFSSL_W64_WRAPPER
354
    #ifdef WOLFSSL_SMALL_STACK
355
    w64wrapper* d;
356
357
    d = (w64wrapper*)XMALLOC(5 * sizeof(w64wrapper), NULL,
358
        DYNAMIC_TYPE_TMP_BUFFER);
359
    if (d == NULL) {
360
        return MEMORY_E;
361
    }
362
    #else
363
    w64wrapper d[5];
364
    #endif
365
#else
366
    word64 d0,d1,d2,d3,d4;
367
#endif
368
369
370
    r0 = ctx->r[0];
371
    r1 = ctx->r[1];
372
    r2 = ctx->r[2];
373
    r3 = ctx->r[3];
374
    r4 = ctx->r[4];
375
376
    s1 = r1 * 5;
377
    s2 = r2 * 5;
378
    s3 = r3 * 5;
379
    s4 = r4 * 5;
380
381
    h0 = ctx->h[0];
382
    h1 = ctx->h[1];
383
    h2 = ctx->h[2];
384
    h3 = ctx->h[3];
385
    h4 = ctx->h[4];
386
387
    while (bytes >= POLY1305_BLOCK_SIZE) {
388
        /* h += m[i] */
389
        h0 += (U8TO32(m+ 0)     ) & 0x3ffffff;
390
        h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff;
391
        h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff;
392
        h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff;
393
        h4 += (U8TO32(m+12) >> 8) | hibit;
394
395
        /* h *= r */
396
#ifdef WOLFSSL_W64_WRAPPER
397
        {
398
            w64wrapper tmp;
399
400
            d[0] = w64Mul(h0, r0); tmp = w64Mul(h1, s4);
401
            d[0] = w64Add(d[0], tmp, NULL); tmp = w64Mul(h2, s3);
402
            d[0] = w64Add(d[0], tmp, NULL); tmp = w64Mul(h3, s2);
403
            d[0] = w64Add(d[0], tmp, NULL); tmp = w64Mul(h4, s1);
404
            d[0] = w64Add(d[0], tmp, NULL);
405
406
            d[1] = w64Mul(h0, r1); tmp = w64Mul(h1, r0);
407
            d[1] = w64Add(d[1], tmp, NULL); tmp = w64Mul(h2, s4);
408
            d[1] = w64Add(d[1], tmp, NULL); tmp = w64Mul(h3, s3);
409
            d[1] = w64Add(d[1], tmp, NULL); tmp = w64Mul(h4, s2);
410
            d[1] = w64Add(d[1], tmp, NULL);
411
412
            d[2] = w64Mul(h0, r2); tmp = w64Mul(h1, r1);
413
            d[2] = w64Add(d[2], tmp, NULL); tmp = w64Mul(h2, r0);
414
            d[2] = w64Add(d[2], tmp, NULL); tmp = w64Mul(h3, s4);
415
            d[2] = w64Add(d[2], tmp, NULL); tmp = w64Mul(h4, s3);
416
            d[2] = w64Add(d[2], tmp, NULL);
417
418
            d[3] = w64Mul(h0, r3); tmp = w64Mul(h1, r2);
419
            d[3] = w64Add(d[3], tmp, NULL); tmp = w64Mul(h2, r1);
420
            d[3] = w64Add(d[3], tmp, NULL); tmp = w64Mul(h3, r0);
421
            d[3] = w64Add(d[3], tmp, NULL); tmp = w64Mul(h4, s4);
422
            d[3] = w64Add(d[3], tmp, NULL);
423
424
            d[4] = w64Mul(h0, r4); tmp = w64Mul(h1, r3);
425
            d[4] = w64Add(d[4], tmp, NULL); tmp = w64Mul(h2, r2);
426
            d[4] = w64Add(d[4], tmp, NULL); tmp = w64Mul(h3, r1);
427
            d[4] = w64Add(d[4], tmp, NULL); tmp = w64Mul(h4, r0);
428
            d[4] = w64Add(d[4], tmp, NULL);
429
        }
430
#else
431
        d0 = ((word64)h0 * r0) + ((word64)h1 * s4) + ((word64)h2 * s3) +
432
             ((word64)h3 * s2) + ((word64)h4 * s1);
433
        d1 = ((word64)h0 * r1) + ((word64)h1 * r0) + ((word64)h2 * s4) +
434
             ((word64)h3 * s3) + ((word64)h4 * s2);
435
        d2 = ((word64)h0 * r2) + ((word64)h1 * r1) + ((word64)h2 * r0) +
436
             ((word64)h3 * s4) + ((word64)h4 * s3);
437
        d3 = ((word64)h0 * r3) + ((word64)h1 * r2) + ((word64)h2 * r1) +
438
             ((word64)h3 * r0) + ((word64)h4 * s4);
439
        d4 = ((word64)h0 * r4) + ((word64)h1 * r3) + ((word64)h2 * r2) +
440
             ((word64)h3 * r1) + ((word64)h4 * r0);
441
#endif
442
443
        /* (partial) h %= p */
444
#ifdef WOLFSSL_W64_WRAPPER
445
        c = w64GetLow32(w64ShiftRight(d[0], 26));
446
        h0 = w64GetLow32(d[0]) & 0x3ffffff;
447
        d[1] = w64Add32(d[1], c, NULL);
448
        c = w64GetLow32(w64ShiftRight(d[1], 26));
449
        h1 = w64GetLow32(d[1]) & 0x3ffffff;
450
        d[2] = w64Add32(d[2], c, NULL);
451
        c = w64GetLow32(w64ShiftRight(d[2], 26));
452
        h2 = w64GetLow32(d[2]) & 0x3ffffff;
453
        d[3] = w64Add32(d[3], c, NULL);
454
        c = w64GetLow32(w64ShiftRight(d[3], 26));
455
        h3 = w64GetLow32(d[3]) & 0x3ffffff;
456
        d[4] = w64Add32(d[4], c, NULL);
457
        c = w64GetLow32(w64ShiftRight(d[4], 26));
458
        h4 = w64GetLow32(d[4]) & 0x3ffffff;
459
#else
460
                      c = (word32)(d0 >> 26); h0 = (word32)d0 & 0x3ffffff;
461
        d1 += c;      c = (word32)(d1 >> 26); h1 = (word32)d1 & 0x3ffffff;
462
        d2 += c;      c = (word32)(d2 >> 26); h2 = (word32)d2 & 0x3ffffff;
463
        d3 += c;      c = (word32)(d3 >> 26); h3 = (word32)d3 & 0x3ffffff;
464
        d4 += c;      c = (word32)(d4 >> 26); h4 = (word32)d4 & 0x3ffffff;
465
#endif
466
        h0 += c * 5;  c =  (h0 >> 26); h0 =                h0 & 0x3ffffff;
467
        h1 += c;
468
469
        m += POLY1305_BLOCK_SIZE;
470
        bytes -= POLY1305_BLOCK_SIZE;
471
    }
472
473
    ctx->h[0] = h0;
474
    ctx->h[1] = h1;
475
    ctx->h[2] = h2;
476
    ctx->h[3] = h3;
477
    ctx->h[4] = h4;
478
479
#if defined(WOLFSSL_W64_WRAPPER) && defined(WOLFSSL_SMALL_STACK)
480
    XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
481
#endif
482
483
    return 0;
484
485
#endif /* end of 64 bit cpu blocks or 32 bit cpu */
486
10.0k
}
487
488
/*
489
This local function is used for the last call when a message with a given
490
number of bytes is less than the block size.
491
*/
492
static WC_INLINE int poly1305_block(Poly1305* ctx, const unsigned char *m)
493
6.17k
{
494
#ifdef USE_INTEL_POLY1305_SPEEDUP
495
    /* No call to poly1305_block when AVX2, AVX2 does 4 blocks at a time. */
496
    SAVE_VECTOR_REGISTERS(return _svr_ret;);
497
    poly1305_block_avx(ctx, m);
498
    RESTORE_VECTOR_REGISTERS();
499
    return 0;
500
#elif defined(WOLFSSL_ARMASM) && defined(WOLFSSL_ARMASM_THUMB2)
501
    poly1305_blocks_thumb2_16(ctx, m, POLY1305_BLOCK_SIZE, !ctx->finished);
502
    return 0;
503
#elif defined(WOLFSSL_ARMASM) && !defined(__aarch64__)
504
    poly1305_arm32_blocks_16(ctx, m, POLY1305_BLOCK_SIZE, !ctx->finished);
505
    return 0;
506
#elif defined(WOLFSSL_ARMASM)
507
    /* Only called from finished. */
508
    poly1305_arm64_block_16(ctx, m);
509
    return 0;
510
#else
511
6.17k
    return poly1305_blocks(ctx, m, POLY1305_BLOCK_SIZE);
512
6.17k
#endif
513
6.17k
}
514
515
/* Load a one-time key into a Poly1305 context and reset it for a new message.
 *
 * ctx    Poly1305 context to initialize.
 * key    Key data: the first 16 bytes form r (clamped), the last 16 bytes
 *        form the pad added to the final result.
 * keySz  Length of key in bytes; must be 32.
 *
 * Returns 0 on success, BAD_FUNC_ARG when ctx or key is NULL or keySz is not
 * 32.
 */
int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz)
{
#if defined(POLY130564) && !defined(USE_INTEL_POLY1305_SPEEDUP) && \
    !defined(WOLFSSL_ARMASM)
    word64 t0,t1;
#endif

    /* Validate everything before the key is read, including by the debug
     * dump below. */
    if ((ctx == NULL) || (key == NULL) || (keySz != 32)) {
        return BAD_FUNC_ARG;
    }

#ifdef CHACHA_AEAD_TEST
    {
        /* Own scope so the declaration stays at the start of a block. */
        word32 k;
        printf("Poly key used:\n");
        for (k = 0; k < keySz; k++) {
            printf("%02x", key[k]);
            if ((k+1) % 8 == 0)
                printf("\n");
        }
        printf("\n");
    }
#endif

#ifdef USE_INTEL_POLY1305_SPEEDUP
    cpuid_get_flags_ex(&intel_flags);
    SAVE_VECTOR_REGISTERS(return _svr_ret;);
    #ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags))
        poly1305_setkey_avx2(ctx, key);
    else
    #endif
        poly1305_setkey_avx(ctx, key);
    RESTORE_VECTOR_REGISTERS();
    ctx->started = 0;
#elif defined(WOLFSSL_ARMASM)
    poly1305_set_key(ctx, key);
    ctx->finished = 0;
#elif defined(POLY130564)

    /* r &= 0x0ffffffc0ffffffc0ffffffc0fffffff, split into 44/44/42-bit
     * limbs */
    t0 = U8TO64(key + 0);
    t1 = U8TO64(key + 8);

    ctx->r[0] = ( t0                    ) & 0xffc0fffffff;
    ctx->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
    ctx->r[2] = ((t1 >> 24)             ) & 0x00ffffffc0f;

    /* h (accumulator) = 0 */
    ctx->h[0] = 0;
    ctx->h[1] = 0;
    ctx->h[2] = 0;

    /* save pad for later */
    ctx->pad[0] = U8TO64(key + 16);
    ctx->pad[1] = U8TO64(key + 24);

    ctx->leftover = 0;
    ctx->finished = 0;

#else /* if not 64 bit then use 32 bit */

    /* r &= 0x0ffffffc0ffffffc0ffffffc0fffffff, split into five 26-bit
     * limbs */
    ctx->r[0] = (U8TO32(key +  0)     ) & 0x3ffffff;
    ctx->r[1] = (U8TO32(key +  3) >> 2) & 0x3ffff03;
    ctx->r[2] = (U8TO32(key +  6) >> 4) & 0x3ffc0ff;
    ctx->r[3] = (U8TO32(key +  9) >> 6) & 0x3f03fff;
    ctx->r[4] = (U8TO32(key + 12) >> 8) & 0x00fffff;

    /* h = 0 */
    ctx->h[0] = 0;
    ctx->h[1] = 0;
    ctx->h[2] = 0;
    ctx->h[3] = 0;
    ctx->h[4] = 0;

    /* save pad for later */
    ctx->pad[0] = U8TO32(key + 16);
    ctx->pad[1] = U8TO32(key + 20);
    ctx->pad[2] = U8TO32(key + 24);
    ctx->pad[3] = U8TO32(key + 28);

    ctx->leftover = 0;
    ctx->finished = 0;

#endif

    return 0;
}
605
606
/* Finish a Poly1305 computation and write the 16-byte authentication tag.
 *
 * Any buffered partial block is padded with a single 1 byte followed by
 * zeros and processed with ctx->finished set. In the portable C builds the
 * r, h and pad values in the context are zeroed afterwards.
 *
 * ctx  Poly1305 context.
 * mac  Buffer that receives 16 bytes.
 *
 * Returns 0 on success, BAD_FUNC_ARG when ctx or mac is NULL, or the error
 * reported while processing the last block. In that error case no tag is
 * written.
 */
int wc_Poly1305Final(Poly1305* ctx, byte* mac)
{
#ifdef USE_INTEL_POLY1305_SPEEDUP
#elif defined(WOLFSSL_ARMASM)
#elif defined(POLY130564)

    word64 h0,h1,h2,c;
    word64 g0,g1,g2;
    word64 t0,t1;

#else

    word32 h0,h1,h2,h3,h4,c;
    word32 g0,g1,g2,g3,g4;
#ifdef WOLFSSL_W64_WRAPPER
    w64wrapper f;
#else
    word64 f;
#endif
    word32 mask;

#endif

    if (ctx == NULL || mac == NULL)
        return BAD_FUNC_ARG;

#ifdef USE_INTEL_POLY1305_SPEEDUP
    SAVE_VECTOR_REGISTERS(return _svr_ret;);
    #ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags))
        poly1305_final_avx2(ctx, mac);
    else
    #endif
        poly1305_final_avx(ctx, mac);
    RESTORE_VECTOR_REGISTERS();
#elif defined(WOLFSSL_ARMASM)
    #if !defined(WOLFSSL_ARMASM_THUMB2) && !defined(WOLFSSL_ARMASM_NO_NEON) && \
        !defined(__aarch64__)
    /* Flush whole blocks still held in the buffer, then move the remainder
     * to the front. */
    if (ctx->leftover >= POLY1305_BLOCK_SIZE) {
        size_t len = ctx->leftover & (~(POLY1305_BLOCK_SIZE - 1));
        poly1305_arm32_blocks(ctx, ctx->buffer, len);
        ctx->leftover -= len;
        if (ctx->leftover) {
            XMEMCPY(ctx->buffer, ctx->buffer + len, ctx->leftover);
        }
    }
    #endif
    if (ctx->leftover) {
        size_t i = ctx->leftover;
        int ret;

        ctx->buffer[i++] = 1;
        for (; i < POLY1305_BLOCK_SIZE; i++) {
            ctx->buffer[i] = 0;
        }
        ctx->finished = 1;
        ret = poly1305_block(ctx, ctx->buffer);
        if (ret != 0)
            return ret;
    }

    poly1305_final(ctx, mac);
#elif defined(POLY130564)

    /* process the remaining block */
    if (ctx->leftover) {
        size_t i = ctx->leftover;
        int ret;

        ctx->buffer[i] = 1;
        for (i = i + 1; i < POLY1305_BLOCK_SIZE; i++)
            ctx->buffer[i] = 0;
        ctx->finished = 1;
        ret = poly1305_block(ctx, ctx->buffer);
        if (ret != 0)
            return ret;
    }

    /* fully carry h */
    h0 = ctx->h[0];
    h1 = ctx->h[1];
    h2 = ctx->h[2];

                 c = (h1 >> 44); h1 &= 0xfffffffffff;
    h2 += c;     c = (h2 >> 42); h2 &= 0x3ffffffffff;
    h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
    h1 += c;     c = (h1 >> 44); h1 &= 0xfffffffffff;
    h2 += c;     c = (h2 >> 42); h2 &= 0x3ffffffffff;
    h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
    h1 += c;

    /* compute h + -p */
    g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
    g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
    g2 = h2 + c - ((word64)1 << 42);

    /* select h if h < p, or h + -p if h >= p
     * (c becomes all ones when the subtraction did not borrow) */
    c = (g2 >> ((sizeof(word64) * 8) - 1)) - 1;
    g0 &= c;
    g1 &= c;
    g2 &= c;
    c = ~c;
    h0 = (h0 & c) | g0;
    h1 = (h1 & c) | g1;
    h2 = (h2 & c) | g2;

    /* h = (h + pad) */
    t0 = ctx->pad[0];
    t1 = ctx->pad[1];

    h0 += (( t0                    ) & 0xfffffffffff)    ;
    c = (h0 >> 44); h0 &= 0xfffffffffff;
    h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c;
    c = (h1 >> 44); h1 &= 0xfffffffffff;
    h2 += (((t1 >> 24)             ) & 0x3ffffffffff) + c;
    h2 &= 0x3ffffffffff;

    /* mac = h % (2^128) */
    h0 = ((h0      ) | (h1 << 44));
    h1 = ((h1 >> 20) | (h2 << 24));

    U64TO8(mac + 0, h0);
    U64TO8(mac + 8, h1);

    /* zero out the state */
    ctx->h[0] = 0;
    ctx->h[1] = 0;
    ctx->h[2] = 0;
    ctx->r[0] = 0;
    ctx->r[1] = 0;
    ctx->r[2] = 0;
    ctx->pad[0] = 0;
    ctx->pad[1] = 0;

#else /* if not 64 bit then use 32 bit */

    /* process the remaining block */
    if (ctx->leftover) {
        size_t i = ctx->leftover;
        int ret;

        ctx->buffer[i++] = 1;
        for (; i < POLY1305_BLOCK_SIZE; i++)
            ctx->buffer[i] = 0;
        ctx->finished = 1;
        /* Can fail with MEMORY_E when the w64 wrapper temporaries are heap
         * allocated; do not emit a tag that is missing the last block. */
        ret = poly1305_block(ctx, ctx->buffer);
        if (ret != 0)
            return ret;
    }

    /* fully carry h */
    h0 = ctx->h[0];
    h1 = ctx->h[1];
    h2 = ctx->h[2];
    h3 = ctx->h[3];
    h4 = ctx->h[4];

                 c = h1 >> 26; h1 = h1 & 0x3ffffff;
    h2 +=     c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
    h3 +=     c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
    h4 +=     c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
    h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
    h1 +=     c;

    /* compute h + -p */
    g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
    g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
    g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
    g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
    g4 = h4 + c - ((word32)1 << 26);

    /* select h if h < p, or h + -p if h >= p
     * (mask becomes all ones when the subtraction did not borrow) */
    mask = ((word32)g4 >> ((sizeof(word32) * 8) - 1)) - 1;
    g0 &= mask;
    g1 &= mask;
    g2 &= mask;
    g3 &= mask;
    g4 &= mask;
    mask = ~mask;
    h0 = (h0 & mask) | g0;
    h1 = (h1 & mask) | g1;
    h2 = (h2 & mask) | g2;
    h3 = (h3 & mask) | g3;
    h4 = (h4 & mask) | g4;

    /* h = h % (2^128) */
    h0 = ((h0      ) | (h1 << 26)) & 0xffffffff;
    h1 = ((h1 >>  6) | (h2 << 20)) & 0xffffffff;
    h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
    h3 = ((h3 >> 18) | (h4 <<  8)) & 0xffffffff;

    /* mac = (h + pad) % (2^128) */
#ifdef WOLFSSL_W64_WRAPPER
    f = w64From32(0, h0);
    f = w64Add32(f, ctx->pad[0], NULL);
    h0 = w64GetLow32(f);

    f = w64ShiftRight(f, 32);
    f = w64Add32(f, h1, NULL);
    f = w64Add32(f, ctx->pad[1], NULL);
    h1 = w64GetLow32(f);

    f = w64ShiftRight(f, 32);
    f = w64Add32(f, h2, NULL);
    f = w64Add32(f, ctx->pad[2], NULL);
    h2 = w64GetLow32(f);

    f = w64ShiftRight(f, 32);
    f = w64Add32(f, h3, NULL);
    f = w64Add32(f, ctx->pad[3], NULL);
    h3 = w64GetLow32(f);
#else
    f = (word64)h0 + ctx->pad[0]            ; h0 = (word32)f;
    f = (word64)h1 + ctx->pad[1] + (f >> 32); h1 = (word32)f;
    f = (word64)h2 + ctx->pad[2] + (f >> 32); h2 = (word32)f;
    f = (word64)h3 + ctx->pad[3] + (f >> 32); h3 = (word32)f;
#endif

    U32TO8(mac + 0, h0);
    U32TO8(mac + 4, h1);
    U32TO8(mac + 8, h2);
    U32TO8(mac + 12, h3);

    /* zero out the state */
    ctx->h[0] = 0;
    ctx->h[1] = 0;
    ctx->h[2] = 0;
    ctx->h[3] = 0;
    ctx->h[4] = 0;
    ctx->r[0] = 0;
    ctx->r[1] = 0;
    ctx->r[2] = 0;
    ctx->r[3] = 0;
    ctx->r[4] = 0;
    ctx->pad[0] = 0;
    ctx->pad[1] = 0;
    ctx->pad[2] = 0;
    ctx->pad[3] = 0;

#endif

    return 0;
}
837
#endif /* !WOLFSSL_RISCV_ASM */
838
839
840
/* Feed message bytes into a Poly1305 computation.
 *
 * Data is processed in whole blocks; bytes that do not fill a block are kept
 * in ctx->buffer (count in ctx->leftover) until a later call completes it.
 *
 * ctx    Poly1305 context.
 * m      Message data; may be NULL only when bytes is 0.
 * bytes  Number of bytes at m. Zero is valid and does nothing.
 *
 * Returns 0 on success, BAD_FUNC_ARG when ctx is NULL or m is NULL with
 * bytes > 0, or the error reported by the block-processing routine.
 */
int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
{
    size_t i;

    if (ctx == NULL || (m == NULL && bytes > 0))
        return BAD_FUNC_ARG;

    if (bytes == 0) {
        /* valid, but do nothing */
        return 0;
    }
#ifdef CHACHA_AEAD_TEST
    {
        /* Own scope so the declaration stays at the start of a block. */
        word32 k;
        printf("Raw input to poly:\n");
        for (k = 0; k < bytes; k++) {
            printf("%02x", m[k]);
            if ((k+1) % 16 == 0)
                printf("\n");
        }
        printf("\n");
    }
#endif

#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_THUMB2) && \
    !defined(WOLFSSL_ARMASM_NO_NEON)
    /* handle leftover */
    if (ctx->leftover) {
        size_t want = sizeof(ctx->buffer) - ctx->leftover;
        if (want > bytes)
            want = bytes;

        for (i = 0; i < want; i++)
            ctx->buffer[ctx->leftover + i] = m[i];
        bytes -= (word32)want;
        m += want;
        ctx->leftover += want;
        if (ctx->leftover < sizeof(ctx->buffer)) {
            return 0;
        }

        poly1305_blocks(ctx, ctx->buffer, sizeof(ctx->buffer));
        ctx->leftover = 0;
    }

    /* process full blocks */
    if (bytes >= sizeof(ctx->buffer)) {
        size_t want = bytes & ~((size_t)POLY1305_BLOCK_SIZE - 1);

        poly1305_blocks(ctx, m, want);
        m += want;
        bytes -= (word32)want;
    }

    /* store leftover */
    if (bytes) {
        for (i = 0; i < bytes; i++)
            ctx->buffer[ctx->leftover + i] = m[i];
        ctx->leftover += bytes;
    }
#else
#ifdef USE_INTEL_POLY1305_SPEEDUP
    #ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags)) {
        SAVE_VECTOR_REGISTERS(return _svr_ret;);

        /* handle leftover */
        if (ctx->leftover) {
            size_t want = sizeof(ctx->buffer) - ctx->leftover;
            if (want > bytes)
                want = bytes;

            for (i = 0; i < want; i++)
                ctx->buffer[ctx->leftover + i] = m[i];
            bytes -= (word32)want;
            m += want;
            ctx->leftover += want;
            if (ctx->leftover < sizeof(ctx->buffer)) {
                RESTORE_VECTOR_REGISTERS();
                return 0;
            }

            /* Powers of r are computed once, before the first AVX2 block
             * call after a key is set. */
            if (!ctx->started) {
                poly1305_calc_powers_avx2(ctx);
                ctx->started = 1;
            }
            poly1305_blocks_avx2(ctx, ctx->buffer, sizeof(ctx->buffer));
            ctx->leftover = 0;
        }

        /* process full blocks */
        if (bytes >= sizeof(ctx->buffer)) {
            size_t want = bytes & ~(sizeof(ctx->buffer) - 1);

            if (!ctx->started) {
                poly1305_calc_powers_avx2(ctx);
                ctx->started = 1;
            }
            poly1305_blocks_avx2(ctx, m, want);
            m += want;
            bytes -= (word32)want;
        }

        /* store leftover */
        if (bytes) {
            for (i = 0; i < bytes; i++)
                ctx->buffer[ctx->leftover + i] = m[i];
            ctx->leftover += bytes;
        }
        RESTORE_VECTOR_REGISTERS();
    }
    else
    #endif
#endif
    {
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
        int ret;
#endif

        /* handle leftover */
        if (ctx->leftover) {
            size_t want = (POLY1305_BLOCK_SIZE - ctx->leftover);
            if (want > bytes)
                want = bytes;
            for (i = 0; i < want; i++)
                ctx->buffer[ctx->leftover + i] = m[i];
            bytes -= (word32)want;
            m += want;
            ctx->leftover += want;
            if (ctx->leftover < POLY1305_BLOCK_SIZE)
                return 0;
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
            /* Same error handling as the full-block call below: a failure
             * here would otherwise silently drop a block from the MAC. */
            ret = poly1305_block(ctx, ctx->buffer);
            if (ret != 0)
                return ret;
#else
            poly1305_block(ctx, ctx->buffer);
#endif
            ctx->leftover = 0;
        }

        /* process full blocks */
        if (bytes >= POLY1305_BLOCK_SIZE) {
            size_t want = ((size_t)bytes & ~((size_t)POLY1305_BLOCK_SIZE - 1));
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
            ret = poly1305_blocks(ctx, m, want);
            if (ret != 0)
                return ret;
#else
            poly1305_blocks(ctx, m, want);
#endif
            m += want;
            bytes -= (word32)want;
        }

        /* store leftover */
        if (bytes) {
            for (i = 0; i < bytes; i++)
                ctx->buffer[ctx->leftover + i] = m[i];
            ctx->leftover += bytes;
        }
    }
#endif

    return 0;
}
995
996
/*  Takes a Poly1305 struct that has a key loaded and pads the provided length
997
    ctx        : Initialized Poly1305 struct to use
998
    lenToPad   : Current number of bytes updated that needs padding to 16
999
 */
1000
int wc_Poly1305_Pad(Poly1305* ctx, word32 lenToPad)
1001
6.31k
{
1002
6.31k
    int ret = 0;
1003
6.31k
    word32 paddingLen;
1004
6.31k
    byte padding[WC_POLY1305_PAD_SZ - 1];
1005
1006
6.31k
    if (ctx == NULL) {
1007
0
        return BAD_FUNC_ARG;
1008
0
    }
1009
6.31k
    if (lenToPad == 0) {
1010
9
        return 0; /* nothing needs to be done */
1011
9
    }
1012
1013
6.30k
    XMEMSET(padding, 0, sizeof(padding));
1014
1015
    /* Pad length to 16 bytes */
1016
6.30k
    paddingLen = (-(int)lenToPad) & (WC_POLY1305_PAD_SZ - 1);
1017
6.30k
    if ((paddingLen > 0) && (paddingLen < WC_POLY1305_PAD_SZ)) {
1018
6.14k
        ret = wc_Poly1305Update(ctx, padding, paddingLen);
1019
6.14k
    }
1020
6.30k
    return ret;
1021
6.31k
}
1022
1023
/*  Takes a Poly1305 struct that has a key loaded and adds the AEAD length
1024
    encoding in 64-bit little endian
1025
    aadSz      : Size of the additional authentication data
1026
    dataSz     : Size of the plaintext or ciphertext
1027
 */
1028
int wc_Poly1305_EncodeSizes(Poly1305* ctx, word32 aadSz, word32 dataSz)
1029
3.15k
{
1030
3.15k
    int ret;
1031
3.15k
    byte little64[16]; /* sizeof(word64) * 2 */
1032
1033
3.15k
    if (ctx == NULL) {
1034
0
        return BAD_FUNC_ARG;
1035
0
    }
1036
1037
3.15k
    XMEMSET(little64, 0, sizeof(little64));
1038
1039
    /* size of additional data and input data as little endian 64 bit types */
1040
3.15k
    u32tole64(aadSz,  little64);
1041
3.15k
    u32tole64(dataSz, little64 + 8);
1042
3.15k
    ret = wc_Poly1305Update(ctx, little64, sizeof(little64));
1043
1044
3.15k
    return ret;
1045
3.15k
}
1046
1047
#ifdef WORD64_AVAILABLE
/*  64-bit variant of the AEAD length encoding: feeds aadSz followed by dataSz
    into the Poly1305 context as two word64 values. On BIG_ENDIAN_ORDER builds
    each value is byte-reversed before being absorbed.

    ctx        : Initialized Poly1305 struct to use
    aadSz      : Size of the additional authentication data
    dataSz     : Size of the plaintext or ciphertext

    Returns BAD_FUNC_ARG when ctx is NULL, otherwise the result of
    wc_Poly1305Update.
 */
int wc_Poly1305_EncodeSizes64(Poly1305* ctx, word64 aadSz, word64 dataSz)
{
    word64 sizes[2];

    if (ctx == NULL)
        return BAD_FUNC_ARG;

    sizes[0] = aadSz;
    sizes[1] = dataSz;
#ifdef BIG_ENDIAN_ORDER
    /* swap to the byte order expected in the hashed length block */
    sizes[0] = ByteReverseWord64(sizes[0]);
    sizes[1] = ByteReverseWord64(sizes[1]);
#endif

    return wc_Poly1305Update(ctx, (byte *)sizes, sizeof(sizes));
}
#endif
1070
1071
/*  Takes in an initialized Poly1305 struct that has a key loaded and creates
1072
    a MAC (tag) using recent TLS AEAD padding scheme.
1073
    ctx        : Initialized Poly1305 struct to use
1074
    additional : Additional data to use
1075
    addSz      : Size of additional buffer
1076
    input      : Input buffer to create tag from
1077
    sz         : Size of input buffer
1078
    tag        : Buffer to hold created tag
1079
    tagSz      : Size of input tag buffer (must be at least
1080
                 WC_POLY1305_MAC_SZ(16))
1081
 */
1082
int wc_Poly1305_MAC(Poly1305* ctx, const byte* additional, word32 addSz,
1083
                    const byte* input, word32 sz, byte* tag, word32 tagSz)
1084
3.15k
{
1085
3.15k
    int ret;
1086
1087
    /* sanity check on arguments */
1088
3.15k
    if (ctx == NULL || input == NULL || tag == NULL ||
1089
3.15k
                                                   tagSz < WC_POLY1305_MAC_SZ) {
1090
0
        return BAD_FUNC_ARG;
1091
0
    }
1092
1093
    /* additional allowed to be 0 */
1094
3.15k
    if (addSz > 0) {
1095
3.15k
        if (additional == NULL)
1096
0
            return BAD_FUNC_ARG;
1097
1098
        /* additional data plus padding */
1099
3.15k
        if ((ret = wc_Poly1305Update(ctx, additional, addSz)) != 0) {
1100
0
            return ret;
1101
0
        }
1102
        /* pad additional data */
1103
3.15k
        if ((ret = wc_Poly1305_Pad(ctx, addSz)) != 0) {
1104
0
            return ret;
1105
0
        }
1106
3.15k
    }
1107
1108
    /* input plus padding */
1109
3.15k
    if ((ret = wc_Poly1305Update(ctx, input, sz)) != 0) {
1110
0
        return ret;
1111
0
    }
1112
    /* pad input data */
1113
3.15k
    if ((ret = wc_Poly1305_Pad(ctx, sz)) != 0) {
1114
0
        return ret;
1115
0
    }
1116
1117
    /* encode size of AAD and input data as little endian 64 bit types */
1118
3.15k
    if ((ret = wc_Poly1305_EncodeSizes(ctx, addSz, sz)) != 0) {
1119
0
        return ret;
1120
0
    }
1121
1122
    /* Finalize the auth tag */
1123
3.15k
    ret = wc_Poly1305Final(ctx, tag);
1124
1125
3.15k
    return ret;
1126
1127
3.15k
}
1128
#endif /* HAVE_POLY1305 */