Coverage Report

Created: 2025-06-13 06:58

/src/openssl32/crypto/modes/gcm128.c
 Line    Count  Source
    1           /*
    2            * Copyright 2010-2023 The OpenSSL Project Authors. All Rights Reserved.
    3            *
    4            * Licensed under the Apache License 2.0 (the "License").  You may not use
    5            * this file except in compliance with the License.  You can obtain a copy
    6            * in the file LICENSE in the source distribution or at
    7            * https://www.openssl.org/source/license.html
    8            */
    9
   10           #include <string.h>
   11           #include <openssl/crypto.h>
   12           #include "internal/cryptlib.h"
   13           #include "internal/endian.h"
   14           #include "crypto/modes.h"
   15
   16           #if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
   17           typedef size_t size_t_aX __attribute((__aligned__(1)));
   18           #else
   19           typedef size_t size_t_aX;
   20           #endif
   21
   22           #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
   23           /* redefine, because alignment is ensured */
   24           # undef  GETU32
   25           # define GETU32(p)       BSWAP4(*(const u32 *)(p))
   26           # undef  PUTU32
   27           # define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
   28           #endif
   29
   30           /* RISC-V uses C implementation as a fallback. */
   31           #if defined(__riscv)
   32           # define INCLUDE_C_GMULT_4BIT
   33           # define INCLUDE_C_GHASH_4BIT
   34           #endif
   35
   36           #define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
   37        0  #define REDUCE1BIT(V)   do { \
   38        0          if (sizeof(size_t)==8) { \
   39        0                  u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
   40        0                  V.lo  = (V.hi<<63)|(V.lo>>1); \
   41        0                  V.hi  = (V.hi>>1 )^T; \
   42        0          } \
   43        0          else { \
   44        0                  u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
   45        0                  V.lo  = (V.hi<<63)|(V.lo>>1); \
   46        0                  V.hi  = (V.hi>>1 )^((u64)T<<32); \
   47        0          } \
   48        0  } while(0)
   49
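A note on the REDUCE1BIT macro above: GCM arithmetic lives in GF(2^128) under the bit-reflected convention of NIST SP 800-38D, where halving an element is a right shift followed by a conditional reduction, and the 0xe1 constant in the top byte encodes the low terms of the field polynomial. As a LaTeX sketch of the identity the macro computes (the identity is standard; it is not spelled out in the source):

    V \cdot x^{-1} = (V \gg 1) \oplus \operatorname{lsb}(V) \cdot R,
    \qquad R = \mathtt{0xE1} \,\|\, 0^{120},
    \qquad f(x) = x^{128} + x^7 + x^2 + x + 1.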
   50           /*-
   51            *
   52            * NOTE: TABLE_BITS and all non-4bit implementations have been removed in 3.1.
   53            *
   54            * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
   55            * never be set to 8. 8 is effectively reserved for testing purposes.
   56            * TABLE_BITS>1 are lookup-table-driven implementations referred to as
   57            * "Shoup's" in GCM specification. In other words OpenSSL does not cover
   58            * whole spectrum of possible table driven implementations. Why? In
   59            * non-"Shoup's" case memory access pattern is segmented in such manner,
   60            * that it's trivial to see that cache timing information can reveal
   61            * fair portion of intermediate hash value. Given that ciphertext is
   62            * always available to attacker, it's possible for him to attempt to
   63            * deduce secret parameter H and if successful, tamper with messages
   64            * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
   65            * not as trivial, but there is no reason to believe that it's resistant
   66            * to cache-timing attack. And the thing about "8-bit" implementation is
   67            * that it consumes 16 (sixteen) times more memory, 4KB per individual
   68            * key + 1KB shared. Well, on pros side it should be twice as fast as
   69            * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
   70            * was observed to run ~75% faster, closer to 100% for commercial
   71            * compilers... Yet "4-bit" procedure is preferred, because it's
   72            * believed to provide better security-performance balance and adequate
   73            * all-round performance. "All-round" refers to things like:
   74            *
   75            * - shorter setup time effectively improves overall timing for
   76            *   handling short messages;
   77            * - larger table allocation can become unbearable because of VM
   78            *   subsystem penalties (for example on Windows large enough free
   79            *   results in VM working set trimming, meaning that consequent
   80            *   malloc would immediately incur working set expansion);
   81            * - larger table has larger cache footprint, which can affect
   82            *   performance of other code paths (not necessarily even from same
   83            *   thread in Hyper-Threading world);
   84            *
   85            * Value of 1 is not appropriate for performance reasons.
   86            */
   87
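The memory figures in the comment above are easy to verify: each Htable entry is a u128, i.e. two u64 halves. A quick illustrative check (this snippet is not part of gcm128.c):

    #include <stdio.h>

    int main(void)
    {
        unsigned entry = 16;                    /* sizeof(u128): two u64 halves */

        /* "4-bit" Shoup table: 2^4 entries per key */
        printf("4-bit: %u bytes/key\n", 16u * entry);    /* 256 bytes */
        /* "8-bit" table: 2^8 entries per key, 16x larger, plus 1KB shared */
        printf("8-bit: %u bytes/key\n", 256u * entry);   /* 4096 bytes = 4KB */
        return 0;
    }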
   88           static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
   89        0  {
   90        0      u128 V;
   91           # if defined(OPENSSL_SMALL_FOOTPRINT)
   92               int i;
   93           # endif
   94
   95        0      Htable[0].hi = 0;
   96        0      Htable[0].lo = 0;
   97        0      V.hi = H[0];
   98        0      V.lo = H[1];
   99
  100           # if defined(OPENSSL_SMALL_FOOTPRINT)
  101               for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
  102                   REDUCE1BIT(V);
  103                   Htable[i] = V;
  104               }
  105
  106               for (i = 2; i < 16; i <<= 1) {
  107                   u128 *Hi = Htable + i;
  108                   int j;
  109                   for (V = *Hi, j = 1; j < i; ++j) {
  110                       Hi[j].hi = V.hi ^ Htable[j].hi;
  111                       Hi[j].lo = V.lo ^ Htable[j].lo;
  112                   }
  113               }
  114           # else
  115        0      Htable[8] = V;
  116        0      REDUCE1BIT(V);
  117        0      Htable[4] = V;
  118        0      REDUCE1BIT(V);
  119        0      Htable[2] = V;
  120        0      REDUCE1BIT(V);
  121        0      Htable[1] = V;
  122        0      Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  123        0      V = Htable[4];
  124        0      Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  125        0      Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  126        0      Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  127        0      V = Htable[8];
  128        0      Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  129        0      Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  130        0      Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  131        0      Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  132        0      Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  133        0      Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  134        0      Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
  135        0  # endif
  136           # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
  137               /*
  138                * ARM assembler expects specific dword order in Htable.
  139                */
  140               {
  141                   int j;
  142                   DECLARE_IS_ENDIAN;
  143
  144                   if (IS_LITTLE_ENDIAN)
  145                       for (j = 0; j < 16; ++j) {
  146                           V = Htable[j];
  147                           Htable[j].hi = V.lo;
  148                           Htable[j].lo = V.hi;
  149                   } else
  150                       for (j = 0; j < 16; ++j) {
  151                           V = Htable[j];
  152                           Htable[j].hi = V.lo << 32 | V.lo >> 32;
  153                           Htable[j].lo = V.hi << 32 | V.hi >> 32;
  154                       }
  155               }
  156           # endif
  157        0  }
  158
  159           # if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
  160           static const size_t rem_4bit[16] = {
  161               PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
  162               PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
  163               PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
  164               PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
  165           };
  166
  167           static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
  168           {
  169               u128 Z;
  170               int cnt = 15;
  171               size_t rem, nlo, nhi;
  172               DECLARE_IS_ENDIAN;
  173
  174               nlo = ((const u8 *)Xi)[15];
  175               nhi = nlo >> 4;
  176               nlo &= 0xf;
  177
  178               Z.hi = Htable[nlo].hi;
  179               Z.lo = Htable[nlo].lo;
  180
  181               while (1) {
  182                   rem = (size_t)Z.lo & 0xf;
  183                   Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  184                   Z.hi = (Z.hi >> 4);
  185                   if (sizeof(size_t) == 8)
  186                       Z.hi ^= rem_4bit[rem];
  187                   else
  188                       Z.hi ^= (u64)rem_4bit[rem] << 32;
  189
  190                   Z.hi ^= Htable[nhi].hi;
  191                   Z.lo ^= Htable[nhi].lo;
  192
  193                   if (--cnt < 0)
  194                       break;
  195
  196                   nlo = ((const u8 *)Xi)[cnt];
  197                   nhi = nlo >> 4;
  198                   nlo &= 0xf;
  199
  200                   rem = (size_t)Z.lo & 0xf;
  201                   Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  202                   Z.hi = (Z.hi >> 4);
  203                   if (sizeof(size_t) == 8)
  204                       Z.hi ^= rem_4bit[rem];
  205                   else
  206                       Z.hi ^= (u64)rem_4bit[rem] << 32;
  207
  208                   Z.hi ^= Htable[nlo].hi;
  209                   Z.lo ^= Htable[nlo].lo;
  210               }
  211
  212               if (IS_LITTLE_ENDIAN) {
  213           #  ifdef BSWAP8
  214                   Xi[0] = BSWAP8(Z.hi);
  215                   Xi[1] = BSWAP8(Z.lo);
  216           #  else
  217                   u8 *p = (u8 *)Xi;
  218                   u32 v;
  219                   v = (u32)(Z.hi >> 32);
  220                   PUTU32(p, v);
  221                   v = (u32)(Z.hi);
  222                   PUTU32(p + 4, v);
  223                   v = (u32)(Z.lo >> 32);
  224                   PUTU32(p + 8, v);
  225                   v = (u32)(Z.lo);
  226                   PUTU32(p + 12, v);
  227           #  endif
  228               } else {
  229                   Xi[0] = Z.hi;
  230                   Xi[1] = Z.lo;
  231               }
  232           }
  233
  234           # endif
  235
  236           # if !defined(GHASH_ASM) || defined(INCLUDE_C_GHASH_4BIT)
  237           #  if !defined(OPENSSL_SMALL_FOOTPRINT)
  238           /*
  239            * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
  240            * details... Compiler-generated code doesn't seem to give any
  241            * performance improvement, at least not on x86[_64]. It's here
  242            * mostly as reference and a placeholder for possible future
  243            * non-trivial optimization[s]...
  244            */
  245           static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
  246                                      const u8 *inp, size_t len)
  247           {
  248               u128 Z;
  249               int cnt;
  250               size_t rem, nlo, nhi;
  251               DECLARE_IS_ENDIAN;
  252
  253               do {
  254                   cnt = 15;
  255                   nlo = ((const u8 *)Xi)[15];
  256                   nlo ^= inp[15];
  257                   nhi = nlo >> 4;
  258                   nlo &= 0xf;
  259
  260                   Z.hi = Htable[nlo].hi;
  261                   Z.lo = Htable[nlo].lo;
  262
  263                   while (1) {
  264                       rem = (size_t)Z.lo & 0xf;
  265                       Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  266                       Z.hi = (Z.hi >> 4);
  267                       if (sizeof(size_t) == 8)
  268                           Z.hi ^= rem_4bit[rem];
  269                       else
  270                           Z.hi ^= (u64)rem_4bit[rem] << 32;
  271
  272                       Z.hi ^= Htable[nhi].hi;
  273                       Z.lo ^= Htable[nhi].lo;
  274
  275                       if (--cnt < 0)
  276                           break;
  277
  278                       nlo = ((const u8 *)Xi)[cnt];
  279                       nlo ^= inp[cnt];
  280                       nhi = nlo >> 4;
  281                       nlo &= 0xf;
  282
  283                       rem = (size_t)Z.lo & 0xf;
  284                       Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  285                       Z.hi = (Z.hi >> 4);
  286                       if (sizeof(size_t) == 8)
  287                           Z.hi ^= rem_4bit[rem];
  288                       else
  289                           Z.hi ^= (u64)rem_4bit[rem] << 32;
  290
  291                       Z.hi ^= Htable[nlo].hi;
  292                       Z.lo ^= Htable[nlo].lo;
  293                   }
  294
  295                   if (IS_LITTLE_ENDIAN) {
  296           #   ifdef BSWAP8
  297                       Xi[0] = BSWAP8(Z.hi);
  298                       Xi[1] = BSWAP8(Z.lo);
  299           #   else
  300                       u8 *p = (u8 *)Xi;
  301                       u32 v;
  302                       v = (u32)(Z.hi >> 32);
  303                       PUTU32(p, v);
  304                       v = (u32)(Z.hi);
  305                       PUTU32(p + 4, v);
  306                       v = (u32)(Z.lo >> 32);
  307                       PUTU32(p + 8, v);
  308                       v = (u32)(Z.lo);
  309                       PUTU32(p + 12, v);
  310           #   endif
  311                   } else {
  312                       Xi[0] = Z.hi;
  313                       Xi[1] = Z.lo;
  314                   }
  315
  316                   inp += 16;
  317                   /* Block size is 128 bits so len is a multiple of 16 */
  318                   len -= 16;
  319               } while (len > 0);
  320           }
  321           #  endif
  322           # else
  323           void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
  324           void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  325                               size_t len);
  326           # endif
  327
  328     291k  # define GCM_MUL(ctx)      ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
  329           # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
  330     3.19M  #  define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
  331           /*
  332            * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
  333            * effect. In other words idea is to hash data while it's still in L1 cache
  334            * after encryption pass...
  335            */
  336     2.24M  #  define GHASH_CHUNK       (3*1024)
  337           # endif
  338
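The stride idea is easiest to see stripped of the surrounding bookkeeping. A minimal sketch of the loop shape CRYPTO_gcm128_encrypt uses further down, where ctr32_encrypt_blocks stands in for the CTR pass (a hypothetical helper, not a function from this file):

    while (len >= GHASH_CHUNK) {
        ctr32_encrypt_blocks(in, out, GHASH_CHUNK / 16); /* hypothetical CTR pass */
        GHASH(ctx, out, GHASH_CHUNK);  /* hash the chunk while it is still in L1 */
        in  += GHASH_CHUNK;
        out += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }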
  339           #if     (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
  340           # if    !defined(I386_ONLY) && \
  341                   (defined(__i386)        || defined(__i386__)    || \
  342                    defined(__x86_64)      || defined(__x86_64__)  || \
  343                    defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
  344           #  define GHASH_ASM_X86_OR_64
  345
  346           void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
  347           void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
  348           void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  349                                size_t len);
  350
  351           #  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
  352           #   define gcm_init_avx   gcm_init_clmul
  353           #   define gcm_gmult_avx  gcm_gmult_clmul
  354           #   define gcm_ghash_avx  gcm_ghash_clmul
  355           #  else
  356           void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
  357           void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
  358           void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  359                              size_t len);
  360           #  endif
  361
  362           #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
  363           #   define GHASH_ASM_X86
  364           void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
  365           void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  366                                   size_t len);
  367
  368           void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
  369           void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  370                                   size_t len);
  371           #  endif
  372           # elif defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(_M_ARM64)
  373           #  include "arm_arch.h"
  374           #  if __ARM_MAX_ARCH__>=7
  375           #   define GHASH_ASM_ARM
  376           #   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
  377           #   if defined(__arm__) || defined(__arm)
  378           #    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
  379           #   endif
  380           void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
  381           void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
  382           void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  383                               size_t len);
  384           void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
  385           void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
  386           void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  387                             size_t len);
  388           #  endif
  389           # elif defined(__sparc__) || defined(__sparc)
  390           #  include "crypto/sparc_arch.h"
  391           #  define GHASH_ASM_SPARC
  392           void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
  393           void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
  394           void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  395                               size_t len);
  396           # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
  397           #  include "crypto/ppc_arch.h"
  398           #  define GHASH_ASM_PPC
  399           void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
  400           void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
  401           void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  402                             size_t len);
  403           # elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
  404           #  include "crypto/riscv_arch.h"
  405           #  define GHASH_ASM_RV64I
  406           /* Zbc/Zbkc (scalar crypto with clmul) based routines. */
  407           void gcm_init_rv64i_zbc(u128 Htable[16], const u64 Xi[2]);
  408           void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 Xi[2]);
  409           void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 Xi[2]);
  410           void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
  411           void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
  412           void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
  413                                    const u8 *inp, size_t len);
  414           void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
  415                                          const u8 *inp, size_t len);
  416           # endif
  417           #endif
  418
  419           static void gcm_get_funcs(struct gcm_funcs_st *ctx)
  420      224k  {
  421               /* set defaults -- overridden below as needed */
  422      224k      ctx->ginit = gcm_init_4bit;
  423           #if !defined(GHASH_ASM)
  424               ctx->gmult = gcm_gmult_4bit;
  425           #else
  426      224k      ctx->gmult = NULL;
  427      224k  #endif
  428           #if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
  429               ctx->ghash = gcm_ghash_4bit;
  430           #else
  431      224k      ctx->ghash = NULL;
  432      224k  #endif
  433
  434      224k  #if defined(GHASH_ASM_X86_OR_64)
  435      224k  # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
  436               /* x86_64 */
  437      224k      if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
  438      224k          if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
  439      224k              ctx->ginit = gcm_init_avx;
  440      224k              ctx->gmult = gcm_gmult_avx;
  441      224k              ctx->ghash = gcm_ghash_avx;
  442      224k          } else {
  443        0              ctx->ginit = gcm_init_clmul;
  444        0              ctx->gmult = gcm_gmult_clmul;
  445        0              ctx->ghash = gcm_ghash_clmul;
  446        0          }
  447      224k          return;
  448      224k      }
  449        0  # endif
  450           # if defined(GHASH_ASM_X86)
  451               /* x86 only */
  452           #  if defined(OPENSSL_IA32_SSE2)
  453               if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
  454                   ctx->gmult = gcm_gmult_4bit_mmx;
  455                   ctx->ghash = gcm_ghash_4bit_mmx;
  456                   return;
  457               }
  458           #  else
  459               if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
  460                   ctx->gmult = gcm_gmult_4bit_mmx;
  461                   ctx->ghash = gcm_ghash_4bit_mmx;
  462                   return;
  463               }
  464           #  endif
  465               ctx->gmult = gcm_gmult_4bit_x86;
  466               ctx->ghash = gcm_ghash_4bit_x86;
  467               return;
  468           # else
  469               /* x86_64 fallback defaults */
  470        0      ctx->gmult = gcm_gmult_4bit;
  471        0      ctx->ghash = gcm_ghash_4bit;
  472        0      return;
  473      224k  # endif
  474           #elif defined(GHASH_ASM_ARM)
  475               /* ARM defaults */
  476               ctx->gmult = gcm_gmult_4bit;
  477           # if !defined(OPENSSL_SMALL_FOOTPRINT)
  478               ctx->ghash = gcm_ghash_4bit;
  479           # else
  480               ctx->ghash = NULL;
  481           # endif
  482           # ifdef PMULL_CAPABLE
  483               if (PMULL_CAPABLE) {
  484                   ctx->ginit = (gcm_init_fn)gcm_init_v8;
  485                   ctx->gmult = gcm_gmult_v8;
  486                   ctx->ghash = gcm_ghash_v8;
  487               }
  488           # elif defined(NEON_CAPABLE)
  489               if (NEON_CAPABLE) {
  490                   ctx->ginit = gcm_init_neon;
  491                   ctx->gmult = gcm_gmult_neon;
  492                   ctx->ghash = gcm_ghash_neon;
  493               }
  494           # endif
  495               return;
  496           #elif defined(GHASH_ASM_SPARC)
  497               /* SPARC defaults */
  498               ctx->gmult = gcm_gmult_4bit;
  499               ctx->ghash = gcm_ghash_4bit;
  500               if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
  501                   ctx->ginit = gcm_init_vis3;
  502                   ctx->gmult = gcm_gmult_vis3;
  503                   ctx->ghash = gcm_ghash_vis3;
  504               }
  505               return;
  506           #elif defined(GHASH_ASM_PPC)
  507               /* PowerPC does not define GHASH_ASM; defaults set above */
  508               if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
  509                   ctx->ginit = gcm_init_p8;
  510                   ctx->gmult = gcm_gmult_p8;
  511                   ctx->ghash = gcm_ghash_p8;
  512               }
  513               return;
  514           #elif defined(GHASH_ASM_RV64I)
  515               /* RISCV defaults */
  516               ctx->gmult = gcm_gmult_4bit;
  517               ctx->ghash = gcm_ghash_4bit;
  518
  519               if (RISCV_HAS_ZBC()) {
  520                   if (RISCV_HAS_ZBKB()) {
  521                       ctx->ginit = gcm_init_rv64i_zbc__zbkb;
  522                       ctx->gmult = gcm_gmult_rv64i_zbc__zbkb;
  523                       ctx->ghash = gcm_ghash_rv64i_zbc__zbkb;
  524                   } else if (RISCV_HAS_ZBB()) {
  525                       ctx->ginit = gcm_init_rv64i_zbc__zbb;
  526                       ctx->gmult = gcm_gmult_rv64i_zbc;
  527                       ctx->ghash = gcm_ghash_rv64i_zbc;
  528                   } else {
  529                       ctx->ginit = gcm_init_rv64i_zbc;
  530                       ctx->gmult = gcm_gmult_rv64i_zbc;
  531                       ctx->ghash = gcm_ghash_rv64i_zbc;
  532                   }
  533               }
  534               return;
  535           #elif defined(GHASH_ASM)
  536               /* all other architectures use the generic names */
  537               ctx->gmult = gcm_gmult_4bit;
  538               ctx->ghash = gcm_ghash_4bit;
  539               return;
  540           #endif
  541      224k  }
  542
  543           void ossl_gcm_init_4bit(u128 Htable[16], const u64 H[2])
  544        0  {
  545        0      struct gcm_funcs_st funcs;
  546
  547        0      gcm_get_funcs(&funcs);
  548        0      funcs.ginit(Htable, H);
  549        0  }
  550
  551           void ossl_gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
  552        0  {
  553        0      struct gcm_funcs_st funcs;
  554
  555        0      gcm_get_funcs(&funcs);
  556        0      funcs.gmult(Xi, Htable);
  557        0  }
  558
  559           void ossl_gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
  560                                    const u8 *inp, size_t len)
  561        0  {
  562        0      struct gcm_funcs_st funcs;
  563        0      u64 tmp[2];
  564        0      size_t i;
  565
  566        0      gcm_get_funcs(&funcs);
  567        0      if (funcs.ghash != NULL) {
  568        0          funcs.ghash(Xi, Htable, inp, len);
  569        0      } else {
  570                   /* Emulate ghash if needed */
  571        0          for (i = 0; i < len; i += 16) {
  572        0              memcpy(tmp, &inp[i], sizeof(tmp));
  573        0              Xi[0] ^= tmp[0];
  574        0              Xi[1] ^= tmp[1];
  575        0              funcs.gmult(Xi, Htable);
  576        0          }
  577        0      }
  578        0  }
  579
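The emulation loop in ossl_gcm_ghash_4bit is the GHASH definition in miniature: each 16-byte block is XORed into the accumulator, which is then multiplied by the hash key H in GF(2^128). In LaTeX (the standard recurrence, shown here for orientation):

    X_0 = 0, \qquad X_i = (X_{i-1} \oplus B_i) \cdot H, \quad i = 1, \dots, n.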
  580           void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
  581      224k  {
  582      224k      DECLARE_IS_ENDIAN;
  583
  584      224k      memset(ctx, 0, sizeof(*ctx));
  585      224k      ctx->block = block;
  586      224k      ctx->key = key;
  587
  588      224k      (*block) (ctx->H.c, ctx->H.c, key);
  589
  590      224k      if (IS_LITTLE_ENDIAN) {
  591                   /* H is stored in host byte order */
  592           #ifdef BSWAP8
  593                   ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
  594                   ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
  595           #else
  596      224k          u8 *p = ctx->H.c;
  597      224k          u64 hi, lo;
  598      224k          hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
  599      224k          lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
  600      224k          ctx->H.u[0] = hi;
  601      224k          ctx->H.u[1] = lo;
  602      224k  #endif
  603      224k      }
  604
  605      224k      gcm_get_funcs(&ctx->funcs);
  606      224k      ctx->funcs.ginit(ctx->Htable, ctx->H.u);
  607      224k  }
  608
  609           void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
  610                                    size_t len)
  611     1.50M  {
  612     1.50M      DECLARE_IS_ENDIAN;
  613     1.50M      unsigned int ctr;
  614
  615     1.50M      ctx->len.u[0] = 0;          /* AAD length */
  616     1.50M      ctx->len.u[1] = 0;          /* message length */
  617     1.50M      ctx->ares = 0;
  618     1.50M      ctx->mres = 0;
  619
  620     1.50M      if (len == 12) {
  621     1.50M          memcpy(ctx->Yi.c, iv, 12);
  622     1.50M          ctx->Yi.c[12] = 0;
  623     1.50M          ctx->Yi.c[13] = 0;
  624     1.50M          ctx->Yi.c[14] = 0;
  625     1.50M          ctx->Yi.c[15] = 1;
  626     1.50M          ctr = 1;
  627     1.50M      } else {
  628        0          size_t i;
  629        0          u64 len0 = len;
  630
  631                   /* Borrow ctx->Xi to calculate initial Yi */
  632        0          ctx->Xi.u[0] = 0;
  633        0          ctx->Xi.u[1] = 0;
  634
  635        0          while (len >= 16) {
  636        0              for (i = 0; i < 16; ++i)
  637        0                  ctx->Xi.c[i] ^= iv[i];
  638        0              GCM_MUL(ctx);
  639        0              iv += 16;
  640        0              len -= 16;
  641        0          }
  642        0          if (len) {
  643        0              for (i = 0; i < len; ++i)
  644        0                  ctx->Xi.c[i] ^= iv[i];
  645        0              GCM_MUL(ctx);
  646        0          }
  647        0          len0 <<= 3;
  648        0          if (IS_LITTLE_ENDIAN) {
  649           #ifdef BSWAP8
  650                       ctx->Xi.u[1] ^= BSWAP8(len0);
  651           #else
  652        0              ctx->Xi.c[8] ^= (u8)(len0 >> 56);
  653        0              ctx->Xi.c[9] ^= (u8)(len0 >> 48);
  654        0              ctx->Xi.c[10] ^= (u8)(len0 >> 40);
  655        0              ctx->Xi.c[11] ^= (u8)(len0 >> 32);
  656        0              ctx->Xi.c[12] ^= (u8)(len0 >> 24);
  657        0              ctx->Xi.c[13] ^= (u8)(len0 >> 16);
  658        0              ctx->Xi.c[14] ^= (u8)(len0 >> 8);
  659        0              ctx->Xi.c[15] ^= (u8)(len0);
  660        0  #endif
  661        0          } else {
  662        0              ctx->Xi.u[1] ^= len0;
  663        0          }
  664
  665        0          GCM_MUL(ctx);
  666
  667        0          if (IS_LITTLE_ENDIAN)
  668           #ifdef BSWAP4
  669                       ctr = BSWAP4(ctx->Xi.d[3]);
  670           #else
  671        0              ctr = GETU32(ctx->Xi.c + 12);
  672        0  #endif
  673        0          else
  674        0              ctr = ctx->Xi.d[3];
  675
  676                   /* Copy borrowed Xi to Yi */
  677        0          ctx->Yi.u[0] = ctx->Xi.u[0];
  678        0          ctx->Yi.u[1] = ctx->Xi.u[1];
  679        0      }
  680
  681     1.50M      ctx->Xi.u[0] = 0;
  682     1.50M      ctx->Xi.u[1] = 0;
  683
  684     1.50M      (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
  685     1.50M      ++ctr;
  686     1.50M      if (IS_LITTLE_ENDIAN)
  687           #ifdef BSWAP4
  688                   ctx->Yi.d[3] = BSWAP4(ctr);
  689           #else
  690     1.50M          PUTU32(ctx->Yi.c + 12, ctr);
  691        0  #endif
  692        0      else
  693        0          ctx->Yi.d[3] = ctr;
  694     1.50M  }
  695
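The two IV branches in CRYPTO_gcm128_setiv correspond to the pre-counter block J_0 of NIST SP 800-38D; a LaTeX sketch of the spec's definition (s is the zero padding that rounds the IV up to a 128-bit boundary):

    J_0 = \begin{cases}
        \mathit{IV} \,\|\, 0^{31} \,\|\, 1, & |\mathit{IV}| = 96 \text{ bits}, \\
        \mathrm{GHASH}_H(\mathit{IV} \,\|\, 0^{s+64} \,\|\, [\,|\mathit{IV}|\,]_{64}), & \text{otherwise}.
    \end{cases}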
  696           int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
  697                                 size_t len)
  698     1.60M  {
  699     1.60M      size_t i;
  700     1.60M      unsigned int n;
  701     1.60M      u64 alen = ctx->len.u[0];
  702
  703     1.60M      if (ctx->len.u[1])
  704        0          return -2;
  705
  706     1.60M      alen += len;
  707     1.60M      if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
  708        0          return -1;
  709     1.60M      ctx->len.u[0] = alen;
  710
  711     1.60M      n = ctx->ares;
  712     1.60M      if (n) {
  713      452k          while (n && len) {
  714      358k              ctx->Xi.c[n] ^= *(aad++);
  715      358k              --len;
  716      358k              n = (n + 1) % 16;
  717      358k          }
  718     94.8k          if (n == 0)
  719     9.12k              GCM_MUL(ctx);
  720     85.7k          else {
  721     85.7k              ctx->ares = n;
  722     85.7k              return 0;
  723     85.7k          }
  724     94.8k      }
  725     1.51M  #ifdef GHASH
  726     1.51M      if ((i = (len & (size_t)-16))) {
  727      529k          GHASH(ctx, aad, i);
  728      529k          aad += i;
  729      529k          len -= i;
  730      529k      }
  731           #else
  732               while (len >= 16) {
  733                   for (i = 0; i < 16; ++i)
  734                       ctx->Xi.c[i] ^= aad[i];
  735                   GCM_MUL(ctx);
  736                   aad += 16;
  737                   len -= 16;
  738               }
  739           #endif
  740     1.51M      if (len) {
  741     1.34M          n = (unsigned int)len;
  742     15.6M          for (i = 0; i < len; ++i)
  743     14.2M              ctx->Xi.c[i] ^= aad[i];
  744     1.34M      }
  745
  746     1.51M      ctx->ares = n;
  747     1.51M      return 0;
  748     1.60M  }
  749
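The length checks in CRYPTO_gcm128_aad and the en/decrypt routines are the SP 800-38D limits converted from bits to bytes: AAD is capped near 2^64 bits, hence the `alen > (U64(1) << 61)` byte test above, and plaintext at 2^39 - 256 bits, hence the `mlen > ((U64(1) << 36) - 32)` test below, since

    (2^{39} - 256)\ \text{bits} \div 8 = 2^{36} - 32\ \text{bytes}.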
  750           int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
  751                                     const unsigned char *in, unsigned char *out,
  752                                     size_t len)
  753      611k  {
  754      611k      DECLARE_IS_ENDIAN;
  755      611k      unsigned int n, ctr, mres;
  756      611k      size_t i;
  757      611k      u64 mlen = ctx->len.u[1];
  758      611k      block128_f block = ctx->block;
  759      611k      void *key = ctx->key;
  760
  761      611k      mlen += len;
  762      611k      if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
  763        0          return -1;
  764      611k      ctx->len.u[1] = mlen;
  765
  766      611k      mres = ctx->mres;
  767
  768      611k      if (ctx->ares) {
  769                   /* First call to encrypt finalizes GHASH(AAD) */
  770     4.90k  #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
  771     4.90k          if (len == 0) {
  772     4.45k              GCM_MUL(ctx);
  773     4.45k              ctx->ares = 0;
  774     4.45k              return 0;
  775     4.45k          }
  776       449          memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
  777       449          ctx->Xi.u[0] = 0;
  778       449          ctx->Xi.u[1] = 0;
  779       449          mres = sizeof(ctx->Xi);
  780           #else
  781                   GCM_MUL(ctx);
  782           #endif
  783       449          ctx->ares = 0;
  784       449      }
  785
  786      606k      if (IS_LITTLE_ENDIAN)
  787           #ifdef BSWAP4
  788                   ctr = BSWAP4(ctx->Yi.d[3]);
  789           #else
  790      606k          ctr = GETU32(ctx->Yi.c + 12);
  791        0  #endif
  792        0      else
  793        0          ctr = ctx->Yi.d[3];
  794
  795      606k      n = mres % 16;
  796      606k  #if !defined(OPENSSL_SMALL_FOOTPRINT)
  797      606k      if (16 % sizeof(size_t) == 0) { /* always true actually */
  798      606k          do {
  799      606k              if (n) {
  800      596k  # if defined(GHASH)
  801     8.82M                  while (n && len) {
  802     8.23M                      ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
  803     8.23M                      --len;
  804     8.23M                      n = (n + 1) % 16;
  805     8.23M                  }
  806      596k                  if (n == 0) {
  807      596k                      GHASH(ctx, ctx->Xn, mres);
  808      596k                      mres = 0;
  809      596k                  } else {
  810        0                      ctx->mres = mres;
  811        0                      return 0;
  812        0                  }
  813           # else
  814                           while (n && len) {
  815                               ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
  816                               --len;
  817                               n = (n + 1) % 16;
  818                           }
  819                           if (n == 0) {
  820                               GCM_MUL(ctx);
  821                               mres = 0;
  822                           } else {
  823                               ctx->mres = n;
  824                               return 0;
  825                           }
  826           # endif
  827      596k              }
  828      606k  # if defined(STRICT_ALIGNMENT)
  829      606k              if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
  830      603k                  break;
  831     3.16k  # endif
  832     3.16k  # if defined(GHASH)
  833     3.16k              if (len >= 16 && mres) {
  834       234                  GHASH(ctx, ctx->Xn, mres);
  835       234                  mres = 0;
  836       234              }
  837     3.16k  #  if defined(GHASH_CHUNK)
  838     3.16k              while (len >= GHASH_CHUNK) {
  839        0                  size_t j = GHASH_CHUNK;
  840
  841        0                  while (j) {
  842        0                      size_t_aX *out_t = (size_t_aX *)out;
  843        0                      const size_t_aX *in_t = (const size_t_aX *)in;
  844
  845        0                      (*block) (ctx->Yi.c, ctx->EKi.c, key);
  846        0                      ++ctr;
  847        0                      if (IS_LITTLE_ENDIAN)
  848           #   ifdef BSWAP4
  849                                   ctx->Yi.d[3] = BSWAP4(ctr);
  850           #   else
  851        0                          PUTU32(ctx->Yi.c + 12, ctr);
  852        0  #   endif
  853        0                      else
  854        0                          ctx->Yi.d[3] = ctr;
  855        0                      for (i = 0; i < 16 / sizeof(size_t); ++i)
  856        0                          out_t[i] = in_t[i] ^ ctx->EKi.t[i];
  857        0                      out += 16;
  858        0                      in += 16;
  859        0                      j -= 16;
  860        0                  }
  861        0                  GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
  862        0                  len -= GHASH_CHUNK;
  863        0              }
  864     3.16k  #  endif
  865     3.16k              if ((i = (len & (size_t)-16))) {
  866       234                  size_t j = i;
  867
  868       468                  while (len >= 16) {
  869       234                      size_t_aX *out_t = (size_t_aX *)out;
  870       234                      const size_t_aX *in_t = (const size_t_aX *)in;
  871
  872       234                      (*block) (ctx->Yi.c, ctx->EKi.c, key);
  873       234                      ++ctr;
  874       234                      if (IS_LITTLE_ENDIAN)
  875           #  ifdef BSWAP4
  876                                   ctx->Yi.d[3] = BSWAP4(ctr);
  877           #  else
  878       234                          PUTU32(ctx->Yi.c + 12, ctr);
  879        0  #  endif
  880        0                      else
  881        0                          ctx->Yi.d[3] = ctr;
  882       702                      for (i = 0; i < 16 / sizeof(size_t); ++i)
  883       468                          out_t[i] = in_t[i] ^ ctx->EKi.t[i];
  884       234                      out += 16;
  885       234                      in += 16;
  886       234                      len -= 16;
  887       234                  }
  888       234                  GHASH(ctx, out - j, j);
  889       234              }
  890           # else
  891                       while (len >= 16) {
  892                           size_t *out_t = (size_t *)out;
  893                           const size_t *in_t = (const size_t *)in;
  894
  895                           (*block) (ctx->Yi.c, ctx->EKi.c, key);
  896                           ++ctr;
  897                           if (IS_LITTLE_ENDIAN)
  898           #  ifdef BSWAP4
  899                               ctx->Yi.d[3] = BSWAP4(ctr);
  900           #  else
  901                               PUTU32(ctx->Yi.c + 12, ctr);
  902           #  endif
  903                           else
  904                               ctx->Yi.d[3] = ctr;
  905                           for (i = 0; i < 16 / sizeof(size_t); ++i)
  906                               ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
  907                           GCM_MUL(ctx);
  908                           out += 16;
  909                           in += 16;
  910                           len -= 16;
  911                       }
  912           # endif
  913     3.16k              if (len) {
  914       215                  (*block) (ctx->Yi.c, ctx->EKi.c, key);
  915       215                  ++ctr;
  916       215                  if (IS_LITTLE_ENDIAN)
  917           # ifdef BSWAP4
  918                               ctx->Yi.d[3] = BSWAP4(ctr);
  919           # else
  920       215                      PUTU32(ctx->Yi.c + 12, ctr);
  921        0  # endif
  922        0                  else
  923        0                      ctx->Yi.d[3] = ctr;
  924       215  # if defined(GHASH)
  925       645                  while (len--) {
  926       430                      ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
  927       430                      ++n;
  928       430                  }
  929           # else
  930                           while (len--) {
  931                               ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
  932                               ++n;
  933                           }
  934                           mres = n;
  935           # endif
  936       215              }
  937
  938     3.16k              ctx->mres = mres;
  939     3.16k              return 0;
  940      606k          } while (0);
  941      606k      }
  942      603k  #endif
  943      603k      for (i = 0; i < len; ++i) {
  944        0          if (n == 0) {
  945        0              (*block) (ctx->Yi.c, ctx->EKi.c, key);
  946        0              ++ctr;
  947        0              if (IS_LITTLE_ENDIAN)
  948           #ifdef BSWAP4
  949                           ctx->Yi.d[3] = BSWAP4(ctr);
  950           #else
  951        0                  PUTU32(ctx->Yi.c + 12, ctr);
  952        0  #endif
  953        0              else
  954        0                  ctx->Yi.d[3] = ctr;
  955        0          }
  956        0  #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
  957        0          ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
  958        0          n = (n + 1) % 16;
  959        0          if (mres == sizeof(ctx->Xn)) {
  960        0              GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
  961        0              mres = 0;
  962        0          }
  963           #else
  964                   ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
  965                   mres = n = (n + 1) % 16;
  966                   if (n == 0)
  967                       GCM_MUL(ctx);
  968           #endif
  969        0      }
  970
  971      603k      ctx->mres = mres;
  972      603k      return 0;
  973      606k  }
  974
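For orientation, the keystream discipline implemented above (and mirrored in the decrypt and _ctr32 variants that follow) is GCTR with a 32-bit big-endian counter in the last word of Y, which is why only Yi.d[3] is ever rewritten; a LaTeX sketch:

    Y_1 = \operatorname{inc}_{32}(J_0), \qquad Y_{i+1} = \operatorname{inc}_{32}(Y_i),
    \qquad C_i = P_i \oplus E_K(Y_i).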
  975           int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
  976                                     const unsigned char *in, unsigned char *out,
  977                                     size_t len)
  978      251k  {
  979      251k      DECLARE_IS_ENDIAN;
  980      251k      unsigned int n, ctr, mres;
  981      251k      size_t i;
  982      251k      u64 mlen = ctx->len.u[1];
  983      251k      block128_f block = ctx->block;
  984      251k      void *key = ctx->key;
  985
  986      251k      mlen += len;
  987      251k      if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
  988        0          return -1;
  989      251k      ctx->len.u[1] = mlen;
  990
  991      251k      mres = ctx->mres;
  992
  993      251k      if (ctx->ares) {
  994                   /* First call to decrypt finalizes GHASH(AAD) */
  995     95.1k  #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
  996     95.1k          if (len == 0) {
  997     88.1k              GCM_MUL(ctx);
  998     88.1k              ctx->ares = 0;
  999     88.1k              return 0;
 1000     88.1k          }
 1001     7.00k          memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
 1002     7.00k          ctx->Xi.u[0] = 0;
 1003     7.00k          ctx->Xi.u[1] = 0;
 1004     7.00k          mres = sizeof(ctx->Xi);
 1005           #else
 1006                   GCM_MUL(ctx);
 1007           #endif
 1008     7.00k          ctx->ares = 0;
 1009     7.00k      }
 1010
 1011      163k      if (IS_LITTLE_ENDIAN)
 1012           #ifdef BSWAP4
 1013                   ctr = BSWAP4(ctx->Yi.d[3]);
 1014           #else
 1015      163k          ctr = GETU32(ctx->Yi.c + 12);
 1016        0  #endif
 1017        0      else
 1018        0          ctr = ctx->Yi.d[3];
 1019
 1020      163k      n = mres % 16;
 1021      163k  #if !defined(OPENSSL_SMALL_FOOTPRINT)
 1022      163k      if (16 % sizeof(size_t) == 0) { /* always true actually */
 1023      163k          do {
 1024      163k              if (n) {
 1025        0  # if defined(GHASH)
 1026        0                  while (n && len) {
 1027        0                      *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
 1028        0                      --len;
 1029        0                      n = (n + 1) % 16;
 1030        0                  }
 1031        0                  if (n == 0) {
 1032        0                      GHASH(ctx, ctx->Xn, mres);
 1033        0                      mres = 0;
 1034        0                  } else {
 1035        0                      ctx->mres = mres;
 1036        0                      return 0;
 1037        0                  }
 1038           # else
 1039                           while (n && len) {
 1040                               u8 c = *(in++);
 1041                               *(out++) = c ^ ctx->EKi.c[n];
 1042                               ctx->Xi.c[n] ^= c;
 1043                               --len;
 1044                               n = (n + 1) % 16;
 1045                           }
 1046                           if (n == 0) {
 1047                               GCM_MUL(ctx);
 1048                               mres = 0;
 1049                           } else {
 1050                               ctx->mres = n;
 1051                               return 0;
 1052                           }
 1053           # endif
 1054        0              }
 1055      163k  # if defined(STRICT_ALIGNMENT)
 1056      163k              if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
 1057       391                  break;
 1058      163k  # endif
 1059      163k  # if defined(GHASH)
 1060      163k              if (len >= 16 && mres) {
 1061     1.88k                  GHASH(ctx, ctx->Xn, mres);
 1062     1.88k                  mres = 0;
 1063     1.88k              }
 1064      163k  #  if defined(GHASH_CHUNK)
 1065      164k              while (len >= GHASH_CHUNK) {
 1066     1.27k                  size_t j = GHASH_CHUNK;
 1067
 1068     1.27k                  GHASH(ctx, in, GHASH_CHUNK);
 1069      246k                  while (j) {
 1070      245k                      size_t_aX *out_t = (size_t_aX *)out;
 1071      245k                      const size_t_aX *in_t = (const size_t_aX *)in;
 1072
 1073      245k                      (*block) (ctx->Yi.c, ctx->EKi.c, key);
 1074      245k                      ++ctr;
 1075      245k                      if (IS_LITTLE_ENDIAN)
 1076           #   ifdef BSWAP4
 1077                                   ctx->Yi.d[3] = BSWAP4(ctr);
 1078           #   else
 1079      245k                          PUTU32(ctx->Yi.c + 12, ctr);
 1080        0  #   endif
 1081        0                      else
 1082        0                          ctx->Yi.d[3] = ctr;
 1083      735k                      for (i = 0; i < 16 / sizeof(size_t); ++i)
 1084      490k                          out_t[i] = in_t[i] ^ ctx->EKi.t[i];
 1085      245k                      out += 16;
 1086      245k                      in += 16;
 1087      245k                      j -= 16;
 1088      245k                  }
 1089     1.27k                  len -= GHASH_CHUNK;
 1090     1.27k              }
 1091      163k  #  endif
 1092      163k              if ((i = (len & (size_t)-16))) {
 1093     1.86k                  GHASH(ctx, in, i);
 1094     25.0k                  while (len >= 16) {
 1095     23.1k                      size_t_aX *out_t = (size_t_aX *)out;
 1096     23.1k                      const size_t_aX *in_t = (const size_t_aX *)in;
 1097
 1098     23.1k                      (*block) (ctx->Yi.c, ctx->EKi.c, key);
 1099     23.1k                      ++ctr;
 1100     23.1k                      if (IS_LITTLE_ENDIAN)
 1101           #  ifdef BSWAP4
 1102                                   ctx->Yi.d[3] = BSWAP4(ctr);
 1103           #  else
 1104     23.1k                          PUTU32(ctx->Yi.c + 12, ctr);
 1105        0  #  endif
 1106        0                      else
 1107        0                          ctx->Yi.d[3] = ctr;
 1108     69.5k                      for (i = 0; i < 16 / sizeof(size_t); ++i)
 1109     46.3k                          out_t[i] = in_t[i] ^ ctx->EKi.t[i];
 1110     23.1k                      out += 16;
 1111     23.1k                      in += 16;
 1112     23.1k                      len -= 16;
 1113     23.1k                  }
 1114     1.86k              }
 1115           # else
 1116                       while (len >= 16) {
 1117                           size_t *out_t = (size_t *)out;
 1118                           const size_t *in_t = (const size_t *)in;
 1119
 1120                           (*block) (ctx->Yi.c, ctx->EKi.c, key);
 1121                           ++ctr;
 1122                           if (IS_LITTLE_ENDIAN)
 1123           #  ifdef BSWAP4
 1124                               ctx->Yi.d[3] = BSWAP4(ctr);
 1125           #  else
 1126                               PUTU32(ctx->Yi.c + 12, ctr);
 1127           #  endif
 1128                           else
 1129                               ctx->Yi.d[3] = ctr;
 1130                           for (i = 0; i < 16 / sizeof(size_t); ++i) {
 1131                               size_t c = in_t[i];
 1132                               out_t[i] = c ^ ctx->EKi.t[i];
 1133                               ctx->Xi.t[i] ^= c;
 1134                           }
 1135                           GCM_MUL(ctx);
 1136                           out += 16;
 1137                           in += 16;
 1138                           len -= 16;
 1139                       }
 1140           # endif
 1141      163k              if (len) {
 1142     6.69k                  (*block) (ctx->Yi.c, ctx->EKi.c, key);
 1143     6.69k                  ++ctr;
 1144     6.69k                  if (IS_LITTLE_ENDIAN)
 1145           # ifdef BSWAP4
 1146                               ctx->Yi.d[3] = BSWAP4(ctr);
 1147           # else
 1148     6.69k                      PUTU32(ctx->Yi.c + 12, ctr);
 1149        0  # endif
 1150        0                  else
 1151        0                      ctx->Yi.d[3] = ctr;
 1152     6.69k  # if defined(GHASH)
 1153     65.6k                  while (len--) {
 1154     58.9k                      out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
 1155     58.9k                      ++n;
 1156     58.9k                  }
 1157           # else
 1158                           while (len--) {
 1159                               u8 c = in[n];
 1160                               ctx->Xi.c[n] ^= c;
 1161                               out[n] = c ^ ctx->EKi.c[n];
 1162                               ++n;
 1163                           }
 1164                           mres = n;
 1165           # endif
 1166     6.69k              }
 1167
 1168      163k              ctx->mres = mres;
 1169      163k              return 0;
 1170      163k          } while (0);
 1171      163k      }
 1172       391  #endif
 1173       391      for (i = 0; i < len; ++i) {
 1174        0          u8 c;
 1175        0          if (n == 0) {
 1176        0              (*block) (ctx->Yi.c, ctx->EKi.c, key);
 1177        0              ++ctr;
 1178        0              if (IS_LITTLE_ENDIAN)
 1179           #ifdef BSWAP4
 1180                           ctx->Yi.d[3] = BSWAP4(ctr);
 1181           #else
 1182        0                  PUTU32(ctx->Yi.c + 12, ctr);
 1183        0  #endif
 1184        0              else
 1185        0                  ctx->Yi.d[3] = ctr;
 1186        0          }
 1187        0  #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
 1188        0          out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
 1189        0          n = (n + 1) % 16;
 1190        0          if (mres == sizeof(ctx->Xn)) {
 1191        0              GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
 1192        0              mres = 0;
 1193        0          }
 1194           #else
 1195                   c = in[i];
 1196                   out[i] = c ^ ctx->EKi.c[n];
 1197                   ctx->Xi.c[n] ^= c;
 1198                   mres = n = (n + 1) % 16;
 1199                   if (n == 0)
 1200                       GCM_MUL(ctx);
 1201           #endif
 1202        0      }
 1203
 1204       391      ctx->mres = mres;
 1205       391      return 0;
 1206      163k  }
 1207
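The _ctr32 entry points below differ from CRYPTO_gcm128_encrypt/decrypt only in delegating the bulk CTR pass to a ctr128_f callback (in practice a hardware-accelerated routine). A minimal sketch of such a callback built from a block cipher, to make the calling convention concrete (some_block_cipher is hypothetical, and the caller advances the counter in Yi afterwards, as the code below does):

    static void ctr32_via_block(const unsigned char *in, unsigned char *out,
                                size_t blocks, const void *key,
                                const unsigned char ivec[16])
    {
        unsigned char ctrblk[16], ks[16];
        u32 c;
        size_t i;

        memcpy(ctrblk, ivec, 16);
        c = GETU32(ctrblk + 12);
        while (blocks--) {
            some_block_cipher(ctrblk, ks, key);   /* hypothetical block128_f */
            for (i = 0; i < 16; ++i)
                *(out++) = *(in++) ^ ks[i];
            PUTU32(ctrblk + 12, ++c);             /* 32-bit big-endian increment */
        }
    }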
 1208           int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
 1209                                           const unsigned char *in, unsigned char *out,
 1210                                           size_t len, ctr128_f stream)
 1211     1.55M  {
 1212           #if defined(OPENSSL_SMALL_FOOTPRINT)
 1213               return CRYPTO_gcm128_encrypt(ctx, in, out, len);
 1214           #else
 1215     1.55M      DECLARE_IS_ENDIAN;
 1216     1.55M      unsigned int n, ctr, mres;
 1217     1.55M      size_t i;
 1218     1.55M      u64 mlen = ctx->len.u[1];
 1219     1.55M      void *key = ctx->key;
 1220
 1221     1.55M      mlen += len;
 1222     1.55M      if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
 1223        0          return -1;
 1224     1.55M      ctx->len.u[1] = mlen;
 1225
 1226     1.55M      mres = ctx->mres;
 1227
 1228     1.55M      if (ctx->ares) {
 1229                   /* First call to encrypt finalizes GHASH(AAD) */
 1230      776k  #if defined(GHASH)
 1231      776k          if (len == 0) {
 1232        0              GCM_MUL(ctx);
 1233        0              ctx->ares = 0;
 1234        0              return 0;
 1235        0          }
 1236      776k          memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
 1237      776k          ctx->Xi.u[0] = 0;
 1238      776k          ctx->Xi.u[1] = 0;
 1239      776k          mres = sizeof(ctx->Xi);
 1240           #else
 1241                   GCM_MUL(ctx);
 1242           #endif
 1243      776k          ctx->ares = 0;
 1244      776k      }
 1245
 1246     1.55M      if (IS_LITTLE_ENDIAN)
 1247           # ifdef BSWAP4
 1248                   ctr = BSWAP4(ctx->Yi.d[3]);
 1249           # else
 1250     1.55M          ctr = GETU32(ctx->Yi.c + 12);
 1251        0  # endif
 1252        0      else
 1253        0          ctr = ctx->Yi.d[3];
 1254
 1255     1.55M      n = mres % 16;
 1256     1.55M      if (n) {
 1257      154k  # if defined(GHASH)
 1258      930k          while (n && len) {
 1259      775k              ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
 1260      775k              --len;
 1261      775k              n = (n + 1) % 16;
 1262      775k          }
 1263      154k          if (n == 0) {
 1264     72.4k              GHASH(ctx, ctx->Xn, mres);
 1265     72.4k              mres = 0;
 1266     82.5k          } else {
 1267     82.5k              ctx->mres = mres;
 1268     82.5k              return 0;
 1269     82.5k          }
 1270           # else
 1271               while (n && len) {
 1272                   ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
 1273                   --len;
 1274                   n = (n + 1) % 16;
 1275               }
 1276               if (n == 0) {
 1277                   GCM_MUL(ctx);
 1278                   mres = 0;
 1279               } else {
 1280                   ctx->mres = n;
 1281                   return 0;
 1282               }
 1283           # endif
 1284      154k      }
 1285     1.46M  # if defined(GHASH)
 1286     1.46M          if (len >= 16 && mres) {
 1287     48.2k              GHASH(ctx, ctx->Xn, mres);
 1288     48.2k              mres = 0;
 1289     48.2k          }
 1290     1.46M  #  if defined(GHASH_CHUNK)
 1291     1.46M      while (len >= GHASH_CHUNK) {
 1292        0          (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
 1293        0          ctr += GHASH_CHUNK / 16;
 1294        0          if (IS_LITTLE_ENDIAN)
 1295           #   ifdef BSWAP4
 1296                       ctx->Yi.d[3] = BSWAP4(ctr);
 1297           #   else
 1298        0              PUTU32(ctx->Yi.c + 12, ctr);
 1299        0  #   endif
 1300        0          else
 1301        0              ctx->Yi.d[3] = ctr;
 1302        0          GHASH(ctx, out, GHASH_CHUNK);
 1303        0          out += GHASH_CHUNK;
 1304        0          in += GHASH_CHUNK;
 1305        0          len -= GHASH_CHUNK;
 1306        0      }
 1307     1.46M  #  endif
 1308     1.46M  # endif
 1309     1.46M      if ((i = (len & (size_t)-16))) {
 1310      221k          size_t j = i / 16;
 1311
 1312      221k          (*stream) (in, out, j, key, ctx->Yi.c);
 1313      221k          ctr += (unsigned int)j;
 1314      221k          if (IS_LITTLE_ENDIAN)
 1315           # ifdef BSWAP4
 1316                       ctx->Yi.d[3] = BSWAP4(ctr);
 1317           # else
 1318      221k              PUTU32(ctx->Yi.c + 12, ctr);
 1319        0  # endif
 1320        0          else
 1321        0              ctx->Yi.d[3] = ctr;
 1322      221k          in += i;
 1323      221k          len -= i;
 1324      221k  # if defined(GHASH)
 1325      221k          GHASH(ctx, out, i);
 1326      221k          out += i;
 1327           # else
 1328               while (j--) {
 1329                   for (i = 0; i < 16; ++i)
 1330                       ctx->Xi.c[i] ^= out[i];
 1331                   GCM_MUL(ctx);
 1332                   out += 16;
 1333               }
 1334           # endif
 1335      221k      }
 1336     1.46M      if (len) {
 1337     1.44M          (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
 1338     1.44M          ++ctr;
 1339     1.44M          if (IS_LITTLE_ENDIAN)
 1340           # ifdef BSWAP4
 1341                       ctx->Yi.d[3] = BSWAP4(ctr);
 1342           # else
 1343     1.44M              PUTU32(ctx->Yi.c + 12, ctr);
 1344        0  # endif
 1345        0          else
 1346        0              ctx->Yi.d[3] = ctr;
 1347     5.73M          while (len--) {
 1348     4.28M  # if defined(GHASH)
 1349     4.28M              ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
 1350           # else
 1351                       ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
 1352           # endif
 1353     4.28M              ++n;
 1354     4.28M          }
 1355     1.44M      }
 1356
 1357     1.46M      ctx->mres = mres;
 1358     1.46M      return 0;
 1359     1.55M  #endif
 1360     1.55M  }
 1361
 1362           int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
 1363                                           const unsigned char *in, unsigned char *out,
 1364                                           size_t len, ctr128_f stream)
 1365      606k  {
 1366           #if defined(OPENSSL_SMALL_FOOTPRINT)
 1367               return CRYPTO_gcm128_decrypt(ctx, in, out, len);
 1368           #else
 1369      606k      DECLARE_IS_ENDIAN;
 1370      606k      unsigned int n, ctr, mres;
 1371      606k      size_t i;
 1372      606k      u64 mlen = ctx->len.u[1];
 1373      606k      void *key = ctx->key;
 1374
 1375      606k      mlen += len;
 1376      606k      if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
 1377        0          return -1;
 1378      606k      ctx->len.u[1] = mlen;
 1379
 1380      606k      mres = ctx->mres;
 1381
 1382      606k      if (ctx->ares) {
 1383                   /* First call to decrypt finalizes GHASH(AAD) */
 1384      361k  # if defined(GHASH)
 1385      361k          if (len == 0) {
 1386     1.43k              GCM_MUL(ctx);
 1387     1.43k              ctx->ares = 0;
 1388     1.43k              return 0;
 1389     1.43k          }
 1390      360k          memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
 1391      360k          ctx->Xi.u[0] = 0;
 1392      360k          ctx->Xi.u[1] = 0;
 1393      360k          mres = sizeof(ctx->Xi);
 1394           # else
 1395                   GCM_MUL(ctx);
 1396           # endif
 1397      360k          ctx->ares = 0;
 1398      360k      }
 1399
 1400      604k      if (IS_LITTLE_ENDIAN)
 1401           # ifdef BSWAP4
 1402                   ctr = BSWAP4(ctx->Yi.d[3]);
 1403           # else
 1404      604k          ctr = GETU32(ctx->Yi.c + 12);
 1405        0  # endif
 1406        0      else
 1407        0          ctr = ctx->Yi.d[3];
 1408
 1409      604k      n = mres % 16;
 1410      604k      if (n) {
 1411        0  # if defined(GHASH)
 1412        0          while (n && len) {
 1413        0              *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
 1414        0              --len;
 1415        0              n = (n + 1) % 16;
 1416        0          }
 1417        0          if (n == 0) {
 1418        0              GHASH(ctx, ctx->Xn, mres);
 1419        0              mres = 0;
 1420        0          } else {
 1421        0              ctx->mres = mres;
 1422        0              return 0;
 1423        0          }
 1424           # else
 1425               while (n && len) {
 1426                   u8 c = *(in++);
 1427                   *(out++) = c ^ ctx->EKi.c[n];
 1428                   ctx->Xi.c[n] ^= c;
 1429                   --len;
 1430                   n = (n + 1) % 16;
 1431               }
 1432               if (n == 0) {
 1433                   GCM_MUL(ctx);
 1434                   mres = 0;
 1435               } else {
 1436                   ctx->mres = n;
 1437                   return 0;
 1438               }
 1439           # endif
 1440        0      }
 1441      604k  # if defined(GHASH)
 1442      604k      if (len >= 16 && mres) {
 1443        0          GHASH(ctx, ctx->Xn, mres);
 1444        0          mres = 0;
 1445        0      }
 1446      604k  #  if defined(GHASH_CHUNK)
 1447      604k      while (len >= GHASH_CHUNK) {
 1448        0          GHASH(ctx, in, GHASH_CHUNK);
 1449        0          (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
 1450        0          ctr += GHASH_CHUNK / 16;
 1451        0          if (IS_LITTLE_ENDIAN)
 1452           #   ifdef BSWAP4
 1453                       ctx->Yi.d[3] = BSWAP4(ctr);
 1454           #   else
 1455        0              PUTU32(ctx->Yi.c + 12, ctr);
 1456        0  #   endif
 1457        0          else
 1458        0              ctx->Yi.d[3] = ctr;
 1459        0          out += GHASH_CHUNK;
 1460        0          in += GHASH_CHUNK;
 1461        0          len -= GHASH_CHUNK;
 1462        0      }
 1463      604k  #  endif
 1464      604k  # endif
 1465      604k      if ((i = (len & (size_t)-16))) {
 1466      219k          size_t j = i / 16;
 1467
 1468      219k  # if defined(GHASH)
 1469      219k          GHASH(ctx, in, i);
 1470           # else
 1471               while (j--) {
 1472                   size_t k;
 1473                   for (k = 0; k < 16; ++k)
 1474                       ctx->Xi.c[k] ^= in[k];
 1475                   GCM_MUL(ctx);
 1476                   in += 16;
 1477               }
 1478               j = i / 16;
 1479               in -= i;
 1480           # endif
 1481      219k          (*stream) (in, out, j, key, ctx->Yi.c);
 1482      219k          ctr += (unsigned int)j;
 1483      219k          if (IS_LITTLE_ENDIAN)
 1484           # ifdef BSWAP4
 1485                       ctx->Yi.d[3] = BSWAP4(ctr);
 1486           # else
 1487      219k              PUTU32(ctx->Yi.c + 12, ctr);
 1488        0  # endif
 1489        0          else
 1490        0              ctx->Yi.d[3] = ctr;
 1491      219k          out += i;
 1492      219k          in += i;
 1493      219k          len -= i;
 1494      219k      }
 1495      604k      if (len) {
 1496      593k          (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
 1497      593k          ++ctr;
 1498      593k          if (IS_LITTLE_ENDIAN)
 1499           # ifdef BSWAP4
 1500                       ctx->Yi.d[3] = BSWAP4(ctr);
 1501           # else
 1502      593k              PUTU32(ctx->Yi.c + 12, ctr);
 1503        0  # endif
 1504        0          else
 1505        0              ctx->Yi.d[3] = ctr;
 1506     2.53M          while (len--) {
 1507     1.93M  # if defined(GHASH)
 1508     1.93M              out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
 1509           # else
 1510                       u8 c = in[n];
 1511                       ctx->Xi.c[mres++] ^= c;
 1512                       out[n] = c ^ ctx->EKi.c[n];
 1513           # endif
 1514     1.93M              ++n;
 1515     1.93M          }
 1516      593k      }
 1517
 1518      604k      ctx->mres = mres;
 1519      604k      return 0;
 1520      604k  #endif
 1521      604k  }
 1522
 1523           int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
 1524                                    size_t len)
 1525     1.50M  {
 1526     1.50M      DECLARE_IS_ENDIAN;
 1527     1.50M      u64 alen = ctx->len.u[0] << 3;
 1528     1.50M      u64 clen = ctx->len.u[1] << 3;
 1529
 1530     1.50M  #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
 1531     1.50M      u128 bitlen;
 1532     1.50M      unsigned int mres = ctx->mres;
 1533
 1534     1.50M      if (mres) {
 1535     1.37M          unsigned blocks = (mres + 15) & -16;
 1536
 1537     1.37M          memset(ctx->Xn + mres, 0, blocks - mres);
 1538     1.37M          mres = blocks;
 1539     1.37M          if (mres == sizeof(ctx->Xn)) {
 1540        0              GHASH(ctx, ctx->Xn, mres);
 1541        0              mres = 0;
 1542        0          }
 1543     1.37M      } else if (ctx->ares) {
 1544     93.6k          GCM_MUL(ctx);
 1545     93.6k      }
 1546           #else
 1547               if (ctx->mres || ctx->ares)
 1548                   GCM_MUL(ctx);
 1549           #endif
 1550
 1551     1.50M      if (IS_LITTLE_ENDIAN) {
 1552           #ifdef BSWAP8
 1553                   alen = BSWAP8(alen);
 1554                   clen = BSWAP8(clen);
 1555           #else
 1556     1.50M          u8 *p = ctx->len.c;
 1557
 1558     1.50M          ctx->len.u[0] = alen;
 1559     1.50M          ctx->len.u[1] = clen;
 1560
 1561     1.50M          alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
 1562     1.50M          clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
 1563     1.50M  #endif
 1564     1.50M      }
 1565
 1566     1.50M  #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
 1567     1.50M      bitlen.hi = alen;
 1568     1.50M      bitlen.lo = clen;
 1569     1.50M      memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
 1570     1.50M      mres += sizeof(bitlen);
 1571     1.50M      GHASH(ctx, ctx->Xn, mres);
 1572           #else
 1573               ctx->Xi.u[0] ^= alen;
 1574               ctx->Xi.u[1] ^= clen;
 1575               GCM_MUL(ctx);
 1576           #endif
 1577
 1578     1.50M      ctx->Xi.u[0] ^= ctx->EK0.u[0];
 1579     1.50M      ctx->Xi.u[1] ^= ctx->EK0.u[1];
 1580
 1581     1.50M      if (tag && len <= sizeof(ctx->Xi))
 1582      613k          return CRYPTO_memcmp(ctx->Xi.c, tag, len);
 1583      887k      else
 1584      887k          return -1;
 1585     1.50M  }
 1586
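What CRYPTO_gcm128_finish computes, condensed: the bit lengths of the AAD and ciphertext are appended as a final block, the whole stream is hashed, and the result is masked with E_K(J_0), which CRYPTO_gcm128_setiv stashed in EK0. As a LaTeX sketch of the SP 800-38D tag:

    S = \mathrm{GHASH}_H\big(A \,\|\, 0^v \,\|\, C \,\|\, 0^u \,\|\, [\,|A|\,]_{64} \,\|\, [\,|C|\,]_{64}\big),
    \qquad T = E_K(J_0) \oplus S.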
 1587           void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
 1588      887k  {
 1589      887k      CRYPTO_gcm128_finish(ctx, NULL, 0);
 1590      887k      memcpy(tag, ctx->Xi.c,
 1591      887k             len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
 1592      887k  }
 1593
 1594           GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
 1595        0  {
 1596        0      GCM128_CONTEXT *ret;
 1597
 1598        0      if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
 1599        0          CRYPTO_gcm128_init(ret, key, block);
 1600
 1601        0      return ret;
 1602        0  }
 1603
 1604           void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
 1605        0  {
 1606        0      OPENSSL_clear_free(ctx, sizeof(*ctx));
 1607        0  }