Coverage Report

Created: 2025-12-10 06:24

/src/openssl/crypto/modes/gcm128.c
Line | Count | Source
1
/*
2
 * Copyright 2010-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <string.h>
11
#include <openssl/crypto.h>
12
#include "internal/cryptlib.h"
13
#include "internal/endian.h"
14
#include "crypto/modes.h"
15
16
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17
typedef size_t size_t_aX __attribute((__aligned__(1)));
18
#else
19
typedef size_t size_t_aX;
20
#endif
21
22
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23
/* redefine, because alignment is ensured */
24
#undef GETU32
25
#define GETU32(p) BSWAP4(*(const u32 *)(p))
26
#undef PUTU32
27
#define PUTU32(p, v) *(u32 *)(p) = BSWAP4(v)
28
#endif
29
30
/* RISC-V uses C implementation as a fallback. */
31
#if defined(__riscv)
32
#define INCLUDE_C_GMULT_4BIT
33
#define INCLUDE_C_GHASH_4BIT
34
#endif
35
36
#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
37
#define REDUCE1BIT(V)                                           \
38
0
    do {                                                        \
39
0
        if (sizeof(size_t) == 8) {                              \
40
0
            u64 T = U64(0xe100000000000000) & (0 - (V.lo & 1)); \
41
0
            V.lo = (V.hi << 63) | (V.lo >> 1);                  \
42
0
            V.hi = (V.hi >> 1) ^ T;                             \
43
0
        } else {                                                \
44
0
            u32 T = 0xe1000000U & (0 - (u32)(V.lo & 1));        \
45
0
            V.lo = (V.hi << 63) | (V.lo >> 1);                  \
46
0
            V.hi = (V.hi >> 1) ^ ((u64)T << 32);                \
47
0
        }                                                       \
48
0
    } while (0)
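/*
 * Editorial note: REDUCE1BIT above multiplies the 128-bit field element
 * V = {V.hi, V.lo} by x in GF(2^128), in GCM's bit-reflected
 * representation.  The multiply is a one-bit right shift of the 128-bit
 * string; if the bit shifted out (the coefficient of x^127) was set, the
 * result is reduced modulo x^128 + x^7 + x^2 + x + 1 by XOR-ing the
 * constant 0xe1 || 0^120 into the top word.
 */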
49
50
/*-
51
 *
52
 * NOTE: TABLE_BITS and all non-4bit implementations have been removed in 3.1.
53
 *
54
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
55
 * never be set to 8. 8 is effectively reserved for testing purposes.
56
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
57
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
58
 * whole spectrum of possible table driven implementations. Why? In
59
 * non-"Shoup's" case memory access pattern is segmented in such manner,
60
 * that it's trivial to see that cache timing information can reveal
61
 * a fair portion of the intermediate hash value. Given that the ciphertext
62
 * is always available to an attacker, they can attempt to
63
 * deduce the secret parameter H and, if successful, tamper with messages
64
 * [which is trivial in CTR mode]. In "Shoup's" case it's
65
 * not as trivial, but there is no reason to believe that it's resistant
66
 * to cache-timing attack. And the thing about "8-bit" implementation is
67
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
68
 * key + 1KB shared. Well, on pros side it should be twice as fast as
69
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
70
 * was observed to run ~75% faster, closer to 100% for commercial
71
 * compilers... Yet "4-bit" procedure is preferred, because it's
72
 * believed to provide better security-performance balance and adequate
73
 * all-round performance. "All-round" refers to things like:
74
 *
75
 * - shorter setup time effectively improves overall timing for
76
 *   handling short messages;
77
 * - larger table allocation can become unbearable because of VM
78
 *   subsystem penalties (for example on Windows a large enough free()
79
 *   results in VM working set trimming, meaning that a subsequent
80
 *   malloc would immediately incur working set expansion);
81
 * - larger table has larger cache footprint, which can affect
82
 *   performance of other code paths (not necessarily even from same
83
 *   thread in Hyper-Threading world);
84
 *
85
 * Value of 1 is not appropriate for performance reasons.
86
 */
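/*
 * Editorial sketch (not part of the original source): for contrast with the
 * 4-bit table-driven code below, this is what a plain bit-at-a-time GHASH
 * multiply looks like.  It assumes the file's u64/u128 types and the same
 * bit-reflected convention REDUCE1BIT uses (bit 0 of the GCM bit string is
 * the most significant bit of X[0]); the function name is hypothetical and
 * the routine is shown for illustration only, it is not referenced by the
 * file.
 */
static void gf128_mul_bitwise(u64 X[2], const u64 H[2])
{
    u128 Z = { 0, 0 };
    u128 V;
    int i;

    V.hi = H[0];
    V.lo = H[1];

    for (i = 0; i < 128; ++i) {
        /* take bit i of X, most significant bit of X[0] first */
        if ((X[i >> 6] >> (63 - (i & 63))) & 1) {
            Z.hi ^= V.hi;           /* Z += H * x^i */
            Z.lo ^= V.lo;
        }
        REDUCE1BIT(V);              /* V := V * x */
    }
    X[0] = Z.hi;
    X[1] = Z.lo;
}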
87
88
static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
89
0
{
90
0
    u128 V;
91
#if defined(OPENSSL_SMALL_FOOTPRINT)
92
    int i;
93
#endif
94
95
0
    Htable[0].hi = 0;
96
0
    Htable[0].lo = 0;
97
0
    V.hi = H[0];
98
0
    V.lo = H[1];
99
100
#if defined(OPENSSL_SMALL_FOOTPRINT)
101
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
102
        REDUCE1BIT(V);
103
        Htable[i] = V;
104
    }
105
106
    for (i = 2; i < 16; i <<= 1) {
107
        u128 *Hi = Htable + i;
108
        int j;
109
        for (V = *Hi, j = 1; j < i; ++j) {
110
            Hi[j].hi = V.hi ^ Htable[j].hi;
111
            Hi[j].lo = V.lo ^ Htable[j].lo;
112
        }
113
    }
114
#else
115
0
    Htable[8] = V;
116
0
    REDUCE1BIT(V);
117
0
    Htable[4] = V;
118
0
    REDUCE1BIT(V);
119
0
    Htable[2] = V;
120
0
    REDUCE1BIT(V);
121
0
    Htable[1] = V;
122
0
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
123
0
    V = Htable[4];
124
0
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
125
0
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
126
0
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
127
0
    V = Htable[8];
128
0
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
129
0
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
130
0
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
131
0
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
132
0
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
133
0
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
134
0
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
135
0
#endif
136
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
137
    /*
138
     * ARM assembler expects specific dword order in Htable.
139
     */
140
    {
141
        int j;
142
        DECLARE_IS_ENDIAN;
143
144
        if (IS_LITTLE_ENDIAN)
145
            for (j = 0; j < 16; ++j) {
146
                V = Htable[j];
147
                Htable[j].hi = V.lo;
148
                Htable[j].lo = V.hi;
149
            }
150
        else
151
            for (j = 0; j < 16; ++j) {
152
                V = Htable[j];
153
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
154
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
155
            }
156
    }
157
#endif
158
0
}
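/*
 * Editorial note: after gcm_init_4bit, Htable[n] (for n = 0..15) holds H
 * multiplied by the 4-bit polynomial encoded by n in GCM's reflected bit
 * order -- Htable[8] = H, Htable[4] = H*x, Htable[2] = H*x^2,
 * Htable[1] = H*x^3 -- and entries with several bits set are the XOR of the
 * corresponding single-bit entries.  gcm_gmult_4bit below consumes Xi one
 * nibble at a time against this table (Shoup's method).
 */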
159
160
#if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
161
static const size_t rem_4bit[16] = {
162
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
163
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
164
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
165
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
166
};
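/*
 * Editorial note: rem_4bit[r] is the precomputed reduction term for the four
 * bits r that fall off the low end when Z is shifted right by one nibble in
 * the loops below; PACK() positions the 16-bit constant in the top bits of a
 * size_t so that it lands in the top bits of Z.hi when XOR-ed in.
 */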
167
168
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
169
0
{
170
0
    u128 Z;
171
0
    int cnt = 15;
172
0
    size_t rem, nlo, nhi;
173
0
    DECLARE_IS_ENDIAN;
174
175
0
    nlo = ((const u8 *)Xi)[15];
176
0
    nhi = nlo >> 4;
177
0
    nlo &= 0xf;
178
179
0
    Z.hi = Htable[nlo].hi;
180
0
    Z.lo = Htable[nlo].lo;
181
182
0
    while (1) {
183
0
        rem = (size_t)Z.lo & 0xf;
184
0
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
185
0
        Z.hi = (Z.hi >> 4);
186
0
        if (sizeof(size_t) == 8)
187
0
            Z.hi ^= rem_4bit[rem];
188
0
        else
189
0
            Z.hi ^= (u64)rem_4bit[rem] << 32;
190
191
0
        Z.hi ^= Htable[nhi].hi;
192
0
        Z.lo ^= Htable[nhi].lo;
193
194
0
        if (--cnt < 0)
195
0
            break;
196
197
0
        nlo = ((const u8 *)Xi)[cnt];
198
0
        nhi = nlo >> 4;
199
0
        nlo &= 0xf;
200
201
0
        rem = (size_t)Z.lo & 0xf;
202
0
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
203
0
        Z.hi = (Z.hi >> 4);
204
0
        if (sizeof(size_t) == 8)
205
0
            Z.hi ^= rem_4bit[rem];
206
0
        else
207
0
            Z.hi ^= (u64)rem_4bit[rem] << 32;
208
209
0
        Z.hi ^= Htable[nlo].hi;
210
0
        Z.lo ^= Htable[nlo].lo;
211
0
    }
212
213
0
    if (IS_LITTLE_ENDIAN) {
214
#ifdef BSWAP8
215
        Xi[0] = BSWAP8(Z.hi);
216
        Xi[1] = BSWAP8(Z.lo);
217
#else
218
0
        u8 *p = (u8 *)Xi;
219
0
        u32 v;
220
0
        v = (u32)(Z.hi >> 32);
221
0
        PUTU32(p, v);
222
0
        v = (u32)(Z.hi);
223
0
        PUTU32(p + 4, v);
224
0
        v = (u32)(Z.lo >> 32);
225
0
        PUTU32(p + 8, v);
226
0
        v = (u32)(Z.lo);
227
0
        PUTU32(p + 12, v);
228
0
#endif
229
0
    } else {
230
0
        Xi[0] = Z.hi;
231
0
        Xi[1] = Z.lo;
232
0
    }
233
0
}
234
235
#endif
236
237
#if !defined(GHASH_ASM) || defined(INCLUDE_C_GHASH_4BIT)
238
#if !defined(OPENSSL_SMALL_FOOTPRINT)
239
/*
240
 * Streamed variant of gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
241
 * details... Compiler-generated code doesn't seem to give any
242
 * performance improvement, at least not on x86[_64]. It's here
243
 * mostly as reference and a placeholder for possible future
244
 * non-trivial optimization[s]...
245
 */
246
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
247
    const u8 *inp, size_t len)
248
0
{
249
0
    u128 Z;
250
0
    int cnt;
251
0
    size_t rem, nlo, nhi;
252
0
    DECLARE_IS_ENDIAN;
253
254
0
    do {
255
0
        cnt = 15;
256
0
        nlo = ((const u8 *)Xi)[15];
257
0
        nlo ^= inp[15];
258
0
        nhi = nlo >> 4;
259
0
        nlo &= 0xf;
260
261
0
        Z.hi = Htable[nlo].hi;
262
0
        Z.lo = Htable[nlo].lo;
263
264
0
        while (1) {
265
0
            rem = (size_t)Z.lo & 0xf;
266
0
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
267
0
            Z.hi = (Z.hi >> 4);
268
0
            if (sizeof(size_t) == 8)
269
0
                Z.hi ^= rem_4bit[rem];
270
0
            else
271
0
                Z.hi ^= (u64)rem_4bit[rem] << 32;
272
273
0
            Z.hi ^= Htable[nhi].hi;
274
0
            Z.lo ^= Htable[nhi].lo;
275
276
0
            if (--cnt < 0)
277
0
                break;
278
279
0
            nlo = ((const u8 *)Xi)[cnt];
280
0
            nlo ^= inp[cnt];
281
0
            nhi = nlo >> 4;
282
0
            nlo &= 0xf;
283
284
0
            rem = (size_t)Z.lo & 0xf;
285
0
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
286
0
            Z.hi = (Z.hi >> 4);
287
0
            if (sizeof(size_t) == 8)
288
0
                Z.hi ^= rem_4bit[rem];
289
0
            else
290
0
                Z.hi ^= (u64)rem_4bit[rem] << 32;
291
292
0
            Z.hi ^= Htable[nlo].hi;
293
0
            Z.lo ^= Htable[nlo].lo;
294
0
        }
295
296
0
        if (IS_LITTLE_ENDIAN) {
297
#ifdef BSWAP8
298
            Xi[0] = BSWAP8(Z.hi);
299
            Xi[1] = BSWAP8(Z.lo);
300
#else
301
0
            u8 *p = (u8 *)Xi;
302
0
            u32 v;
303
0
            v = (u32)(Z.hi >> 32);
304
0
            PUTU32(p, v);
305
0
            v = (u32)(Z.hi);
306
0
            PUTU32(p + 4, v);
307
0
            v = (u32)(Z.lo >> 32);
308
0
            PUTU32(p + 8, v);
309
0
            v = (u32)(Z.lo);
310
0
            PUTU32(p + 12, v);
311
0
#endif
312
0
        } else {
313
0
            Xi[0] = Z.hi;
314
0
            Xi[1] = Z.lo;
315
0
        }
316
317
0
        inp += 16;
318
        /* Block size is 128 bits so len is a multiple of 16 */
319
0
        len -= 16;
320
0
    } while (len > 0);
321
0
}
322
#endif
323
#else
324
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
325
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
326
    size_t len);
327
#endif
328
329
0
#define GCM_MUL(ctx) ctx->funcs.gmult(ctx->Xi.u, ctx->Htable)
330
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
331
0
#define GHASH(ctx, in, len) ctx->funcs.ghash((ctx)->Xi.u, (ctx)->Htable, in, len)
332
/*
333
 * GHASH_CHUNK is a "stride" parameter intended to mitigate cache-thrashing
334
 * effects: the idea is to hash data while it is still in the L1 cache
335
 * after the encryption pass...
336
 */
337
0
#define GHASH_CHUNK (3 * 1024)
338
#endif
339
340
#if (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
341
#if !defined(I386_ONLY) && (defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
342
#define GHASH_ASM_X86_OR_64
343
344
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
345
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
346
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
347
    size_t len);
348
349
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
350
#define gcm_init_avx gcm_init_clmul
351
#define gcm_gmult_avx gcm_gmult_clmul
352
#define gcm_ghash_avx gcm_ghash_clmul
353
#else
354
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
355
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
356
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
357
    size_t len);
358
#endif
359
360
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
361
#define GHASH_ASM_X86
362
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
363
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
364
    size_t len);
365
366
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
367
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
368
    size_t len);
369
#endif
370
#elif defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(_M_ARM64)
371
#include "arm_arch.h"
372
#if __ARM_MAX_ARCH__ >= 7
373
#define GHASH_ASM_ARM
374
#define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
375
#if defined(__arm__) || defined(__arm)
376
#define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
377
#endif
378
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
379
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
380
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
381
    size_t len);
382
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
383
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
384
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
385
    size_t len);
386
#endif
387
#elif defined(__sparc__) || defined(__sparc)
388
#include "crypto/sparc_arch.h"
389
#define GHASH_ASM_SPARC
390
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
391
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
392
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
393
    size_t len);
394
#elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__POWERPC__) || defined(_ARCH_PPC))
395
#include "crypto/ppc_arch.h"
396
#define GHASH_ASM_PPC
397
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
398
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
399
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
400
    size_t len);
401
#elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
402
#include "crypto/riscv_arch.h"
403
#define GHASH_ASM_RV64I
404
/* Zbc/Zbkc (scalar crypto with clmul) based routines. */
405
void gcm_init_rv64i_zbc(u128 Htable[16], const u64 Xi[2]);
406
void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 Xi[2]);
407
void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 Xi[2]);
408
void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
409
void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
410
void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
411
    const u8 *inp, size_t len);
412
void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
413
    const u8 *inp, size_t len);
414
/* zvkb/Zvbc (vector crypto with vclmul) based routines. */
415
void gcm_init_rv64i_zvkb_zvbc(u128 Htable[16], const u64 Xi[2]);
416
void gcm_gmult_rv64i_zvkb_zvbc(u64 Xi[2], const u128 Htable[16]);
417
void gcm_ghash_rv64i_zvkb_zvbc(u64 Xi[2], const u128 Htable[16],
418
    const u8 *inp, size_t len);
419
/* Zvkg (vector crypto with vgmul.vv and vghsh.vv). */
420
void gcm_init_rv64i_zvkg(u128 Htable[16], const u64 Xi[2]);
421
void gcm_init_rv64i_zvkg_zvkb(u128 Htable[16], const u64 Xi[2]);
422
void gcm_gmult_rv64i_zvkg(u64 Xi[2], const u128 Htable[16]);
423
void gcm_ghash_rv64i_zvkg(u64 Xi[2], const u128 Htable[16],
424
    const u8 *inp, size_t len);
425
#endif
426
#endif
427
428
static void gcm_get_funcs(struct gcm_funcs_st *ctx)
429
0
{
430
    /* set defaults -- overridden below as needed */
431
0
    ctx->ginit = gcm_init_4bit;
432
0
#if !defined(GHASH_ASM)
433
0
    ctx->gmult = gcm_gmult_4bit;
434
#else
435
    ctx->gmult = NULL;
436
#endif
437
0
#if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
438
0
    ctx->ghash = gcm_ghash_4bit;
439
#else
440
    ctx->ghash = NULL;
441
#endif
442
443
#if defined(GHASH_ASM_X86_OR_64)
444
#if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
445
    /* x86_64 */
446
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
447
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
448
            ctx->ginit = gcm_init_avx;
449
            ctx->gmult = gcm_gmult_avx;
450
            ctx->ghash = gcm_ghash_avx;
451
        } else {
452
            ctx->ginit = gcm_init_clmul;
453
            ctx->gmult = gcm_gmult_clmul;
454
            ctx->ghash = gcm_ghash_clmul;
455
        }
456
        return;
457
    }
458
#endif
459
#if defined(GHASH_ASM_X86)
460
    /* x86 only */
461
#if defined(OPENSSL_IA32_SSE2)
462
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
463
        ctx->gmult = gcm_gmult_4bit_mmx;
464
        ctx->ghash = gcm_ghash_4bit_mmx;
465
        return;
466
    }
467
#else
468
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
469
        ctx->gmult = gcm_gmult_4bit_mmx;
470
        ctx->ghash = gcm_ghash_4bit_mmx;
471
        return;
472
    }
473
#endif
474
    ctx->gmult = gcm_gmult_4bit_x86;
475
    ctx->ghash = gcm_ghash_4bit_x86;
476
    return;
477
#else
478
    /* x86_64 fallback defaults */
479
    ctx->gmult = gcm_gmult_4bit;
480
    ctx->ghash = gcm_ghash_4bit;
481
    return;
482
#endif
483
#elif defined(GHASH_ASM_ARM)
484
    /* ARM defaults */
485
    ctx->gmult = gcm_gmult_4bit;
486
#if !defined(OPENSSL_SMALL_FOOTPRINT)
487
    ctx->ghash = gcm_ghash_4bit;
488
#else
489
    ctx->ghash = NULL;
490
#endif
491
#ifdef PMULL_CAPABLE
492
    if (PMULL_CAPABLE) {
493
        ctx->ginit = (gcm_init_fn)gcm_init_v8;
494
        ctx->gmult = gcm_gmult_v8;
495
        ctx->ghash = gcm_ghash_v8;
496
    }
497
#elif defined(NEON_CAPABLE)
498
    if (NEON_CAPABLE) {
499
        ctx->ginit = gcm_init_neon;
500
        ctx->gmult = gcm_gmult_neon;
501
        ctx->ghash = gcm_ghash_neon;
502
    }
503
#endif
504
    return;
505
#elif defined(GHASH_ASM_SPARC)
506
    /* SPARC defaults */
507
    ctx->gmult = gcm_gmult_4bit;
508
    ctx->ghash = gcm_ghash_4bit;
509
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
510
        ctx->ginit = gcm_init_vis3;
511
        ctx->gmult = gcm_gmult_vis3;
512
        ctx->ghash = gcm_ghash_vis3;
513
    }
514
    return;
515
#elif defined(GHASH_ASM_PPC)
516
    /* PowerPC does not define GHASH_ASM; defaults set above */
517
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
518
        ctx->ginit = gcm_init_p8;
519
        ctx->gmult = gcm_gmult_p8;
520
        ctx->ghash = gcm_ghash_p8;
521
    }
522
    return;
523
#elif defined(GHASH_ASM_RV64I)
524
    /* RISCV defaults */
525
    ctx->gmult = gcm_gmult_4bit;
526
    ctx->ghash = gcm_ghash_4bit;
527
528
    if (RISCV_HAS_ZVKG() && riscv_vlen() >= 128) {
529
        if (RISCV_HAS_ZVKB())
530
            ctx->ginit = gcm_init_rv64i_zvkg_zvkb;
531
        else
532
            ctx->ginit = gcm_init_rv64i_zvkg;
533
        ctx->gmult = gcm_gmult_rv64i_zvkg;
534
        ctx->ghash = gcm_ghash_rv64i_zvkg;
535
    } else if (RISCV_HAS_ZVKB() && RISCV_HAS_ZVBC() && riscv_vlen() >= 128) {
536
        ctx->ginit = gcm_init_rv64i_zvkb_zvbc;
537
        ctx->gmult = gcm_gmult_rv64i_zvkb_zvbc;
538
        ctx->ghash = gcm_ghash_rv64i_zvkb_zvbc;
539
    } else if (RISCV_HAS_ZBC()) {
540
        if (RISCV_HAS_ZBKB()) {
541
            ctx->ginit = gcm_init_rv64i_zbc__zbkb;
542
            ctx->gmult = gcm_gmult_rv64i_zbc__zbkb;
543
            ctx->ghash = gcm_ghash_rv64i_zbc__zbkb;
544
        } else if (RISCV_HAS_ZBB()) {
545
            ctx->ginit = gcm_init_rv64i_zbc__zbb;
546
            ctx->gmult = gcm_gmult_rv64i_zbc;
547
            ctx->ghash = gcm_ghash_rv64i_zbc;
548
        } else {
549
            ctx->ginit = gcm_init_rv64i_zbc;
550
            ctx->gmult = gcm_gmult_rv64i_zbc;
551
            ctx->ghash = gcm_ghash_rv64i_zbc;
552
        }
553
    }
554
    return;
555
#elif defined(GHASH_ASM)
556
    /* all other architectures use the generic names */
557
    ctx->gmult = gcm_gmult_4bit;
558
    ctx->ghash = gcm_ghash_4bit;
559
    return;
560
#endif
561
0
}
562
563
void ossl_gcm_init_4bit(u128 Htable[16], const u64 H[2])
564
0
{
565
0
    struct gcm_funcs_st funcs;
566
567
0
    gcm_get_funcs(&funcs);
568
0
    funcs.ginit(Htable, H);
569
0
}
570
571
void ossl_gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
572
0
{
573
0
    struct gcm_funcs_st funcs;
574
575
0
    gcm_get_funcs(&funcs);
576
0
    funcs.gmult(Xi, Htable);
577
0
}
578
579
void ossl_gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
580
    const u8 *inp, size_t len)
581
0
{
582
0
    struct gcm_funcs_st funcs;
583
0
    u64 tmp[2];
584
0
    size_t i;
585
586
0
    gcm_get_funcs(&funcs);
587
0
    if (funcs.ghash != NULL) {
588
0
        funcs.ghash(Xi, Htable, inp, len);
589
0
    } else {
590
        /* Emulate ghash if needed */
591
0
        for (i = 0; i < len; i += 16) {
592
0
            memcpy(tmp, &inp[i], sizeof(tmp));
593
0
            Xi[0] ^= tmp[0];
594
0
            Xi[1] ^= tmp[1];
595
0
            funcs.gmult(Xi, Htable);
596
0
        }
597
0
    }
598
0
}
599
600
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
601
0
{
602
0
    DECLARE_IS_ENDIAN;
603
604
0
    memset(ctx, 0, sizeof(*ctx));
605
0
    ctx->block = block;
606
0
    ctx->key = key;
607
608
0
    (*block)(ctx->H.c, ctx->H.c, key);
609
610
0
    if (IS_LITTLE_ENDIAN) {
611
        /* H is stored in host byte order */
612
#ifdef BSWAP8
613
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
614
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
615
#else
616
0
        u8 *p = ctx->H.c;
617
0
        u64 hi, lo;
618
0
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
619
0
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
620
0
        ctx->H.u[0] = hi;
621
0
        ctx->H.u[1] = lo;
622
0
#endif
623
0
    }
624
625
0
    gcm_get_funcs(&ctx->funcs);
626
0
    ctx->funcs.ginit(ctx->Htable, ctx->H.u);
627
0
}
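/*
 * Editorial note on the function below: per NIST SP 800-38D, a 96-bit IV is
 * used directly as the initial counter block (Yi here, J0 in the spec):
 * Yi = IV || 0^31 || 1, the fast path.  Any other IV length is hashed
 * instead: Yi = GHASH(IV padded with zeros to a 16-byte boundary, followed
 * by a 128-bit block carrying the IV length in bits).
 */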
628
629
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
630
    size_t len)
631
0
{
632
0
    DECLARE_IS_ENDIAN;
633
0
    unsigned int ctr;
634
635
0
    ctx->len.u[0] = 0; /* AAD length */
636
0
    ctx->len.u[1] = 0; /* message length */
637
0
    ctx->ares = 0;
638
0
    ctx->mres = 0;
639
640
0
    if (len == 12) {
641
0
        memcpy(ctx->Yi.c, iv, 12);
642
0
        ctx->Yi.c[12] = 0;
643
0
        ctx->Yi.c[13] = 0;
644
0
        ctx->Yi.c[14] = 0;
645
0
        ctx->Yi.c[15] = 1;
646
0
        ctr = 1;
647
0
    } else {
648
0
        size_t i;
649
0
        u64 len0 = len;
650
651
        /* Borrow ctx->Xi to calculate initial Yi */
652
0
        ctx->Xi.u[0] = 0;
653
0
        ctx->Xi.u[1] = 0;
654
655
0
        while (len >= 16) {
656
0
            for (i = 0; i < 16; ++i)
657
0
                ctx->Xi.c[i] ^= iv[i];
658
0
            GCM_MUL(ctx);
659
0
            iv += 16;
660
0
            len -= 16;
661
0
        }
662
0
        if (len) {
663
0
            for (i = 0; i < len; ++i)
664
0
                ctx->Xi.c[i] ^= iv[i];
665
0
            GCM_MUL(ctx);
666
0
        }
667
0
        len0 <<= 3;
668
0
        if (IS_LITTLE_ENDIAN) {
669
#ifdef BSWAP8
670
            ctx->Xi.u[1] ^= BSWAP8(len0);
671
#else
672
0
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
673
0
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
674
0
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
675
0
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
676
0
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
677
0
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
678
0
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
679
0
            ctx->Xi.c[15] ^= (u8)(len0);
680
0
#endif
681
0
        } else {
682
0
            ctx->Xi.u[1] ^= len0;
683
0
        }
684
685
0
        GCM_MUL(ctx);
686
687
0
        if (IS_LITTLE_ENDIAN)
688
#ifdef BSWAP4
689
            ctr = BSWAP4(ctx->Xi.d[3]);
690
#else
691
0
            ctr = GETU32(ctx->Xi.c + 12);
692
0
#endif
693
0
        else
694
0
            ctr = ctx->Xi.d[3];
695
696
        /* Copy borrowed Xi to Yi */
697
0
        ctx->Yi.u[0] = ctx->Xi.u[0];
698
0
        ctx->Yi.u[1] = ctx->Xi.u[1];
699
0
    }
700
701
0
    ctx->Xi.u[0] = 0;
702
0
    ctx->Xi.u[1] = 0;
703
704
0
    (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
705
0
    ++ctr;
706
0
    if (IS_LITTLE_ENDIAN)
707
#ifdef BSWAP4
708
        ctx->Yi.d[3] = BSWAP4(ctr);
709
#else
710
0
        PUTU32(ctx->Yi.c + 12, ctr);
711
0
#endif
712
0
    else
713
0
        ctx->Yi.d[3] = ctr;
714
0
}
715
716
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
717
    size_t len)
718
0
{
719
0
    size_t i;
720
0
    unsigned int n;
721
0
    u64 alen = ctx->len.u[0];
722
723
0
    if (ctx->len.u[1])
724
0
        return -2;
725
726
0
    alen += len;
727
0
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
728
0
        return -1;
729
0
    ctx->len.u[0] = alen;
730
731
0
    n = ctx->ares;
732
0
    if (n) {
733
0
        while (n && len) {
734
0
            ctx->Xi.c[n] ^= *(aad++);
735
0
            --len;
736
0
            n = (n + 1) % 16;
737
0
        }
738
0
        if (n == 0)
739
0
            GCM_MUL(ctx);
740
0
        else {
741
0
            ctx->ares = n;
742
0
            return 0;
743
0
        }
744
0
    }
745
0
#ifdef GHASH
746
0
    if ((i = (len & (size_t)-16))) {
747
0
        GHASH(ctx, aad, i);
748
0
        aad += i;
749
0
        len -= i;
750
0
    }
751
#else
752
    while (len >= 16) {
753
        for (i = 0; i < 16; ++i)
754
            ctx->Xi.c[i] ^= aad[i];
755
        GCM_MUL(ctx);
756
        aad += 16;
757
        len -= 16;
758
    }
759
#endif
760
0
    if (len) {
761
0
        n = (unsigned int)len;
762
0
        for (i = 0; i < len; ++i)
763
0
            ctx->Xi.c[i] ^= aad[i];
764
0
    }
765
766
0
    ctx->ares = n;
767
0
    return 0;
768
0
}
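/*
 * Editorial note: the (U64(1) << 61) bound above caps the AAD at 2^64 bits,
 * and the ((U64(1) << 36) - 32) bound in the encrypt/decrypt paths below
 * caps the plaintext at 2^39 - 256 bits, matching the limits in NIST SP
 * 800-38D.
 */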
769
770
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
771
    const unsigned char *in, unsigned char *out,
772
    size_t len)
773
0
{
774
0
    DECLARE_IS_ENDIAN;
775
0
    unsigned int n, ctr, mres;
776
0
    size_t i;
777
0
    u64 mlen = ctx->len.u[1];
778
0
    block128_f block = ctx->block;
779
0
    void *key = ctx->key;
780
781
0
    mlen += len;
782
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
783
0
        return -1;
784
0
    ctx->len.u[1] = mlen;
785
786
0
    mres = ctx->mres;
787
788
0
    if (ctx->ares) {
789
        /* First call to encrypt finalizes GHASH(AAD) */
790
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
791
0
        if (len == 0) {
792
0
            GCM_MUL(ctx);
793
0
            ctx->ares = 0;
794
0
            return 0;
795
0
        }
796
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
797
0
        ctx->Xi.u[0] = 0;
798
0
        ctx->Xi.u[1] = 0;
799
0
        mres = sizeof(ctx->Xi);
800
#else
801
        GCM_MUL(ctx);
802
#endif
803
0
        ctx->ares = 0;
804
0
    }
805
806
0
    if (IS_LITTLE_ENDIAN)
807
#ifdef BSWAP4
808
        ctr = BSWAP4(ctx->Yi.d[3]);
809
#else
810
0
        ctr = GETU32(ctx->Yi.c + 12);
811
0
#endif
812
0
    else
813
0
        ctr = ctx->Yi.d[3];
814
815
0
    n = mres % 16;
816
0
#if !defined(OPENSSL_SMALL_FOOTPRINT)
817
0
    if (16 % sizeof(size_t) == 0) { /* always true actually */
818
0
        do {
819
0
            if (n) {
820
0
#if defined(GHASH)
821
0
                while (n && len) {
822
0
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
823
0
                    --len;
824
0
                    n = (n + 1) % 16;
825
0
                }
826
0
                if (n == 0) {
827
0
                    GHASH(ctx, ctx->Xn, mres);
828
0
                    mres = 0;
829
0
                } else {
830
0
                    ctx->mres = mres;
831
0
                    return 0;
832
0
                }
833
#else
834
                while (n && len) {
835
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
836
                    --len;
837
                    n = (n + 1) % 16;
838
                }
839
                if (n == 0) {
840
                    GCM_MUL(ctx);
841
                    mres = 0;
842
                } else {
843
                    ctx->mres = n;
844
                    return 0;
845
                }
846
#endif
847
0
            }
848
#if defined(STRICT_ALIGNMENT)
849
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
850
                break;
851
#endif
852
0
#if defined(GHASH)
853
0
            if (len >= 16 && mres) {
854
0
                GHASH(ctx, ctx->Xn, mres);
855
0
                mres = 0;
856
0
            }
857
0
#if defined(GHASH_CHUNK)
858
0
            while (len >= GHASH_CHUNK) {
859
0
                size_t j = GHASH_CHUNK;
860
861
0
                while (j) {
862
0
                    size_t_aX *out_t = (size_t_aX *)out;
863
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
864
865
0
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
866
0
                    ++ctr;
867
0
                    if (IS_LITTLE_ENDIAN)
868
#ifdef BSWAP4
869
                        ctx->Yi.d[3] = BSWAP4(ctr);
870
#else
871
0
                        PUTU32(ctx->Yi.c + 12, ctr);
872
0
#endif
873
0
                    else
874
0
                        ctx->Yi.d[3] = ctr;
875
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
876
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
877
0
                    out += 16;
878
0
                    in += 16;
879
0
                    j -= 16;
880
0
                }
881
0
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
882
0
                len -= GHASH_CHUNK;
883
0
            }
884
0
#endif
885
0
            if ((i = (len & (size_t)-16))) {
886
0
                size_t j = i;
887
888
0
                while (len >= 16) {
889
0
                    size_t_aX *out_t = (size_t_aX *)out;
890
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
891
892
0
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
893
0
                    ++ctr;
894
0
                    if (IS_LITTLE_ENDIAN)
895
#ifdef BSWAP4
896
                        ctx->Yi.d[3] = BSWAP4(ctr);
897
#else
898
0
                        PUTU32(ctx->Yi.c + 12, ctr);
899
0
#endif
900
0
                    else
901
0
                        ctx->Yi.d[3] = ctr;
902
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
903
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
904
0
                    out += 16;
905
0
                    in += 16;
906
0
                    len -= 16;
907
0
                }
908
0
                GHASH(ctx, out - j, j);
909
0
            }
910
#else
911
            while (len >= 16) {
912
                size_t *out_t = (size_t *)out;
913
                const size_t *in_t = (const size_t *)in;
914
915
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
916
                ++ctr;
917
                if (IS_LITTLE_ENDIAN)
918
#ifdef BSWAP4
919
                    ctx->Yi.d[3] = BSWAP4(ctr);
920
#else
921
                    PUTU32(ctx->Yi.c + 12, ctr);
922
#endif
923
                else
924
                    ctx->Yi.d[3] = ctr;
925
                for (i = 0; i < 16 / sizeof(size_t); ++i)
926
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
927
                GCM_MUL(ctx);
928
                out += 16;
929
                in += 16;
930
                len -= 16;
931
            }
932
#endif
933
0
            if (len) {
934
0
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
935
0
                ++ctr;
936
0
                if (IS_LITTLE_ENDIAN)
937
#ifdef BSWAP4
938
                    ctx->Yi.d[3] = BSWAP4(ctr);
939
#else
940
0
                    PUTU32(ctx->Yi.c + 12, ctr);
941
0
#endif
942
0
                else
943
0
                    ctx->Yi.d[3] = ctr;
944
0
#if defined(GHASH)
945
0
                while (len--) {
946
0
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
947
0
                    ++n;
948
0
                }
949
#else
950
                while (len--) {
951
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
952
                    ++n;
953
                }
954
                mres = n;
955
#endif
956
0
            }
957
958
0
            ctx->mres = mres;
959
0
            return 0;
960
0
        } while (0);
961
0
    }
962
0
#endif
963
0
    for (i = 0; i < len; ++i) {
964
0
        if (n == 0) {
965
0
            (*block)(ctx->Yi.c, ctx->EKi.c, key);
966
0
            ++ctr;
967
0
            if (IS_LITTLE_ENDIAN)
968
#ifdef BSWAP4
969
                ctx->Yi.d[3] = BSWAP4(ctr);
970
#else
971
0
                PUTU32(ctx->Yi.c + 12, ctr);
972
0
#endif
973
0
            else
974
0
                ctx->Yi.d[3] = ctr;
975
0
        }
976
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
977
0
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
978
0
        n = (n + 1) % 16;
979
0
        if (mres == sizeof(ctx->Xn)) {
980
0
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
981
0
            mres = 0;
982
0
        }
983
#else
984
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
985
        mres = n = (n + 1) % 16;
986
        if (n == 0)
987
            GCM_MUL(ctx);
988
#endif
989
0
    }
990
991
0
    ctx->mres = mres;
992
0
    return 0;
993
0
}
994
995
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
996
    const unsigned char *in, unsigned char *out,
997
    size_t len)
998
0
{
999
0
    DECLARE_IS_ENDIAN;
1000
0
    unsigned int n, ctr, mres;
1001
0
    size_t i;
1002
0
    u64 mlen = ctx->len.u[1];
1003
0
    block128_f block = ctx->block;
1004
0
    void *key = ctx->key;
1005
1006
0
    mlen += len;
1007
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1008
0
        return -1;
1009
0
    ctx->len.u[1] = mlen;
1010
1011
0
    mres = ctx->mres;
1012
1013
0
    if (ctx->ares) {
1014
        /* First call to decrypt finalizes GHASH(AAD) */
1015
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1016
0
        if (len == 0) {
1017
0
            GCM_MUL(ctx);
1018
0
            ctx->ares = 0;
1019
0
            return 0;
1020
0
        }
1021
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1022
0
        ctx->Xi.u[0] = 0;
1023
0
        ctx->Xi.u[1] = 0;
1024
0
        mres = sizeof(ctx->Xi);
1025
#else
1026
        GCM_MUL(ctx);
1027
#endif
1028
0
        ctx->ares = 0;
1029
0
    }
1030
1031
0
    if (IS_LITTLE_ENDIAN)
1032
#ifdef BSWAP4
1033
        ctr = BSWAP4(ctx->Yi.d[3]);
1034
#else
1035
0
        ctr = GETU32(ctx->Yi.c + 12);
1036
0
#endif
1037
0
    else
1038
0
        ctr = ctx->Yi.d[3];
1039
1040
0
    n = mres % 16;
1041
0
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1042
0
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1043
0
        do {
1044
0
            if (n) {
1045
0
#if defined(GHASH)
1046
0
                while (n && len) {
1047
0
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1048
0
                    --len;
1049
0
                    n = (n + 1) % 16;
1050
0
                }
1051
0
                if (n == 0) {
1052
0
                    GHASH(ctx, ctx->Xn, mres);
1053
0
                    mres = 0;
1054
0
                } else {
1055
0
                    ctx->mres = mres;
1056
0
                    return 0;
1057
0
                }
1058
#else
1059
                while (n && len) {
1060
                    u8 c = *(in++);
1061
                    *(out++) = c ^ ctx->EKi.c[n];
1062
                    ctx->Xi.c[n] ^= c;
1063
                    --len;
1064
                    n = (n + 1) % 16;
1065
                }
1066
                if (n == 0) {
1067
                    GCM_MUL(ctx);
1068
                    mres = 0;
1069
                } else {
1070
                    ctx->mres = n;
1071
                    return 0;
1072
                }
1073
#endif
1074
0
            }
1075
#if defined(STRICT_ALIGNMENT)
1076
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1077
                break;
1078
#endif
1079
0
#if defined(GHASH)
1080
0
            if (len >= 16 && mres) {
1081
0
                GHASH(ctx, ctx->Xn, mres);
1082
0
                mres = 0;
1083
0
            }
1084
0
#if defined(GHASH_CHUNK)
1085
0
            while (len >= GHASH_CHUNK) {
1086
0
                size_t j = GHASH_CHUNK;
1087
1088
0
                GHASH(ctx, in, GHASH_CHUNK);
1089
0
                while (j) {
1090
0
                    size_t_aX *out_t = (size_t_aX *)out;
1091
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
1092
1093
0
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
1094
0
                    ++ctr;
1095
0
                    if (IS_LITTLE_ENDIAN)
1096
#ifdef BSWAP4
1097
                        ctx->Yi.d[3] = BSWAP4(ctr);
1098
#else
1099
0
                        PUTU32(ctx->Yi.c + 12, ctr);
1100
0
#endif
1101
0
                    else
1102
0
                        ctx->Yi.d[3] = ctr;
1103
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1104
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1105
0
                    out += 16;
1106
0
                    in += 16;
1107
0
                    j -= 16;
1108
0
                }
1109
0
                len -= GHASH_CHUNK;
1110
0
            }
1111
0
#endif
1112
0
            if ((i = (len & (size_t)-16))) {
1113
0
                GHASH(ctx, in, i);
1114
0
                while (len >= 16) {
1115
0
                    size_t_aX *out_t = (size_t_aX *)out;
1116
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
1117
1118
0
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
1119
0
                    ++ctr;
1120
0
                    if (IS_LITTLE_ENDIAN)
1121
#ifdef BSWAP4
1122
                        ctx->Yi.d[3] = BSWAP4(ctr);
1123
#else
1124
0
                        PUTU32(ctx->Yi.c + 12, ctr);
1125
0
#endif
1126
0
                    else
1127
0
                        ctx->Yi.d[3] = ctr;
1128
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1129
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1130
0
                    out += 16;
1131
0
                    in += 16;
1132
0
                    len -= 16;
1133
0
                }
1134
0
            }
1135
#else
1136
            while (len >= 16) {
1137
                size_t *out_t = (size_t *)out;
1138
                const size_t *in_t = (const size_t *)in;
1139
1140
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
1141
                ++ctr;
1142
                if (IS_LITTLE_ENDIAN)
1143
#ifdef BSWAP4
1144
                    ctx->Yi.d[3] = BSWAP4(ctr);
1145
#else
1146
                    PUTU32(ctx->Yi.c + 12, ctr);
1147
#endif
1148
                else
1149
                    ctx->Yi.d[3] = ctr;
1150
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1151
                    size_t c = in_t[i];
1152
                    out_t[i] = c ^ ctx->EKi.t[i];
1153
                    ctx->Xi.t[i] ^= c;
1154
                }
1155
                GCM_MUL(ctx);
1156
                out += 16;
1157
                in += 16;
1158
                len -= 16;
1159
            }
1160
#endif
1161
0
            if (len) {
1162
0
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
1163
0
                ++ctr;
1164
0
                if (IS_LITTLE_ENDIAN)
1165
#ifdef BSWAP4
1166
                    ctx->Yi.d[3] = BSWAP4(ctr);
1167
#else
1168
0
                    PUTU32(ctx->Yi.c + 12, ctr);
1169
0
#endif
1170
0
                else
1171
0
                    ctx->Yi.d[3] = ctr;
1172
0
#if defined(GHASH)
1173
0
                while (len--) {
1174
0
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1175
0
                    ++n;
1176
0
                }
1177
#else
1178
                while (len--) {
1179
                    u8 c = in[n];
1180
                    ctx->Xi.c[n] ^= c;
1181
                    out[n] = c ^ ctx->EKi.c[n];
1182
                    ++n;
1183
                }
1184
                mres = n;
1185
#endif
1186
0
            }
1187
1188
0
            ctx->mres = mres;
1189
0
            return 0;
1190
0
        } while (0);
1191
0
    }
1192
0
#endif
1193
0
    for (i = 0; i < len; ++i) {
1194
0
        u8 c;
1195
0
        if (n == 0) {
1196
0
            (*block)(ctx->Yi.c, ctx->EKi.c, key);
1197
0
            ++ctr;
1198
0
            if (IS_LITTLE_ENDIAN)
1199
#ifdef BSWAP4
1200
                ctx->Yi.d[3] = BSWAP4(ctr);
1201
#else
1202
0
                PUTU32(ctx->Yi.c + 12, ctr);
1203
0
#endif
1204
0
            else
1205
0
                ctx->Yi.d[3] = ctr;
1206
0
        }
1207
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1208
0
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1209
0
        n = (n + 1) % 16;
1210
0
        if (mres == sizeof(ctx->Xn)) {
1211
0
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
1212
0
            mres = 0;
1213
0
        }
1214
#else
1215
        c = in[i];
1216
        out[i] = c ^ ctx->EKi.c[n];
1217
        ctx->Xi.c[n] ^= c;
1218
        mres = n = (n + 1) % 16;
1219
        if (n == 0)
1220
            GCM_MUL(ctx);
1221
#endif
1222
0
    }
1223
1224
0
    ctx->mres = mres;
1225
0
    return 0;
1226
0
}
1227
1228
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1229
    const unsigned char *in, unsigned char *out,
1230
    size_t len, ctr128_f stream)
1231
0
{
1232
#if defined(OPENSSL_SMALL_FOOTPRINT)
1233
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1234
#else
1235
0
    DECLARE_IS_ENDIAN;
1236
0
    unsigned int n, ctr, mres;
1237
0
    size_t i;
1238
0
    u64 mlen = ctx->len.u[1];
1239
0
    void *key = ctx->key;
1240
1241
0
    mlen += len;
1242
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1243
0
        return -1;
1244
0
    ctx->len.u[1] = mlen;
1245
1246
0
    mres = ctx->mres;
1247
1248
0
    if (ctx->ares) {
1249
        /* First call to encrypt finalizes GHASH(AAD) */
1250
0
#if defined(GHASH)
1251
0
        if (len == 0) {
1252
0
            GCM_MUL(ctx);
1253
0
            ctx->ares = 0;
1254
0
            return 0;
1255
0
        }
1256
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1257
0
        ctx->Xi.u[0] = 0;
1258
0
        ctx->Xi.u[1] = 0;
1259
0
        mres = sizeof(ctx->Xi);
1260
#else
1261
        GCM_MUL(ctx);
1262
#endif
1263
0
        ctx->ares = 0;
1264
0
    }
1265
1266
0
    if (IS_LITTLE_ENDIAN)
1267
#ifdef BSWAP4
1268
        ctr = BSWAP4(ctx->Yi.d[3]);
1269
#else
1270
0
        ctr = GETU32(ctx->Yi.c + 12);
1271
0
#endif
1272
0
    else
1273
0
        ctr = ctx->Yi.d[3];
1274
1275
0
    n = mres % 16;
1276
0
    if (n) {
1277
0
#if defined(GHASH)
1278
0
        while (n && len) {
1279
0
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1280
0
            --len;
1281
0
            n = (n + 1) % 16;
1282
0
        }
1283
0
        if (n == 0) {
1284
0
            GHASH(ctx, ctx->Xn, mres);
1285
0
            mres = 0;
1286
0
        } else {
1287
0
            ctx->mres = mres;
1288
0
            return 0;
1289
0
        }
1290
#else
1291
        while (n && len) {
1292
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1293
            --len;
1294
            n = (n + 1) % 16;
1295
        }
1296
        if (n == 0) {
1297
            GCM_MUL(ctx);
1298
            mres = 0;
1299
        } else {
1300
            ctx->mres = n;
1301
            return 0;
1302
        }
1303
#endif
1304
0
    }
1305
0
#if defined(GHASH)
1306
0
    if (len >= 16 && mres) {
1307
0
        GHASH(ctx, ctx->Xn, mres);
1308
0
        mres = 0;
1309
0
    }
1310
0
#if defined(GHASH_CHUNK)
1311
0
    while (len >= GHASH_CHUNK) {
1312
0
        (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1313
0
        ctr += GHASH_CHUNK / 16;
1314
0
        if (IS_LITTLE_ENDIAN)
1315
#ifdef BSWAP4
1316
            ctx->Yi.d[3] = BSWAP4(ctr);
1317
#else
1318
0
            PUTU32(ctx->Yi.c + 12, ctr);
1319
0
#endif
1320
0
        else
1321
0
            ctx->Yi.d[3] = ctr;
1322
0
        GHASH(ctx, out, GHASH_CHUNK);
1323
0
        out += GHASH_CHUNK;
1324
0
        in += GHASH_CHUNK;
1325
0
        len -= GHASH_CHUNK;
1326
0
    }
1327
0
#endif
1328
0
#endif
1329
0
    if ((i = (len & (size_t)-16))) {
1330
0
        size_t j = i / 16;
1331
1332
0
        (*stream)(in, out, j, key, ctx->Yi.c);
1333
0
        ctr += (unsigned int)j;
1334
0
        if (IS_LITTLE_ENDIAN)
1335
#ifdef BSWAP4
1336
            ctx->Yi.d[3] = BSWAP4(ctr);
1337
#else
1338
0
            PUTU32(ctx->Yi.c + 12, ctr);
1339
0
#endif
1340
0
        else
1341
0
            ctx->Yi.d[3] = ctr;
1342
0
        in += i;
1343
0
        len -= i;
1344
0
#if defined(GHASH)
1345
0
        GHASH(ctx, out, i);
1346
0
        out += i;
1347
#else
1348
        while (j--) {
1349
            for (i = 0; i < 16; ++i)
1350
                ctx->Xi.c[i] ^= out[i];
1351
            GCM_MUL(ctx);
1352
            out += 16;
1353
        }
1354
#endif
1355
0
    }
1356
0
    if (len) {
1357
0
        (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
1358
0
        ++ctr;
1359
0
        if (IS_LITTLE_ENDIAN)
1360
#ifdef BSWAP4
1361
            ctx->Yi.d[3] = BSWAP4(ctr);
1362
#else
1363
0
            PUTU32(ctx->Yi.c + 12, ctr);
1364
0
#endif
1365
0
        else
1366
0
            ctx->Yi.d[3] = ctr;
1367
0
        while (len--) {
1368
0
#if defined(GHASH)
1369
0
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1370
#else
1371
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1372
#endif
1373
0
            ++n;
1374
0
        }
1375
0
    }
1376
1377
0
    ctx->mres = mres;
1378
0
    return 0;
1379
0
#endif
1380
0
}
1381
1382
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1383
    const unsigned char *in, unsigned char *out,
1384
    size_t len, ctr128_f stream)
1385
0
{
1386
#if defined(OPENSSL_SMALL_FOOTPRINT)
1387
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1388
#else
1389
0
    DECLARE_IS_ENDIAN;
1390
0
    unsigned int n, ctr, mres;
1391
0
    size_t i;
1392
0
    u64 mlen = ctx->len.u[1];
1393
0
    void *key = ctx->key;
1394
1395
0
    mlen += len;
1396
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1397
0
        return -1;
1398
0
    ctx->len.u[1] = mlen;
1399
1400
0
    mres = ctx->mres;
1401
1402
0
    if (ctx->ares) {
1403
        /* First call to decrypt finalizes GHASH(AAD) */
1404
0
#if defined(GHASH)
1405
0
        if (len == 0) {
1406
0
            GCM_MUL(ctx);
1407
0
            ctx->ares = 0;
1408
0
            return 0;
1409
0
        }
1410
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1411
0
        ctx->Xi.u[0] = 0;
1412
0
        ctx->Xi.u[1] = 0;
1413
0
        mres = sizeof(ctx->Xi);
1414
#else
1415
        GCM_MUL(ctx);
1416
#endif
1417
0
        ctx->ares = 0;
1418
0
    }
1419
1420
0
    if (IS_LITTLE_ENDIAN)
1421
#ifdef BSWAP4
1422
        ctr = BSWAP4(ctx->Yi.d[3]);
1423
#else
1424
0
        ctr = GETU32(ctx->Yi.c + 12);
1425
0
#endif
1426
0
    else
1427
0
        ctr = ctx->Yi.d[3];
1428
1429
0
    n = mres % 16;
1430
0
    if (n) {
1431
0
#if defined(GHASH)
1432
0
        while (n && len) {
1433
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1434
0
            --len;
1435
0
            n = (n + 1) % 16;
1436
0
        }
1437
0
        if (n == 0) {
1438
0
            GHASH(ctx, ctx->Xn, mres);
1439
0
            mres = 0;
1440
0
        } else {
1441
0
            ctx->mres = mres;
1442
0
            return 0;
1443
0
        }
1444
#else
1445
        while (n && len) {
1446
            u8 c = *(in++);
1447
            *(out++) = c ^ ctx->EKi.c[n];
1448
            ctx->Xi.c[n] ^= c;
1449
            --len;
1450
            n = (n + 1) % 16;
1451
        }
1452
        if (n == 0) {
1453
            GCM_MUL(ctx);
1454
            mres = 0;
1455
        } else {
1456
            ctx->mres = n;
1457
            return 0;
1458
        }
1459
#endif
1460
0
    }
1461
0
#if defined(GHASH)
1462
0
    if (len >= 16 && mres) {
1463
0
        GHASH(ctx, ctx->Xn, mres);
1464
0
        mres = 0;
1465
0
    }
1466
0
#if defined(GHASH_CHUNK)
1467
0
    while (len >= GHASH_CHUNK) {
1468
0
        GHASH(ctx, in, GHASH_CHUNK);
1469
0
        (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1470
0
        ctr += GHASH_CHUNK / 16;
1471
0
        if (IS_LITTLE_ENDIAN)
1472
#ifdef BSWAP4
1473
            ctx->Yi.d[3] = BSWAP4(ctr);
1474
#else
1475
0
            PUTU32(ctx->Yi.c + 12, ctr);
1476
0
#endif
1477
0
        else
1478
0
            ctx->Yi.d[3] = ctr;
1479
0
        out += GHASH_CHUNK;
1480
0
        in += GHASH_CHUNK;
1481
0
        len -= GHASH_CHUNK;
1482
0
    }
1483
0
#endif
1484
0
#endif
1485
0
    if ((i = (len & (size_t)-16))) {
1486
0
        size_t j = i / 16;
1487
1488
0
#if defined(GHASH)
1489
0
        GHASH(ctx, in, i);
1490
#else
1491
        while (j--) {
1492
            size_t k;
1493
            for (k = 0; k < 16; ++k)
1494
                ctx->Xi.c[k] ^= in[k];
1495
            GCM_MUL(ctx);
1496
            in += 16;
1497
        }
1498
        j = i / 16;
1499
        in -= i;
1500
#endif
1501
0
        (*stream)(in, out, j, key, ctx->Yi.c);
1502
0
        ctr += (unsigned int)j;
1503
0
        if (IS_LITTLE_ENDIAN)
1504
#ifdef BSWAP4
1505
            ctx->Yi.d[3] = BSWAP4(ctr);
1506
#else
1507
0
            PUTU32(ctx->Yi.c + 12, ctr);
1508
0
#endif
1509
0
        else
1510
0
            ctx->Yi.d[3] = ctr;
1511
0
        out += i;
1512
0
        in += i;
1513
0
        len -= i;
1514
0
    }
1515
0
    if (len) {
1516
0
        (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
1517
0
        ++ctr;
1518
0
        if (IS_LITTLE_ENDIAN)
1519
#ifdef BSWAP4
1520
            ctx->Yi.d[3] = BSWAP4(ctr);
1521
#else
1522
0
            PUTU32(ctx->Yi.c + 12, ctr);
1523
0
#endif
1524
0
        else
1525
0
            ctx->Yi.d[3] = ctr;
1526
0
        while (len--) {
1527
0
#if defined(GHASH)
1528
0
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1529
#else
1530
            u8 c = in[n];
1531
            ctx->Xi.c[mres++] ^= c;
1532
            out[n] = c ^ ctx->EKi.c[n];
1533
#endif
1534
0
            ++n;
1535
0
        }
1536
0
    }
1537
1538
0
    ctx->mres = mres;
1539
0
    return 0;
1540
0
#endif
1541
0
}
1542
1543
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1544
    size_t len)
1545
0
{
1546
0
    DECLARE_IS_ENDIAN;
1547
0
    u64 alen = ctx->len.u[0] << 3;
1548
0
    u64 clen = ctx->len.u[1] << 3;
1549
1550
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1551
0
    u128 bitlen;
1552
0
    unsigned int mres = ctx->mres;
1553
1554
0
    if (mres) {
1555
0
        unsigned blocks = (mres + 15) & -16;
1556
1557
0
        memset(ctx->Xn + mres, 0, blocks - mres);
1558
0
        mres = blocks;
1559
0
        if (mres == sizeof(ctx->Xn)) {
1560
0
            GHASH(ctx, ctx->Xn, mres);
1561
0
            mres = 0;
1562
0
        }
1563
0
    } else if (ctx->ares) {
1564
0
        GCM_MUL(ctx);
1565
0
    }
1566
#else
1567
    if (ctx->mres || ctx->ares)
1568
        GCM_MUL(ctx);
1569
#endif
1570
1571
0
    if (IS_LITTLE_ENDIAN) {
1572
#ifdef BSWAP8
1573
        alen = BSWAP8(alen);
1574
        clen = BSWAP8(clen);
1575
#else
1576
0
        u8 *p = ctx->len.c;
1577
1578
0
        ctx->len.u[0] = alen;
1579
0
        ctx->len.u[1] = clen;
1580
1581
0
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1582
0
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1583
0
#endif
1584
0
    }
1585
1586
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1587
0
    bitlen.hi = alen;
1588
0
    bitlen.lo = clen;
1589
0
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1590
0
    mres += sizeof(bitlen);
1591
0
    GHASH(ctx, ctx->Xn, mres);
1592
#else
1593
    ctx->Xi.u[0] ^= alen;
1594
    ctx->Xi.u[1] ^= clen;
1595
    GCM_MUL(ctx);
1596
#endif
1597
1598
0
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1599
0
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1600
1601
0
    if (tag && len <= sizeof(ctx->Xi))
1602
0
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1603
0
    else
1604
0
        return -1;
1605
0
}
1606
1607
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1608
0
{
1609
0
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1610
0
    memcpy(tag, ctx->Xi.c,
1611
0
        len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1612
0
}
1613
1614
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1615
0
{
1616
0
    GCM128_CONTEXT *ret;
1617
1618
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1619
0
        CRYPTO_gcm128_init(ret, key, block);
1620
1621
0
    return ret;
1622
0
}
1623
1624
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1625
0
{
1626
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1627
0
}
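/*
 * Editorial example (hedged, not part of the original source): a minimal
 * one-shot AES-128-GCM "seal" built on the public CRYPTO_gcm128_* API from
 * this file.  It assumes the legacy AES_set_encrypt_key()/AES_encrypt()
 * functions from <openssl/aes.h> (deprecated since OpenSSL 3.0 but still
 * available) and casts AES_encrypt to block128_f, as OpenSSL's own tests
 * historically did; gcm_seal_example is a hypothetical helper name.  Real
 * applications should normally use the EVP AEAD interface instead.
 */
#include <openssl/aes.h>
#include <openssl/modes.h>

static int gcm_seal_example(const unsigned char key[16],
                            const unsigned char iv[12],
                            const unsigned char *aad, size_t aad_len,
                            const unsigned char *pt, size_t pt_len,
                            unsigned char *ct, unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT *gcm;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return 0;
    if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
        return 0;

    CRYPTO_gcm128_setiv(gcm, iv, 12);            /* 96-bit IV fast path */
    if (CRYPTO_gcm128_aad(gcm, aad, aad_len) != 0
            || CRYPTO_gcm128_encrypt(gcm, pt, ct, pt_len) != 0) {
        CRYPTO_gcm128_release(gcm);
        return 0;
    }
    CRYPTO_gcm128_tag(gcm, tag, 16);             /* finishes GHASH, copies tag */
    CRYPTO_gcm128_release(gcm);
    return 1;
}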