Coverage Report

Created: 2025-06-13 06:58

/src/openssl31/crypto/modes/gcm128.c
Line | Count | Source
1
/*
2
 * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <string.h>
11
#include <openssl/crypto.h>
12
#include "internal/cryptlib.h"
13
#include "internal/endian.h"
14
#include "crypto/modes.h"
15
16
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17
typedef size_t size_t_aX __attribute((__aligned__(1)));
18
#else
19
typedef size_t size_t_aX;
20
#endif
21
22
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23
/* redefine, because alignment is ensured */
24
# undef  GETU32
25
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
26
# undef  PUTU32
27
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
28
#endif
29
30
/* RISC-V uses the C implementation of gmult as a fallback. */
31
#if defined(__riscv)
32
# define INCLUDE_C_GMULT_4BIT
33
#endif
34
35
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
36
0
#define REDUCE1BIT(V)   do { \
37
0
        if (sizeof(size_t)==8) { \
38
0
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
39
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
40
0
                V.hi  = (V.hi>>1 )^T; \
41
0
        } \
42
0
        else { \
43
0
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
44
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
45
0
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
46
0
        } \
47
0
} while(0)
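
A note on the constant in both branches above: GHASH works in GF(2^128) using
the bit-reflected representation of NIST SP 800-38D, so a multiplication by x
is a right shift, and whenever the bit shifted out is set the value is reduced
by the field polynomial

    P(x) = x^128 + x^7 + x^2 + x + 1

The low coefficients of P(x), 1110 0001 in ascending-degree order, land in the
most significant byte of the reflected representation, which is exactly the
0xe1... constant that REDUCE1BIT XORs into the top of V.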
48
49
/*-
50
 *
51
 * NOTE: TABLE_BITS and all non-4-bit implementations have been removed in 3.1.
52
 *
53
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it should
54
 * never be set to 8; 8 is effectively reserved for testing purposes.
55
 * TABLE_BITS>1 selects lookup-table-driven implementations referred to as
56
 * "Shoup's" in the GCM specification. In other words, OpenSSL does not cover
57
 * the whole spectrum of possible table-driven implementations. Why? In the
58
 * non-"Shoup's" case the memory access pattern is segmented in such a manner
59
 * that it's trivial to see that cache-timing information can reveal a
60
 * fair portion of the intermediate hash value. Given that the ciphertext is
61
 * always available to the attacker, the attacker can attempt to
62
 * deduce the secret parameter H and, if successful, tamper with messages
63
 * [which is entirely trivial in CTR mode]. In "Shoup's" case it's
64
 * not as easy, but there is no reason to believe that it's resistant
65
 * to cache-timing attacks. As for the "8-bit" implementation, it
66
 * consumes 16 (sixteen) times more memory: 4KB per individual
67
 * key + 1KB shared. On the plus side it should be twice as fast as the
68
 * "4-bit" version, and for gcc-generated x86[_64] code the "8-bit" version
69
 * was observed to run ~75% faster, closer to 100% with commercial
70
 * compilers... Yet the "4-bit" procedure is preferred, because it's
71
 * believed to provide a better security/performance balance and adequate
72
 * all-round performance. "All-round" refers to things like:
73
 *
74
 * - a shorter setup time effectively improves overall timing when
75
 *   handling short messages;
76
 * - a larger table allocation can become unbearable because of VM
77
 *   subsystem penalties (for example, on Windows a large enough free()
78
 *   results in VM working-set trimming, meaning that a subsequent
79
 *   malloc() would immediately incur working-set expansion);
80
 * - a larger table has a larger cache footprint, which can affect the
81
 *   performance of other code paths (not necessarily even from the same
82
 *   thread in a Hyper-Threading world);
83
 *
84
 * A value of 1 is not appropriate for performance reasons.
85
 */
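
To make the footprint figures above concrete (each Htable entry below is a
16-byte u128):

    "4-bit": 2^4 entries * 16 bytes = 256 bytes per key
    "8-bit": 2^8 entries * 16 bytes = 4KB per key, i.e. 16x as much,
             plus the 1KB shared table the comment mentions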
86
87
static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
88
0
{
89
0
    u128 V;
90
# if defined(OPENSSL_SMALL_FOOTPRINT)
91
    int i;
92
# endif
93
94
0
    Htable[0].hi = 0;
95
0
    Htable[0].lo = 0;
96
0
    V.hi = H[0];
97
0
    V.lo = H[1];
98
99
# if defined(OPENSSL_SMALL_FOOTPRINT)
100
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
101
        REDUCE1BIT(V);
102
        Htable[i] = V;
103
    }
104
105
    for (i = 2; i < 16; i <<= 1) {
106
        u128 *Hi = Htable + i;
107
        int j;
108
        for (V = *Hi, j = 1; j < i; ++j) {
109
            Hi[j].hi = V.hi ^ Htable[j].hi;
110
            Hi[j].lo = V.lo ^ Htable[j].lo;
111
        }
112
    }
113
# else
114
0
    Htable[8] = V;
115
0
    REDUCE1BIT(V);
116
0
    Htable[4] = V;
117
0
    REDUCE1BIT(V);
118
0
    Htable[2] = V;
119
0
    REDUCE1BIT(V);
120
0
    Htable[1] = V;
121
0
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
122
0
    V = Htable[4];
123
0
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
124
0
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
125
0
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
126
0
    V = Htable[8];
127
0
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
128
0
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
129
0
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
130
0
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
131
0
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
132
0
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
133
0
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
134
0
# endif
135
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
136
    /*
137
     * ARM assembler expects specific dword order in Htable.
138
     */
139
    {
140
        int j;
141
        DECLARE_IS_ENDIAN;
142
143
        if (IS_LITTLE_ENDIAN)
144
            for (j = 0; j < 16; ++j) {
145
                V = Htable[j];
146
                Htable[j].hi = V.lo;
147
                Htable[j].lo = V.hi;
148
            } else
149
            for (j = 0; j < 16; ++j) {
150
                V = Htable[j];
151
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
152
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
153
            }
154
    }
155
# endif
156
0
}
157
158
# if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
159
static const size_t rem_4bit[16] = {
160
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
161
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
162
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
163
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
164
};
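
How this table is consumed by gcm_gmult_4bit below: each right shift of Z by
4 bits drops four polynomial coefficients, and rem_4bit[rem] is their
precomputed, pre-reduced contribution, PACK-ed into the top 16 bits of a
size_t. Every entry is the XOR of the per-bit values 0x1C20, 0x3840, 0x7080
and 0xE100; for example:

    rem_4bit[5] == PACK(0x7080 ^ 0x1C20) == PACK(0x6CA0)
    /* and on a 64-bit target PACK(0x6CA0) == (size_t)0x6CA0 << 48 */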
165
166
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
167
{
168
    u128 Z;
169
    int cnt = 15;
170
    size_t rem, nlo, nhi;
171
    DECLARE_IS_ENDIAN;
172
173
    nlo = ((const u8 *)Xi)[15];
174
    nhi = nlo >> 4;
175
    nlo &= 0xf;
176
177
    Z.hi = Htable[nlo].hi;
178
    Z.lo = Htable[nlo].lo;
179
180
    while (1) {
181
        rem = (size_t)Z.lo & 0xf;
182
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
183
        Z.hi = (Z.hi >> 4);
184
        if (sizeof(size_t) == 8)
185
            Z.hi ^= rem_4bit[rem];
186
        else
187
            Z.hi ^= (u64)rem_4bit[rem] << 32;
188
189
        Z.hi ^= Htable[nhi].hi;
190
        Z.lo ^= Htable[nhi].lo;
191
192
        if (--cnt < 0)
193
            break;
194
195
        nlo = ((const u8 *)Xi)[cnt];
196
        nhi = nlo >> 4;
197
        nlo &= 0xf;
198
199
        rem = (size_t)Z.lo & 0xf;
200
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
201
        Z.hi = (Z.hi >> 4);
202
        if (sizeof(size_t) == 8)
203
            Z.hi ^= rem_4bit[rem];
204
        else
205
            Z.hi ^= (u64)rem_4bit[rem] << 32;
206
207
        Z.hi ^= Htable[nlo].hi;
208
        Z.lo ^= Htable[nlo].lo;
209
    }
210
211
    if (IS_LITTLE_ENDIAN) {
212
#  ifdef BSWAP8
213
        Xi[0] = BSWAP8(Z.hi);
214
        Xi[1] = BSWAP8(Z.lo);
215
#  else
216
        u8 *p = (u8 *)Xi;
217
        u32 v;
218
        v = (u32)(Z.hi >> 32);
219
        PUTU32(p, v);
220
        v = (u32)(Z.hi);
221
        PUTU32(p + 4, v);
222
        v = (u32)(Z.lo >> 32);
223
        PUTU32(p + 8, v);
224
        v = (u32)(Z.lo);
225
        PUTU32(p + 12, v);
226
#  endif
227
    } else {
228
        Xi[0] = Z.hi;
229
        Xi[1] = Z.lo;
230
    }
231
}
232
233
# endif
234
235
# if !defined(GHASH_ASM)
236
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
237
/*
238
 * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
239
 * details... Compiler-generated code doesn't seem to give any
240
 * performance improvement, at least not on x86[_64]. It's here
241
 * mostly as reference and a placeholder for possible future
242
 * non-trivial optimization[s]...
243
 */
244
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
245
                           const u8 *inp, size_t len)
246
{
247
    u128 Z;
248
    int cnt;
249
    size_t rem, nlo, nhi;
250
    DECLARE_IS_ENDIAN;
251
252
    do {
253
        cnt = 15;
254
        nlo = ((const u8 *)Xi)[15];
255
        nlo ^= inp[15];
256
        nhi = nlo >> 4;
257
        nlo &= 0xf;
258
259
        Z.hi = Htable[nlo].hi;
260
        Z.lo = Htable[nlo].lo;
261
262
        while (1) {
263
            rem = (size_t)Z.lo & 0xf;
264
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
265
            Z.hi = (Z.hi >> 4);
266
            if (sizeof(size_t) == 8)
267
                Z.hi ^= rem_4bit[rem];
268
            else
269
                Z.hi ^= (u64)rem_4bit[rem] << 32;
270
271
            Z.hi ^= Htable[nhi].hi;
272
            Z.lo ^= Htable[nhi].lo;
273
274
            if (--cnt < 0)
275
                break;
276
277
            nlo = ((const u8 *)Xi)[cnt];
278
            nlo ^= inp[cnt];
279
            nhi = nlo >> 4;
280
            nlo &= 0xf;
281
282
            rem = (size_t)Z.lo & 0xf;
283
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
284
            Z.hi = (Z.hi >> 4);
285
            if (sizeof(size_t) == 8)
286
                Z.hi ^= rem_4bit[rem];
287
            else
288
                Z.hi ^= (u64)rem_4bit[rem] << 32;
289
290
            Z.hi ^= Htable[nlo].hi;
291
            Z.lo ^= Htable[nlo].lo;
292
        }
293
294
        if (IS_LITTLE_ENDIAN) {
295
#   ifdef BSWAP8
296
            Xi[0] = BSWAP8(Z.hi);
297
            Xi[1] = BSWAP8(Z.lo);
298
#   else
299
            u8 *p = (u8 *)Xi;
300
            u32 v;
301
            v = (u32)(Z.hi >> 32);
302
            PUTU32(p, v);
303
            v = (u32)(Z.hi);
304
            PUTU32(p + 4, v);
305
            v = (u32)(Z.lo >> 32);
306
            PUTU32(p + 8, v);
307
            v = (u32)(Z.lo);
308
            PUTU32(p + 12, v);
309
#   endif
310
        } else {
311
            Xi[0] = Z.hi;
312
            Xi[1] = Z.lo;
313
        }
314
    } while (inp += 16, len -= 16);
315
}
316
#  endif
317
# else
318
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
319
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
320
                    size_t len);
321
# endif
322
323
291k
# define GCM_MUL(ctx)      ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
324
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
325
3.19M
#  define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
326
/*
327
 * GHASH_CHUNK is a "stride parameter" intended to mitigate the cache-thrashing
328
 * effect. In other words, the idea is to hash data while it's still in the L1
329
 * cache after the encryption pass...
330
 */
331
2.24M
#  define GHASH_CHUNK       (3*1024)
332
# endif
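
A minimal sketch of the stride idea (the real loops appear in the
encrypt/decrypt bodies below; encrypt_chunk_ctr is a hypothetical stand-in
for the counter-mode pass):

    while (len >= GHASH_CHUNK) {
        encrypt_chunk_ctr(out, in, GHASH_CHUNK); /* produce 3KB of ciphertext */
        GHASH(ctx, out, GHASH_CHUNK);            /* hash it while cache-hot   */
        in  += GHASH_CHUNK;
        out += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }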
333
334
#if     (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
335
# if    !defined(I386_ONLY) && \
336
        (defined(__i386)        || defined(__i386__)    || \
337
         defined(__x86_64)      || defined(__x86_64__)  || \
338
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
339
#  define GHASH_ASM_X86_OR_64
340
341
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
342
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
343
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
344
                     size_t len);
345
346
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
347
#   define gcm_init_avx   gcm_init_clmul
348
#   define gcm_gmult_avx  gcm_gmult_clmul
349
#   define gcm_ghash_avx  gcm_ghash_clmul
350
#  else
351
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
352
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
353
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
354
                   size_t len);
355
#  endif
356
357
#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
358
#   define GHASH_ASM_X86
359
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
360
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
361
                        size_t len);
362
363
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
364
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
365
                        size_t len);
366
#  endif
367
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
368
#  include "arm_arch.h"
369
#  if __ARM_MAX_ARCH__>=7
370
#   define GHASH_ASM_ARM
371
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
372
#   if defined(__arm__) || defined(__arm)
373
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
374
#   endif
375
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
376
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
377
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
378
                    size_t len);
379
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
380
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
381
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
382
                  size_t len);
383
#  endif
384
# elif defined(__sparc__) || defined(__sparc)
385
#  include "crypto/sparc_arch.h"
386
#  define GHASH_ASM_SPARC
387
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
388
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
389
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
390
                    size_t len);
391
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
392
#  include "crypto/ppc_arch.h"
393
#  define GHASH_ASM_PPC
394
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
395
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
396
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
397
                  size_t len);
398
# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
399
#  include "crypto/riscv_arch.h"
400
#  define GHASH_ASM_RISCV
401
#  undef  GHASH
402
void gcm_init_clmul_rv64i_zbb_zbc(u128 Htable[16], const u64 Xi[2]);
403
void gcm_gmult_clmul_rv64i_zbb_zbc(u64 Xi[2], const u128 Htable[16]);
404
# endif
405
#endif
406
407
static void gcm_get_funcs(struct gcm_funcs_st *ctx)
408
224k
{
409
    /* set defaults -- overridden below as needed */
410
224k
    ctx->ginit = gcm_init_4bit;
411
#if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
412
    ctx->gmult = gcm_gmult_4bit;
413
#else
414
224k
    ctx->gmult = NULL;
415
224k
#endif
416
#if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
417
    ctx->ghash = gcm_ghash_4bit;
418
#else
419
224k
    ctx->ghash = NULL;
420
224k
#endif
421
422
224k
#if defined(GHASH_ASM_X86_OR_64)
423
224k
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
424
    /* x86_64 */
425
224k
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
426
224k
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
427
224k
            ctx->ginit = gcm_init_avx;
428
224k
            ctx->gmult = gcm_gmult_avx;
429
224k
            ctx->ghash = gcm_ghash_avx;
430
224k
        } else {
431
0
            ctx->ginit = gcm_init_clmul;
432
0
            ctx->gmult = gcm_gmult_clmul;
433
0
            ctx->ghash = gcm_ghash_clmul;
434
0
        }
435
224k
        return;
436
224k
    }
437
0
# endif
438
# if defined(GHASH_ASM_X86)
439
    /* x86 only */
440
#  if defined(OPENSSL_IA32_SSE2)
441
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
442
        ctx->gmult = gcm_gmult_4bit_mmx;
443
        ctx->ghash = gcm_ghash_4bit_mmx;
444
        return;
445
    }
446
#  else
447
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
448
        ctx->gmult = gcm_gmult_4bit_mmx;
449
        ctx->ghash = gcm_ghash_4bit_mmx;
450
        return;
451
    }
452
#  endif
453
    ctx->gmult = gcm_gmult_4bit_x86;
454
    ctx->ghash = gcm_ghash_4bit_x86;
455
    return;
456
# else
457
    /* x86_64 fallback defaults */
458
0
    ctx->gmult = gcm_gmult_4bit;
459
0
    ctx->ghash = gcm_ghash_4bit;
460
0
    return;
461
224k
# endif
462
#elif defined(GHASH_ASM_ARM)
463
    /* ARM defaults */
464
    ctx->gmult = gcm_gmult_4bit;
465
# if !defined(OPENSSL_SMALL_FOOTPRINT)
466
    ctx->ghash = gcm_ghash_4bit;
467
# else
468
    ctx->ghash = NULL;
469
# endif
470
# ifdef PMULL_CAPABLE
471
    if (PMULL_CAPABLE) {
472
        ctx->ginit = (gcm_init_fn)gcm_init_v8;
473
        ctx->gmult = gcm_gmult_v8;
474
        ctx->ghash = gcm_ghash_v8;
475
    }
476
# elif defined(NEON_CAPABLE)
477
    if (NEON_CAPABLE) {
478
        ctx->ginit = gcm_init_neon;
479
        ctx->gmult = gcm_gmult_neon;
480
        ctx->ghash = gcm_ghash_neon;
481
    }
482
# endif
483
    return;
484
#elif defined(GHASH_ASM_SPARC)
485
    /* SPARC defaults */
486
    ctx->gmult = gcm_gmult_4bit;
487
    ctx->ghash = gcm_ghash_4bit;
488
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
489
        ctx->ginit = gcm_init_vis3;
490
        ctx->gmult = gcm_gmult_vis3;
491
        ctx->ghash = gcm_ghash_vis3;
492
    }
493
    return;
494
#elif defined(GHASH_ASM_PPC)
495
    /* PowerPC does not define GHASH_ASM; defaults set above */
496
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
497
        ctx->ginit = gcm_init_p8;
498
        ctx->gmult = gcm_gmult_p8;
499
        ctx->ghash = gcm_ghash_p8;
500
    }
501
    return;
502
#elif defined(GHASH_ASM_RISCV) && __riscv_xlen == 64
503
    /* RISC-V defaults; gmult already set above */
504
    ctx->ghash = NULL;
505
    if (RISCV_HAS_ZBB() && RISCV_HAS_ZBC()) {
506
        ctx->ginit = gcm_init_clmul_rv64i_zbb_zbc;
507
        ctx->gmult = gcm_gmult_clmul_rv64i_zbb_zbc;
508
    }
509
    return;
510
#elif defined(GHASH_ASM)
511
    /* all other architectures use the generic names */
512
    ctx->gmult = gcm_gmult_4bit;
513
    ctx->ghash = gcm_ghash_4bit;
514
    return;
515
#endif
516
224k
}
517
518
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
519
224k
{
520
224k
    DECLARE_IS_ENDIAN;
521
522
224k
    memset(ctx, 0, sizeof(*ctx));
523
224k
    ctx->block = block;
524
224k
    ctx->key = key;
525
526
224k
    (*block) (ctx->H.c, ctx->H.c, key);
527
528
224k
    if (IS_LITTLE_ENDIAN) {
529
        /* H is stored in host byte order */
530
#ifdef BSWAP8
531
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
532
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
533
#else
534
224k
        u8 *p = ctx->H.c;
535
224k
        u64 hi, lo;
536
224k
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
537
224k
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
538
224k
        ctx->H.u[0] = hi;
539
224k
        ctx->H.u[1] = lo;
540
224k
#endif
541
224k
    }
542
543
224k
    gcm_get_funcs(&ctx->funcs);
544
224k
    ctx->funcs.ginit(ctx->Htable, ctx->H.u);
545
224k
}
546
547
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
548
                         size_t len)
549
1.50M
{
550
1.50M
    DECLARE_IS_ENDIAN;
551
1.50M
    unsigned int ctr;
552
553
1.50M
    ctx->len.u[0] = 0;          /* AAD length */
554
1.50M
    ctx->len.u[1] = 0;          /* message length */
555
1.50M
    ctx->ares = 0;
556
1.50M
    ctx->mres = 0;
557
558
1.50M
    if (len == 12) {
559
1.50M
        memcpy(ctx->Yi.c, iv, 12);
560
1.50M
        ctx->Yi.c[12] = 0;
561
1.50M
        ctx->Yi.c[13] = 0;
562
1.50M
        ctx->Yi.c[14] = 0;
563
1.50M
        ctx->Yi.c[15] = 1;
564
1.50M
        ctr = 1;
565
1.50M
    } else {
566
0
        size_t i;
567
0
        u64 len0 = len;
568
569
        /* Borrow ctx->Xi to calculate initial Yi */
570
0
        ctx->Xi.u[0] = 0;
571
0
        ctx->Xi.u[1] = 0;
572
573
0
        while (len >= 16) {
574
0
            for (i = 0; i < 16; ++i)
575
0
                ctx->Xi.c[i] ^= iv[i];
576
0
            GCM_MUL(ctx);
577
0
            iv += 16;
578
0
            len -= 16;
579
0
        }
580
0
        if (len) {
581
0
            for (i = 0; i < len; ++i)
582
0
                ctx->Xi.c[i] ^= iv[i];
583
0
            GCM_MUL(ctx);
584
0
        }
585
0
        len0 <<= 3;
586
0
        if (IS_LITTLE_ENDIAN) {
587
#ifdef BSWAP8
588
            ctx->Xi.u[1] ^= BSWAP8(len0);
589
#else
590
0
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
591
0
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
592
0
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
593
0
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
594
0
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
595
0
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
596
0
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
597
0
            ctx->Xi.c[15] ^= (u8)(len0);
598
0
#endif
599
0
        } else {
600
0
            ctx->Xi.u[1] ^= len0;
601
0
        }
602
603
0
        GCM_MUL(ctx);
604
605
0
        if (IS_LITTLE_ENDIAN)
606
#ifdef BSWAP4
607
            ctr = BSWAP4(ctx->Xi.d[3]);
608
#else
609
0
            ctr = GETU32(ctx->Xi.c + 12);
610
0
#endif
611
0
        else
612
0
            ctr = ctx->Xi.d[3];
613
614
        /* Copy borrowed Xi to Yi */
615
0
        ctx->Yi.u[0] = ctx->Xi.u[0];
616
0
        ctx->Yi.u[1] = ctx->Xi.u[1];
617
0
    }
618
619
1.50M
    ctx->Xi.u[0] = 0;
620
1.50M
    ctx->Xi.u[1] = 0;
621
622
1.50M
    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
623
1.50M
    ++ctr;
624
1.50M
    if (IS_LITTLE_ENDIAN)
625
#ifdef BSWAP4
626
        ctx->Yi.d[3] = BSWAP4(ctr);
627
#else
628
1.50M
        PUTU32(ctx->Yi.c + 12, ctr);
629
0
#endif
630
0
    else
631
0
        ctx->Yi.d[3] = ctr;
632
1.50M
}
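
The two branches above are the standard counter-block derivation from NIST
SP 800-38D (a summary, not code from this file):

    |IV| == 96 bits:  Y0 = IV || 0^31 || 1                 (the len == 12 path)
    otherwise:        Y0 = GHASH_H(IV zero-padded to a block boundary
                                   || 0^64 || [bitlen(IV)]_64)

which is why the else-branch borrows ctx->Xi, folds the whole IV in with
GCM_MUL, XORs in the 64-bit bit length, and multiplies once more.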
633
634
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
635
                      size_t len)
636
1.60M
{
637
1.60M
    size_t i;
638
1.60M
    unsigned int n;
639
1.60M
    u64 alen = ctx->len.u[0];
640
641
1.60M
    if (ctx->len.u[1])
642
0
        return -2;
643
644
1.60M
    alen += len;
645
1.60M
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
646
0
        return -1;
647
1.60M
    ctx->len.u[0] = alen;
648
649
1.60M
    n = ctx->ares;
650
1.60M
    if (n) {
651
452k
        while (n && len) {
652
358k
            ctx->Xi.c[n] ^= *(aad++);
653
358k
            --len;
654
358k
            n = (n + 1) % 16;
655
358k
        }
656
94.8k
        if (n == 0)
657
9.12k
            GCM_MUL(ctx);
658
85.7k
        else {
659
85.7k
            ctx->ares = n;
660
85.7k
            return 0;
661
85.7k
        }
662
94.8k
    }
663
1.51M
#ifdef GHASH
664
1.51M
    if ((i = (len & (size_t)-16))) {
665
529k
        GHASH(ctx, aad, i);
666
529k
        aad += i;
667
529k
        len -= i;
668
529k
    }
669
#else
670
    while (len >= 16) {
671
        for (i = 0; i < 16; ++i)
672
            ctx->Xi.c[i] ^= aad[i];
673
        GCM_MUL(ctx);
674
        aad += 16;
675
        len -= 16;
676
    }
677
#endif
678
1.51M
    if (len) {
679
1.34M
        n = (unsigned int)len;
680
15.6M
        for (i = 0; i < len; ++i)
681
14.2M
            ctx->Xi.c[i] ^= aad[i];
682
1.34M
    }
683
684
1.51M
    ctx->ares = n;
685
1.51M
    return 0;
686
1.60M
}
687
688
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
689
                          const unsigned char *in, unsigned char *out,
690
                          size_t len)
691
611k
{
692
611k
    DECLARE_IS_ENDIAN;
693
611k
    unsigned int n, ctr, mres;
694
611k
    size_t i;
695
611k
    u64 mlen = ctx->len.u[1];
696
611k
    block128_f block = ctx->block;
697
611k
    void *key = ctx->key;
698
699
611k
    mlen += len;
700
611k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
701
0
        return -1;
702
611k
    ctx->len.u[1] = mlen;
703
704
611k
    mres = ctx->mres;
705
706
611k
    if (ctx->ares) {
707
        /* First call to encrypt finalizes GHASH(AAD) */
708
4.90k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
709
4.90k
        if (len == 0) {
710
4.45k
            GCM_MUL(ctx);
711
4.45k
            ctx->ares = 0;
712
4.45k
            return 0;
713
4.45k
        }
714
449
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
715
449
        ctx->Xi.u[0] = 0;
716
449
        ctx->Xi.u[1] = 0;
717
449
        mres = sizeof(ctx->Xi);
718
#else
719
        GCM_MUL(ctx);
720
#endif
721
449
        ctx->ares = 0;
722
449
    }
723
724
606k
    if (IS_LITTLE_ENDIAN)
725
#ifdef BSWAP4
726
        ctr = BSWAP4(ctx->Yi.d[3]);
727
#else
728
606k
        ctr = GETU32(ctx->Yi.c + 12);
729
0
#endif
730
0
    else
731
0
        ctr = ctx->Yi.d[3];
732
733
606k
    n = mres % 16;
734
606k
#if !defined(OPENSSL_SMALL_FOOTPRINT)
735
606k
    if (16 % sizeof(size_t) == 0) { /* always true actually */
736
606k
        do {
737
606k
            if (n) {
738
596k
# if defined(GHASH)
739
8.82M
                while (n && len) {
740
8.23M
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
741
8.23M
                    --len;
742
8.23M
                    n = (n + 1) % 16;
743
8.23M
                }
744
596k
                if (n == 0) {
745
596k
                    GHASH(ctx, ctx->Xn, mres);
746
596k
                    mres = 0;
747
596k
                } else {
748
0
                    ctx->mres = mres;
749
0
                    return 0;
750
0
                }
751
# else
752
                while (n && len) {
753
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
754
                    --len;
755
                    n = (n + 1) % 16;
756
                }
757
                if (n == 0) {
758
                    GCM_MUL(ctx);
759
                    mres = 0;
760
                } else {
761
                    ctx->mres = n;
762
                    return 0;
763
                }
764
# endif
765
596k
            }
766
606k
# if defined(STRICT_ALIGNMENT)
767
606k
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
768
603k
                break;
769
3.16k
# endif
770
3.16k
# if defined(GHASH)
771
3.16k
            if (len >= 16 && mres) {
772
234
                GHASH(ctx, ctx->Xn, mres);
773
234
                mres = 0;
774
234
            }
775
3.16k
#  if defined(GHASH_CHUNK)
776
3.16k
            while (len >= GHASH_CHUNK) {
777
0
                size_t j = GHASH_CHUNK;
778
779
0
                while (j) {
780
0
                    size_t_aX *out_t = (size_t_aX *)out;
781
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
782
783
0
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
784
0
                    ++ctr;
785
0
                    if (IS_LITTLE_ENDIAN)
786
#   ifdef BSWAP4
787
                        ctx->Yi.d[3] = BSWAP4(ctr);
788
#   else
789
0
                        PUTU32(ctx->Yi.c + 12, ctr);
790
0
#   endif
791
0
                    else
792
0
                        ctx->Yi.d[3] = ctr;
793
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
794
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
795
0
                    out += 16;
796
0
                    in += 16;
797
0
                    j -= 16;
798
0
                }
799
0
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
800
0
                len -= GHASH_CHUNK;
801
0
            }
802
3.16k
#  endif
803
3.16k
            if ((i = (len & (size_t)-16))) {
804
234
                size_t j = i;
805
806
468
                while (len >= 16) {
807
234
                    size_t_aX *out_t = (size_t_aX *)out;
808
234
                    const size_t_aX *in_t = (const size_t_aX *)in;
809
810
234
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
811
234
                    ++ctr;
812
234
                    if (IS_LITTLE_ENDIAN)
813
#  ifdef BSWAP4
814
                        ctx->Yi.d[3] = BSWAP4(ctr);
815
#  else
816
234
                        PUTU32(ctx->Yi.c + 12, ctr);
817
0
#  endif
818
0
                    else
819
0
                        ctx->Yi.d[3] = ctr;
820
702
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
821
468
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
822
234
                    out += 16;
823
234
                    in += 16;
824
234
                    len -= 16;
825
234
                }
826
234
                GHASH(ctx, out - j, j);
827
234
            }
828
# else
829
            while (len >= 16) {
830
                size_t *out_t = (size_t *)out;
831
                const size_t *in_t = (const size_t *)in;
832
833
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
834
                ++ctr;
835
                if (IS_LITTLE_ENDIAN)
836
#  ifdef BSWAP4
837
                    ctx->Yi.d[3] = BSWAP4(ctr);
838
#  else
839
                    PUTU32(ctx->Yi.c + 12, ctr);
840
#  endif
841
                else
842
                    ctx->Yi.d[3] = ctr;
843
                for (i = 0; i < 16 / sizeof(size_t); ++i)
844
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
845
                GCM_MUL(ctx);
846
                out += 16;
847
                in += 16;
848
                len -= 16;
849
            }
850
# endif
851
3.16k
            if (len) {
852
215
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
853
215
                ++ctr;
854
215
                if (IS_LITTLE_ENDIAN)
855
# ifdef BSWAP4
856
                    ctx->Yi.d[3] = BSWAP4(ctr);
857
# else
858
215
                    PUTU32(ctx->Yi.c + 12, ctr);
859
0
# endif
860
0
                else
861
0
                    ctx->Yi.d[3] = ctr;
862
215
# if defined(GHASH)
863
645
                while (len--) {
864
430
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
865
430
                    ++n;
866
430
                }
867
# else
868
                while (len--) {
869
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
870
                    ++n;
871
                }
872
                mres = n;
873
# endif
874
215
            }
875
876
3.16k
            ctx->mres = mres;
877
3.16k
            return 0;
878
606k
        } while (0);
879
606k
    }
880
603k
#endif
881
603k
    for (i = 0; i < len; ++i) {
882
0
        if (n == 0) {
883
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
884
0
            ++ctr;
885
0
            if (IS_LITTLE_ENDIAN)
886
#ifdef BSWAP4
887
                ctx->Yi.d[3] = BSWAP4(ctr);
888
#else
889
0
                PUTU32(ctx->Yi.c + 12, ctr);
890
0
#endif
891
0
            else
892
0
                ctx->Yi.d[3] = ctr;
893
0
        }
894
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
895
0
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
896
0
        n = (n + 1) % 16;
897
0
        if (mres == sizeof(ctx->Xn)) {
898
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
899
0
            mres = 0;
900
0
        }
901
#else
902
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
903
        mres = n = (n + 1) % 16;
904
        if (n == 0)
905
            GCM_MUL(ctx);
906
#endif
907
0
    }
908
909
603k
    ctx->mres = mres;
910
603k
    return 0;
911
606k
}
912
913
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
914
                          const unsigned char *in, unsigned char *out,
915
                          size_t len)
916
251k
{
917
251k
    DECLARE_IS_ENDIAN;
918
251k
    unsigned int n, ctr, mres;
919
251k
    size_t i;
920
251k
    u64 mlen = ctx->len.u[1];
921
251k
    block128_f block = ctx->block;
922
251k
    void *key = ctx->key;
923
924
251k
    mlen += len;
925
251k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
926
0
        return -1;
927
251k
    ctx->len.u[1] = mlen;
928
929
251k
    mres = ctx->mres;
930
931
251k
    if (ctx->ares) {
932
        /* First call to decrypt finalizes GHASH(AAD) */
933
95.1k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
934
95.1k
        if (len == 0) {
935
88.1k
            GCM_MUL(ctx);
936
88.1k
            ctx->ares = 0;
937
88.1k
            return 0;
938
88.1k
        }
939
7.00k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
940
7.00k
        ctx->Xi.u[0] = 0;
941
7.00k
        ctx->Xi.u[1] = 0;
942
7.00k
        mres = sizeof(ctx->Xi);
943
#else
944
        GCM_MUL(ctx);
945
#endif
946
7.00k
        ctx->ares = 0;
947
7.00k
    }
948
949
163k
    if (IS_LITTLE_ENDIAN)
950
#ifdef BSWAP4
951
        ctr = BSWAP4(ctx->Yi.d[3]);
952
#else
953
163k
        ctr = GETU32(ctx->Yi.c + 12);
954
0
#endif
955
0
    else
956
0
        ctr = ctx->Yi.d[3];
957
958
163k
    n = mres % 16;
959
163k
#if !defined(OPENSSL_SMALL_FOOTPRINT)
960
163k
    if (16 % sizeof(size_t) == 0) { /* always true actually */
961
163k
        do {
962
163k
            if (n) {
963
0
# if defined(GHASH)
964
0
                while (n && len) {
965
0
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
966
0
                    --len;
967
0
                    n = (n + 1) % 16;
968
0
                }
969
0
                if (n == 0) {
970
0
                    GHASH(ctx, ctx->Xn, mres);
971
0
                    mres = 0;
972
0
                } else {
973
0
                    ctx->mres = mres;
974
0
                    return 0;
975
0
                }
976
# else
977
                while (n && len) {
978
                    u8 c = *(in++);
979
                    *(out++) = c ^ ctx->EKi.c[n];
980
                    ctx->Xi.c[n] ^= c;
981
                    --len;
982
                    n = (n + 1) % 16;
983
                }
984
                if (n == 0) {
985
                    GCM_MUL(ctx);
986
                    mres = 0;
987
                } else {
988
                    ctx->mres = n;
989
                    return 0;
990
                }
991
# endif
992
0
            }
993
163k
# if defined(STRICT_ALIGNMENT)
994
163k
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
995
391
                break;
996
163k
# endif
997
163k
# if defined(GHASH)
998
163k
            if (len >= 16 && mres) {
999
1.88k
                GHASH(ctx, ctx->Xn, mres);
1000
1.88k
                mres = 0;
1001
1.88k
            }
1002
163k
#  if defined(GHASH_CHUNK)
1003
164k
            while (len >= GHASH_CHUNK) {
1004
1.27k
                size_t j = GHASH_CHUNK;
1005
1006
1.27k
                GHASH(ctx, in, GHASH_CHUNK);
1007
246k
                while (j) {
1008
245k
                    size_t_aX *out_t = (size_t_aX *)out;
1009
245k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1010
1011
245k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1012
245k
                    ++ctr;
1013
245k
                    if (IS_LITTLE_ENDIAN)
1014
#   ifdef BSWAP4
1015
                        ctx->Yi.d[3] = BSWAP4(ctr);
1016
#   else
1017
245k
                        PUTU32(ctx->Yi.c + 12, ctr);
1018
0
#   endif
1019
0
                    else
1020
0
                        ctx->Yi.d[3] = ctr;
1021
735k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1022
490k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1023
245k
                    out += 16;
1024
245k
                    in += 16;
1025
245k
                    j -= 16;
1026
245k
                }
1027
1.27k
                len -= GHASH_CHUNK;
1028
1.27k
            }
1029
163k
#  endif
1030
163k
            if ((i = (len & (size_t)-16))) {
1031
1.86k
                GHASH(ctx, in, i);
1032
25.0k
                while (len >= 16) {
1033
23.1k
                    size_t_aX *out_t = (size_t_aX *)out;
1034
23.1k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1035
1036
23.1k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1037
23.1k
                    ++ctr;
1038
23.1k
                    if (IS_LITTLE_ENDIAN)
1039
#  ifdef BSWAP4
1040
                        ctx->Yi.d[3] = BSWAP4(ctr);
1041
#  else
1042
23.1k
                        PUTU32(ctx->Yi.c + 12, ctr);
1043
0
#  endif
1044
0
                    else
1045
0
                        ctx->Yi.d[3] = ctr;
1046
69.5k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1047
46.3k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1048
23.1k
                    out += 16;
1049
23.1k
                    in += 16;
1050
23.1k
                    len -= 16;
1051
23.1k
                }
1052
1.86k
            }
1053
# else
1054
            while (len >= 16) {
1055
                size_t *out_t = (size_t *)out;
1056
                const size_t *in_t = (const size_t *)in;
1057
1058
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1059
                ++ctr;
1060
                if (IS_LITTLE_ENDIAN)
1061
#  ifdef BSWAP4
1062
                    ctx->Yi.d[3] = BSWAP4(ctr);
1063
#  else
1064
                    PUTU32(ctx->Yi.c + 12, ctr);
1065
#  endif
1066
                else
1067
                    ctx->Yi.d[3] = ctr;
1068
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1069
                    size_t c = in_t[i];
1070
                    out_t[i] = c ^ ctx->EKi.t[i];
1071
                    ctx->Xi.t[i] ^= c;
1072
                }
1073
                GCM_MUL(ctx);
1074
                out += 16;
1075
                in += 16;
1076
                len -= 16;
1077
            }
1078
# endif
1079
163k
            if (len) {
1080
6.69k
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1081
6.69k
                ++ctr;
1082
6.69k
                if (IS_LITTLE_ENDIAN)
1083
# ifdef BSWAP4
1084
                    ctx->Yi.d[3] = BSWAP4(ctr);
1085
# else
1086
6.69k
                    PUTU32(ctx->Yi.c + 12, ctr);
1087
0
# endif
1088
0
                else
1089
0
                    ctx->Yi.d[3] = ctr;
1090
6.69k
# if defined(GHASH)
1091
65.6k
                while (len--) {
1092
58.9k
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1093
58.9k
                    ++n;
1094
58.9k
                }
1095
# else
1096
                while (len--) {
1097
                    u8 c = in[n];
1098
                    ctx->Xi.c[n] ^= c;
1099
                    out[n] = c ^ ctx->EKi.c[n];
1100
                    ++n;
1101
                }
1102
                mres = n;
1103
# endif
1104
6.69k
            }
1105
1106
163k
            ctx->mres = mres;
1107
163k
            return 0;
1108
163k
        } while (0);
1109
163k
    }
1110
391
#endif
1111
391
    for (i = 0; i < len; ++i) {
1112
0
        u8 c;
1113
0
        if (n == 0) {
1114
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1115
0
            ++ctr;
1116
0
            if (IS_LITTLE_ENDIAN)
1117
#ifdef BSWAP4
1118
                ctx->Yi.d[3] = BSWAP4(ctr);
1119
#else
1120
0
                PUTU32(ctx->Yi.c + 12, ctr);
1121
0
#endif
1122
0
            else
1123
0
                ctx->Yi.d[3] = ctr;
1124
0
        }
1125
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1126
0
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1127
0
        n = (n + 1) % 16;
1128
0
        if (mres == sizeof(ctx->Xn)) {
1129
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1130
0
            mres = 0;
1131
0
        }
1132
#else
1133
        c = in[i];
1134
        out[i] = c ^ ctx->EKi.c[n];
1135
        ctx->Xi.c[n] ^= c;
1136
        mres = n = (n + 1) % 16;
1137
        if (n == 0)
1138
            GCM_MUL(ctx);
1139
#endif
1140
0
    }
1141
1142
391
    ctx->mres = mres;
1143
391
    return 0;
1144
163k
}
1145
1146
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1147
                                const unsigned char *in, unsigned char *out,
1148
                                size_t len, ctr128_f stream)
1149
1.55M
{
1150
#if defined(OPENSSL_SMALL_FOOTPRINT)
1151
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1152
#else
1153
1.55M
    DECLARE_IS_ENDIAN;
1154
1.55M
    unsigned int n, ctr, mres;
1155
1.55M
    size_t i;
1156
1.55M
    u64 mlen = ctx->len.u[1];
1157
1.55M
    void *key = ctx->key;
1158
1159
1.55M
    mlen += len;
1160
1.55M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1161
0
        return -1;
1162
1.55M
    ctx->len.u[1] = mlen;
1163
1164
1.55M
    mres = ctx->mres;
1165
1166
1.55M
    if (ctx->ares) {
1167
        /* First call to encrypt finalizes GHASH(AAD) */
1168
776k
#if defined(GHASH)
1169
776k
        if (len == 0) {
1170
0
            GCM_MUL(ctx);
1171
0
            ctx->ares = 0;
1172
0
            return 0;
1173
0
        }
1174
776k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1175
776k
        ctx->Xi.u[0] = 0;
1176
776k
        ctx->Xi.u[1] = 0;
1177
776k
        mres = sizeof(ctx->Xi);
1178
#else
1179
        GCM_MUL(ctx);
1180
#endif
1181
776k
        ctx->ares = 0;
1182
776k
    }
1183
1184
1.55M
    if (IS_LITTLE_ENDIAN)
1185
# ifdef BSWAP4
1186
        ctr = BSWAP4(ctx->Yi.d[3]);
1187
# else
1188
1.55M
        ctr = GETU32(ctx->Yi.c + 12);
1189
0
# endif
1190
0
    else
1191
0
        ctr = ctx->Yi.d[3];
1192
1193
1.55M
    n = mres % 16;
1194
1.55M
    if (n) {
1195
154k
# if defined(GHASH)
1196
930k
        while (n && len) {
1197
775k
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1198
775k
            --len;
1199
775k
            n = (n + 1) % 16;
1200
775k
        }
1201
154k
        if (n == 0) {
1202
72.4k
            GHASH(ctx, ctx->Xn, mres);
1203
72.4k
            mres = 0;
1204
82.5k
        } else {
1205
82.5k
            ctx->mres = mres;
1206
82.5k
            return 0;
1207
82.5k
        }
1208
# else
1209
        while (n && len) {
1210
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1211
            --len;
1212
            n = (n + 1) % 16;
1213
        }
1214
        if (n == 0) {
1215
            GCM_MUL(ctx);
1216
            mres = 0;
1217
        } else {
1218
            ctx->mres = n;
1219
            return 0;
1220
        }
1221
# endif
1222
154k
    }
1223
1.46M
# if defined(GHASH)
1224
1.46M
        if (len >= 16 && mres) {
1225
48.2k
            GHASH(ctx, ctx->Xn, mres);
1226
48.2k
            mres = 0;
1227
48.2k
        }
1228
1.46M
#  if defined(GHASH_CHUNK)
1229
1.46M
    while (len >= GHASH_CHUNK) {
1230
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1231
0
        ctr += GHASH_CHUNK / 16;
1232
0
        if (IS_LITTLE_ENDIAN)
1233
#   ifdef BSWAP4
1234
            ctx->Yi.d[3] = BSWAP4(ctr);
1235
#   else
1236
0
            PUTU32(ctx->Yi.c + 12, ctr);
1237
0
#   endif
1238
0
        else
1239
0
            ctx->Yi.d[3] = ctr;
1240
0
        GHASH(ctx, out, GHASH_CHUNK);
1241
0
        out += GHASH_CHUNK;
1242
0
        in += GHASH_CHUNK;
1243
0
        len -= GHASH_CHUNK;
1244
0
    }
1245
1.46M
#  endif
1246
1.46M
# endif
1247
1.46M
    if ((i = (len & (size_t)-16))) {
1248
221k
        size_t j = i / 16;
1249
1250
221k
        (*stream) (in, out, j, key, ctx->Yi.c);
1251
221k
        ctr += (unsigned int)j;
1252
221k
        if (IS_LITTLE_ENDIAN)
1253
# ifdef BSWAP4
1254
            ctx->Yi.d[3] = BSWAP4(ctr);
1255
# else
1256
221k
            PUTU32(ctx->Yi.c + 12, ctr);
1257
0
# endif
1258
0
        else
1259
0
            ctx->Yi.d[3] = ctr;
1260
221k
        in += i;
1261
221k
        len -= i;
1262
221k
# if defined(GHASH)
1263
221k
        GHASH(ctx, out, i);
1264
221k
        out += i;
1265
# else
1266
        while (j--) {
1267
            for (i = 0; i < 16; ++i)
1268
                ctx->Xi.c[i] ^= out[i];
1269
            GCM_MUL(ctx);
1270
            out += 16;
1271
        }
1272
# endif
1273
221k
    }
1274
1.46M
    if (len) {
1275
1.44M
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1276
1.44M
        ++ctr;
1277
1.44M
        if (IS_LITTLE_ENDIAN)
1278
# ifdef BSWAP4
1279
            ctx->Yi.d[3] = BSWAP4(ctr);
1280
# else
1281
1.44M
            PUTU32(ctx->Yi.c + 12, ctr);
1282
0
# endif
1283
0
        else
1284
0
            ctx->Yi.d[3] = ctr;
1285
5.73M
        while (len--) {
1286
4.28M
# if defined(GHASH)
1287
4.28M
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1288
# else
1289
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1290
# endif
1291
4.28M
            ++n;
1292
4.28M
        }
1293
1.44M
    }
1294
1295
1.46M
    ctx->mres = mres;
1296
1.46M
    return 0;
1297
1.55M
#endif
1298
1.55M
}
1299
1300
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1301
                                const unsigned char *in, unsigned char *out,
1302
                                size_t len, ctr128_f stream)
1303
606k
{
1304
#if defined(OPENSSL_SMALL_FOOTPRINT)
1305
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1306
#else
1307
606k
    DECLARE_IS_ENDIAN;
1308
606k
    unsigned int n, ctr, mres;
1309
606k
    size_t i;
1310
606k
    u64 mlen = ctx->len.u[1];
1311
606k
    void *key = ctx->key;
1312
1313
606k
    mlen += len;
1314
606k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1315
0
        return -1;
1316
606k
    ctx->len.u[1] = mlen;
1317
1318
606k
    mres = ctx->mres;
1319
1320
606k
    if (ctx->ares) {
1321
        /* First call to decrypt finalizes GHASH(AAD) */
1322
361k
# if defined(GHASH)
1323
361k
        if (len == 0) {
1324
1.43k
            GCM_MUL(ctx);
1325
1.43k
            ctx->ares = 0;
1326
1.43k
            return 0;
1327
1.43k
        }
1328
360k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1329
360k
        ctx->Xi.u[0] = 0;
1330
360k
        ctx->Xi.u[1] = 0;
1331
360k
        mres = sizeof(ctx->Xi);
1332
# else
1333
        GCM_MUL(ctx);
1334
# endif
1335
360k
        ctx->ares = 0;
1336
360k
    }
1337
1338
604k
    if (IS_LITTLE_ENDIAN)
1339
# ifdef BSWAP4
1340
        ctr = BSWAP4(ctx->Yi.d[3]);
1341
# else
1342
604k
        ctr = GETU32(ctx->Yi.c + 12);
1343
0
# endif
1344
0
    else
1345
0
        ctr = ctx->Yi.d[3];
1346
1347
604k
    n = mres % 16;
1348
604k
    if (n) {
1349
0
# if defined(GHASH)
1350
0
        while (n && len) {
1351
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1352
0
            --len;
1353
0
            n = (n + 1) % 16;
1354
0
        }
1355
0
        if (n == 0) {
1356
0
            GHASH(ctx, ctx->Xn, mres);
1357
0
            mres = 0;
1358
0
        } else {
1359
0
            ctx->mres = mres;
1360
0
            return 0;
1361
0
        }
1362
# else
1363
        while (n && len) {
1364
            u8 c = *(in++);
1365
            *(out++) = c ^ ctx->EKi.c[n];
1366
            ctx->Xi.c[n] ^= c;
1367
            --len;
1368
            n = (n + 1) % 16;
1369
        }
1370
        if (n == 0) {
1371
            GCM_MUL(ctx);
1372
            mres = 0;
1373
        } else {
1374
            ctx->mres = n;
1375
            return 0;
1376
        }
1377
# endif
1378
0
    }
1379
604k
# if defined(GHASH)
1380
604k
    if (len >= 16 && mres) {
1381
0
        GHASH(ctx, ctx->Xn, mres);
1382
0
        mres = 0;
1383
0
    }
1384
604k
#  if defined(GHASH_CHUNK)
1385
604k
    while (len >= GHASH_CHUNK) {
1386
0
        GHASH(ctx, in, GHASH_CHUNK);
1387
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1388
0
        ctr += GHASH_CHUNK / 16;
1389
0
        if (IS_LITTLE_ENDIAN)
1390
#   ifdef BSWAP4
1391
            ctx->Yi.d[3] = BSWAP4(ctr);
1392
#   else
1393
0
            PUTU32(ctx->Yi.c + 12, ctr);
1394
0
#   endif
1395
0
        else
1396
0
            ctx->Yi.d[3] = ctr;
1397
0
        out += GHASH_CHUNK;
1398
0
        in += GHASH_CHUNK;
1399
0
        len -= GHASH_CHUNK;
1400
0
    }
1401
604k
#  endif
1402
604k
# endif
1403
604k
    if ((i = (len & (size_t)-16))) {
1404
219k
        size_t j = i / 16;
1405
1406
219k
# if defined(GHASH)
1407
219k
        GHASH(ctx, in, i);
1408
# else
1409
        while (j--) {
1410
            size_t k;
1411
            for (k = 0; k < 16; ++k)
1412
                ctx->Xi.c[k] ^= in[k];
1413
            GCM_MUL(ctx);
1414
            in += 16;
1415
        }
1416
        j = i / 16;
1417
        in -= i;
1418
# endif
1419
219k
        (*stream) (in, out, j, key, ctx->Yi.c);
1420
219k
        ctr += (unsigned int)j;
1421
219k
        if (IS_LITTLE_ENDIAN)
1422
# ifdef BSWAP4
1423
            ctx->Yi.d[3] = BSWAP4(ctr);
1424
# else
1425
219k
            PUTU32(ctx->Yi.c + 12, ctr);
1426
0
# endif
1427
0
        else
1428
0
            ctx->Yi.d[3] = ctr;
1429
219k
        out += i;
1430
219k
        in += i;
1431
219k
        len -= i;
1432
219k
    }
1433
604k
    if (len) {
1434
593k
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1435
593k
        ++ctr;
1436
593k
        if (IS_LITTLE_ENDIAN)
1437
# ifdef BSWAP4
1438
            ctx->Yi.d[3] = BSWAP4(ctr);
1439
# else
1440
593k
            PUTU32(ctx->Yi.c + 12, ctr);
1441
0
# endif
1442
0
        else
1443
0
            ctx->Yi.d[3] = ctr;
1444
2.53M
        while (len--) {
1445
1.93M
# if defined(GHASH)
1446
1.93M
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1447
# else
1448
            u8 c = in[n];
1449
            ctx->Xi.c[mres++] ^= c;
1450
            out[n] = c ^ ctx->EKi.c[n];
1451
# endif
1452
1.93M
            ++n;
1453
1.93M
        }
1454
593k
    }
1455
1456
604k
    ctx->mres = mres;
1457
604k
    return 0;
1458
604k
#endif
1459
604k
}
1460
1461
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1462
                         size_t len)
1463
1.50M
{
1464
1.50M
    DECLARE_IS_ENDIAN;
1465
1.50M
    u64 alen = ctx->len.u[0] << 3;
1466
1.50M
    u64 clen = ctx->len.u[1] << 3;
1467
1468
1.50M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1469
1.50M
    u128 bitlen;
1470
1.50M
    unsigned int mres = ctx->mres;
1471
1472
1.50M
    if (mres) {
1473
1.37M
        unsigned blocks = (mres + 15) & -16;
1474
1475
1.37M
        memset(ctx->Xn + mres, 0, blocks - mres);
1476
1.37M
        mres = blocks;
1477
1.37M
        if (mres == sizeof(ctx->Xn)) {
1478
0
            GHASH(ctx, ctx->Xn, mres);
1479
0
            mres = 0;
1480
0
        }
1481
1.37M
    } else if (ctx->ares) {
1482
93.6k
        GCM_MUL(ctx);
1483
93.6k
    }
1484
#else
1485
    if (ctx->mres || ctx->ares)
1486
        GCM_MUL(ctx);
1487
#endif
1488
1489
1.50M
    if (IS_LITTLE_ENDIAN) {
1490
#ifdef BSWAP8
1491
        alen = BSWAP8(alen);
1492
        clen = BSWAP8(clen);
1493
#else
1494
1.50M
        u8 *p = ctx->len.c;
1495
1496
1.50M
        ctx->len.u[0] = alen;
1497
1.50M
        ctx->len.u[1] = clen;
1498
1499
1.50M
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1500
1.50M
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1501
1.50M
#endif
1502
1.50M
    }
1503
1504
1.50M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1505
1.50M
    bitlen.hi = alen;
1506
1.50M
    bitlen.lo = clen;
1507
1.50M
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1508
1.50M
    mres += sizeof(bitlen);
1509
1.50M
    GHASH(ctx, ctx->Xn, mres);
1510
#else
1511
    ctx->Xi.u[0] ^= alen;
1512
    ctx->Xi.u[1] ^= clen;
1513
    GCM_MUL(ctx);
1514
#endif
1515
1516
1.50M
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1517
1.50M
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1518
1519
1.50M
    if (tag && len <= sizeof(ctx->Xi))
1520
613k
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1521
887k
    else
1522
887k
        return -1;
1523
1.50M
}
1524
1525
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1526
887k
{
1527
887k
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1528
887k
    memcpy(tag, ctx->Xi.c,
1529
887k
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1530
887k
}
1531
1532
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1533
0
{
1534
0
    GCM128_CONTEXT *ret;
1535
1536
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1537
0
        CRYPTO_gcm128_init(ret, key, block);
1538
1539
0
    return ret;
1540
0
}
1541
1542
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1543
0
{
1544
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1545
0
}
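
Taken together, the functions in this file are used in the usual
init / setiv / aad / update / finish order. A minimal sketch, assuming a
caller-supplied AES key schedule aes_key, a block function aes_encrypt_block
matching the block128_f signature, and caller-provided iv/aad/pt/ct buffers
(none of which are defined in this file):

    GCM128_CONTEXT gcm;
    unsigned char tag[16];

    CRYPTO_gcm128_init(&gcm, &aes_key, (block128_f)aes_encrypt_block);
    CRYPTO_gcm128_setiv(&gcm, iv, 12);           /* 96-bit IV fast path      */
    CRYPTO_gcm128_aad(&gcm, aad, aad_len);       /* AAD must precede data    */
    CRYPTO_gcm128_encrypt(&gcm, pt, ct, pt_len); /* may be called repeatedly */
    CRYPTO_gcm128_tag(&gcm, tag, sizeof(tag));   /* emit the 16-byte tag     */

On the receiving side the same sequence ends with CRYPTO_gcm128_decrypt() and
CRYPTO_gcm128_finish(&gcm, tag, 16), which returns 0 on a tag match and
compares in constant time via CRYPTO_memcmp.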