Coverage Report

Created: 2024-07-27 06:39

/src/openssl31/crypto/modes/gcm128.c
Line
Count
Source
1
/*
2
 * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <string.h>
11
#include <openssl/crypto.h>
12
#include "internal/cryptlib.h"
13
#include "internal/endian.h"
14
#include "crypto/modes.h"
15
16
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17
typedef size_t size_t_aX __attribute((__aligned__(1)));
18
#else
19
typedef size_t size_t_aX;
20
#endif
21
22
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23
/* redefine, because alignment is ensured */
24
# undef  GETU32
25
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
26
# undef  PUTU32
27
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
28
#endif
29
30
/* RISC-V uses the C implementation of gmult as a fallback. */
31
#if defined(__riscv)
32
# define INCLUDE_C_GMULT_4BIT
33
#endif
34
35
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
36
0
#define REDUCE1BIT(V)   do { \
37
0
        if (sizeof(size_t)==8) { \
38
0
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
39
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
40
0
                V.hi  = (V.hi>>1 )^T; \
41
0
        } \
42
0
        else { \
43
0
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
44
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
45
0
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
46
0
        } \
47
0
} while(0)
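/*
 * Editor's note -- a minimal standalone sketch, not part of the original
 * source.  In GHASH's bit-reflected representation of GF(2^128),
 * REDUCE1BIT shifts the 128-bit element V right by one bit (division by
 * x) and, whenever the bit shifted out of V.lo is set, folds the
 * reduction polynomial x^128 + x^7 + x^2 + x + 1 back in; 0xe1000000...
 * is that polynomial's bit-reflected top word.  The 64-bit branch,
 * restated with standard types:
 */
#include <stdint.h>

struct u128_sketch { uint64_t hi, lo; };  /* hypothetical stand-in for u128 */

static void reduce1bit_sketch(struct u128_sketch *V)
{
    /* branch-free mask: all-zero or the reflected polynomial word */
    uint64_t T = UINT64_C(0xe100000000000000) & (0 - (V->lo & 1));

    V->lo = (V->hi << 63) | (V->lo >> 1);   /* 128-bit right shift by one */
    V->hi = (V->hi >> 1) ^ T;               /* fold the reduction back in */
}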
48
49
/*-
50
 *
51
 * NOTE: TABLE_BITS and all non-4-bit implementations have been removed in 3.1.
52
 *
53
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it should
54
 * never be set to 8; 8 is effectively reserved for testing purposes.
55
 * TABLE_BITS>1 selects the lookup-table-driven implementations referred to
56
 * as "Shoup's" in the GCM specification. In other words, OpenSSL does not
57
 * cover the whole spectrum of possible table-driven implementations. Why? In
58
 * the non-"Shoup's" case the memory access pattern is segmented in such a
59
 * manner that it is trivial to see that cache-timing information can reveal
60
 * a fair portion of the intermediate hash value. Given that the ciphertext
61
 * is always available to an attacker, the attacker can then attempt to
62
 * deduce the secret parameter H and, if successful, tamper with messages
63
 * [which is trivial in CTR mode]. In "Shoup's" case this is not as easy,
64
 * but there is no reason to believe that it is resistant to cache-timing
65
 * attacks either. As for the "8-bit" implementation, the catch is
66
 * that it consumes 16 (sixteen) times more memory: 4KB per individual
67
 * key plus 1KB shared. On the plus side it should be twice as fast as the
68
 * "4-bit" version. For gcc-generated x86[_64] code, the "8-bit" version
69
 * was observed to run ~75% faster, closer to 100% with commercial
70
 * compilers... Yet the "4-bit" procedure is preferred, because it is
71
 * believed to provide a better security/performance balance and adequate
72
 * all-round performance. "All-round" refers to things like:
73
 *
74
 * - a shorter setup time effectively improves overall timing for
75
 *   handling short messages;
76
 * - a larger table allocation can become unbearable because of VM
77
 *   subsystem penalties (for example, on Windows a large enough free
78
 *   results in VM working-set trimming, meaning that a subsequent
79
 *   malloc would immediately incur working-set expansion);
80
 * - a larger table has a larger cache footprint, which can affect the
81
 *   performance of other code paths (not necessarily even from the same
82
 *   thread in a Hyper-Threading world);
83
 *
84
 * A value of 1 is not appropriate for performance reasons.
85
 */
86
87
static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
88
0
{
89
0
    u128 V;
90
# if defined(OPENSSL_SMALL_FOOTPRINT)
91
    int i;
92
# endif
93
94
0
    Htable[0].hi = 0;
95
0
    Htable[0].lo = 0;
96
0
    V.hi = H[0];
97
0
    V.lo = H[1];
98
99
# if defined(OPENSSL_SMALL_FOOTPRINT)
100
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
101
        REDUCE1BIT(V);
102
        Htable[i] = V;
103
    }
104
105
    for (i = 2; i < 16; i <<= 1) {
106
        u128 *Hi = Htable + i;
107
        int j;
108
        for (V = *Hi, j = 1; j < i; ++j) {
109
            Hi[j].hi = V.hi ^ Htable[j].hi;
110
            Hi[j].lo = V.lo ^ Htable[j].lo;
111
        }
112
    }
113
# else
114
0
    Htable[8] = V;
115
0
    REDUCE1BIT(V);
116
0
    Htable[4] = V;
117
0
    REDUCE1BIT(V);
118
0
    Htable[2] = V;
119
0
    REDUCE1BIT(V);
120
0
    Htable[1] = V;
121
0
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
122
0
    V = Htable[4];
123
0
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
124
0
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
125
0
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
126
0
    V = Htable[8];
127
0
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
128
0
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
129
0
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
130
0
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
131
0
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
132
0
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
133
0
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
134
0
# endif
135
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
136
    /*
137
     * ARM assembler expects specific dword order in Htable.
138
     */
139
    {
140
        int j;
141
        DECLARE_IS_ENDIAN;
142
143
        if (IS_LITTLE_ENDIAN)
144
            for (j = 0; j < 16; ++j) {
145
                V = Htable[j];
146
                Htable[j].hi = V.lo;
147
                Htable[j].lo = V.hi;
148
            } else
149
            for (j = 0; j < 16; ++j) {
150
                V = Htable[j];
151
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
152
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
153
            }
154
    }
155
# endif
156
0
}
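/*
 * Editor's note -- illustrative sketch, not in the original source.  The
 * table built above satisfies Htable[i] = i*H over GF(2^128): H, 2*H,
 * 4*H and 8*H occupy the power-of-two slots and every other entry is the
 * XOR of those, so the table is XOR-linear in disjoint index bits.  A
 * hypothetical sanity check of that invariant (valid before the
 * ARM-specific dword reordering):
 */
static int htable_is_linear(const u128 Htable[16])
{
    int i, j;

    for (i = 0; i < 16; ++i)
        for (j = 0; j < 16; ++j) {
            if (i & j)          /* only disjoint bit sets are additive */
                continue;
            if (Htable[i ^ j].hi != (Htable[i].hi ^ Htable[j].hi) ||
                Htable[i ^ j].lo != (Htable[i].lo ^ Htable[j].lo))
                return 0;
        }
    return 1;
}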
157
158
# if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
159
static const size_t rem_4bit[16] = {
160
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
161
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
162
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
163
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
164
};
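/*
 * Editor's note -- illustrative sketch, not in the original source.  Each
 * rem_4bit entry is the constant that folds four ejected bits back into
 * the hash: bit b of the index contributes 0xE100 >> (3 - b), i.e.
 * 0x1C20, 0x3840, 0x7080 or 0xE100, and PACK scales the XOR of those
 * into the top 16 bits of a size_t.  The table could thus be generated as:
 */
static void gen_rem_4bit(size_t tab[16])
{
    unsigned int r, b, v;

    for (r = 0; r < 16; ++r) {
        v = 0;
        for (b = 0; b < 4; ++b)
            if (r & (1u << b))
                v ^= 0xE100u >> (3 - b);
        tab[r] = PACK(v);        /* matches rem_4bit[r] above */
    }
}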
165
166
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
167
{
168
    u128 Z;
169
    int cnt = 15;
170
    size_t rem, nlo, nhi;
171
    DECLARE_IS_ENDIAN;
172
173
    nlo = ((const u8 *)Xi)[15];
174
    nhi = nlo >> 4;
175
    nlo &= 0xf;
176
177
    Z.hi = Htable[nlo].hi;
178
    Z.lo = Htable[nlo].lo;
179
180
    while (1) {
181
        rem = (size_t)Z.lo & 0xf;
182
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
183
        Z.hi = (Z.hi >> 4);
184
        if (sizeof(size_t) == 8)
185
            Z.hi ^= rem_4bit[rem];
186
        else
187
            Z.hi ^= (u64)rem_4bit[rem] << 32;
188
189
        Z.hi ^= Htable[nhi].hi;
190
        Z.lo ^= Htable[nhi].lo;
191
192
        if (--cnt < 0)
193
            break;
194
195
        nlo = ((const u8 *)Xi)[cnt];
196
        nhi = nlo >> 4;
197
        nlo &= 0xf;
198
199
        rem = (size_t)Z.lo & 0xf;
200
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
201
        Z.hi = (Z.hi >> 4);
202
        if (sizeof(size_t) == 8)
203
            Z.hi ^= rem_4bit[rem];
204
        else
205
            Z.hi ^= (u64)rem_4bit[rem] << 32;
206
207
        Z.hi ^= Htable[nlo].hi;
208
        Z.lo ^= Htable[nlo].lo;
209
    }
210
211
    if (IS_LITTLE_ENDIAN) {
212
#  ifdef BSWAP8
213
        Xi[0] = BSWAP8(Z.hi);
214
        Xi[1] = BSWAP8(Z.lo);
215
#  else
216
        u8 *p = (u8 *)Xi;
217
        u32 v;
218
        v = (u32)(Z.hi >> 32);
219
        PUTU32(p, v);
220
        v = (u32)(Z.hi);
221
        PUTU32(p + 4, v);
222
        v = (u32)(Z.lo >> 32);
223
        PUTU32(p + 8, v);
224
        v = (u32)(Z.lo);
225
        PUTU32(p + 12, v);
226
#  endif
227
    } else {
228
        Xi[0] = Z.hi;
229
        Xi[1] = Z.lo;
230
    }
231
}
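/*
 * Editor's note, not in the original source: the loop above is Shoup's
 * 4-bit method, a Horner evaluation over the 32 nibbles of Xi from byte
 * 15 down to byte 0.  Each step computes, in the bit-reflected field,
 *
 *     Z <- (Z * x^4) ^ Htable[nibble]
 *
 * where the multiplication by x^4 is the 4-bit right shift of Z and
 * rem_4bit[rem] folds the four ejected bits back in.  After the last
 * nibble, Z = Xi * H.
 */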
232
233
# endif
234
235
# if !defined(GHASH_ASM)
236
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
237
/*
238
 * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
239
 * details... Compiler-generated code doesn't seem to give any
240
 * performance improvement, at least not on x86[_64]. It's here
241
 * mostly as a reference and a placeholder for possible future
242
 * non-trivial optimization[s]...
243
 */
244
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
245
                           const u8 *inp, size_t len)
246
{
247
    u128 Z;
248
    int cnt;
249
    size_t rem, nlo, nhi;
250
    DECLARE_IS_ENDIAN;
251
252
    do {
253
        cnt = 15;
254
        nlo = ((const u8 *)Xi)[15];
255
        nlo ^= inp[15];
256
        nhi = nlo >> 4;
257
        nlo &= 0xf;
258
259
        Z.hi = Htable[nlo].hi;
260
        Z.lo = Htable[nlo].lo;
261
262
        while (1) {
263
            rem = (size_t)Z.lo & 0xf;
264
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
265
            Z.hi = (Z.hi >> 4);
266
            if (sizeof(size_t) == 8)
267
                Z.hi ^= rem_4bit[rem];
268
            else
269
                Z.hi ^= (u64)rem_4bit[rem] << 32;
270
271
            Z.hi ^= Htable[nhi].hi;
272
            Z.lo ^= Htable[nhi].lo;
273
274
            if (--cnt < 0)
275
                break;
276
277
            nlo = ((const u8 *)Xi)[cnt];
278
            nlo ^= inp[cnt];
279
            nhi = nlo >> 4;
280
            nlo &= 0xf;
281
282
            rem = (size_t)Z.lo & 0xf;
283
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
284
            Z.hi = (Z.hi >> 4);
285
            if (sizeof(size_t) == 8)
286
                Z.hi ^= rem_4bit[rem];
287
            else
288
                Z.hi ^= (u64)rem_4bit[rem] << 32;
289
290
            Z.hi ^= Htable[nlo].hi;
291
            Z.lo ^= Htable[nlo].lo;
292
        }
293
294
        if (IS_LITTLE_ENDIAN) {
295
#   ifdef BSWAP8
296
            Xi[0] = BSWAP8(Z.hi);
297
            Xi[1] = BSWAP8(Z.lo);
298
#   else
299
            u8 *p = (u8 *)Xi;
300
            u32 v;
301
            v = (u32)(Z.hi >> 32);
302
            PUTU32(p, v);
303
            v = (u32)(Z.hi);
304
            PUTU32(p + 4, v);
305
            v = (u32)(Z.lo >> 32);
306
            PUTU32(p + 8, v);
307
            v = (u32)(Z.lo);
308
            PUTU32(p + 12, v);
309
#   endif
310
        } else {
311
            Xi[0] = Z.hi;
312
            Xi[1] = Z.lo;
313
        }
314
    } while (inp += 16, len -= 16);
315
}
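/*
 * Editor's note -- equivalence sketch, not in the original source.  The
 * streamed routine above computes Xi <- (Xi ^ X_m) * H for each 16-byte
 * block X_m, folding the XOR into the nibble fetches.  Under the same
 * contract (len a non-zero multiple of 16) it is equivalent to:
 */
static void ghash_ref_sketch(u64 Xi[2], const u128 Htable[16],
                             const u8 *inp, size_t len)
{
    size_t i;

    do {
        for (i = 0; i < 16; ++i)
            ((u8 *)Xi)[i] ^= inp[i];    /* Xi ^= next block */
        gcm_gmult_4bit(Xi, Htable);     /* Xi  = Xi * H     */
    } while (inp += 16, len -= 16);
}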
316
#  endif
317
# else
318
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
319
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
320
                    size_t len);
321
# endif
322
323
377k
# define GCM_MUL(ctx)      ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
324
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
325
2.12M
#  define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
326
/*
327
 * GHASH_CHUNK is a "stride parameter" meant to mitigate the cache-thrashing
328
 * effect. In other words, the idea is to hash data while it is still in the
329
 * L1 cache after the encryption pass...
330
 */
331
1.28M
#  define GHASH_CHUNK       (3*1024)
332
# endif
333
334
#if     (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
335
# if    !defined(I386_ONLY) && \
336
        (defined(__i386)        || defined(__i386__)    || \
337
         defined(__x86_64)      || defined(__x86_64__)  || \
338
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
339
#  define GHASH_ASM_X86_OR_64
340
341
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
342
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
343
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
344
                     size_t len);
345
346
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
347
#   define gcm_init_avx   gcm_init_clmul
348
#   define gcm_gmult_avx  gcm_gmult_clmul
349
#   define gcm_ghash_avx  gcm_ghash_clmul
350
#  else
351
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
352
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
353
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
354
                   size_t len);
355
#  endif
356
357
#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
358
#   define GHASH_ASM_X86
359
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
360
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
361
                        size_t len);
362
363
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
364
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
365
                        size_t len);
366
#  endif
367
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
368
#  include "arm_arch.h"
369
#  if __ARM_MAX_ARCH__>=7
370
#   define GHASH_ASM_ARM
371
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
372
#   if defined(__arm__) || defined(__arm)
373
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
374
#   endif
375
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
376
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
377
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
378
                    size_t len);
379
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
380
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
381
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
382
                  size_t len);
383
#  endif
384
# elif defined(__sparc__) || defined(__sparc)
385
#  include "crypto/sparc_arch.h"
386
#  define GHASH_ASM_SPARC
387
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
388
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
389
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
390
                    size_t len);
391
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
392
#  include "crypto/ppc_arch.h"
393
#  define GHASH_ASM_PPC
394
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
395
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
396
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
397
                  size_t len);
398
# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
399
#  include "crypto/riscv_arch.h"
400
#  define GHASH_ASM_RISCV
401
#  undef  GHASH
402
void gcm_init_clmul_rv64i_zbb_zbc(u128 Htable[16], const u64 Xi[2]);
403
void gcm_gmult_clmul_rv64i_zbb_zbc(u64 Xi[2], const u128 Htable[16]);
404
# endif
405
#endif
406
407
static void gcm_get_funcs(struct gcm_funcs_st *ctx)
408
251k
{
409
    /* set defaults -- overridden below as needed */
410
251k
    ctx->ginit = gcm_init_4bit;
411
#if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
412
    ctx->gmult = gcm_gmult_4bit;
413
#else
414
251k
    ctx->gmult = NULL;
415
251k
#endif
416
#if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
417
    ctx->ghash = gcm_ghash_4bit;
418
#else
419
251k
    ctx->ghash = NULL;
420
251k
#endif
421
422
251k
#if defined(GHASH_ASM_X86_OR_64)
423
251k
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
424
    /* x86_64 */
425
251k
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
426
251k
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
427
251k
            ctx->ginit = gcm_init_avx;
428
251k
            ctx->gmult = gcm_gmult_avx;
429
251k
            ctx->ghash = gcm_ghash_avx;
430
251k
        } else {
431
0
            ctx->ginit = gcm_init_clmul;
432
0
            ctx->gmult = gcm_gmult_clmul;
433
0
            ctx->ghash = gcm_ghash_clmul;
434
0
        }
435
251k
        return;
436
251k
    }
437
0
# endif
438
# if defined(GHASH_ASM_X86)
439
    /* x86 only */
440
#  if defined(OPENSSL_IA32_SSE2)
441
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
442
        ctx->gmult = gcm_gmult_4bit_mmx;
443
        ctx->ghash = gcm_ghash_4bit_mmx;
444
        return;
445
    }
446
#  else
447
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
448
        ctx->gmult = gcm_gmult_4bit_mmx;
449
        ctx->ghash = gcm_ghash_4bit_mmx;
450
        return;
451
    }
452
#  endif
453
    ctx->gmult = gcm_gmult_4bit_x86;
454
    ctx->ghash = gcm_ghash_4bit_x86;
455
    return;
456
# else
457
    /* x86_64 fallback defaults */
458
0
    ctx->gmult = gcm_gmult_4bit;
459
0
    ctx->ghash = gcm_ghash_4bit;
460
0
    return;
461
251k
# endif
462
#elif defined(GHASH_ASM_ARM)
463
    /* ARM defaults */
464
    ctx->gmult = gcm_gmult_4bit;
465
    ctx->ghash = gcm_ghash_4bit;
466
# ifdef PMULL_CAPABLE
467
    if (PMULL_CAPABLE) {
468
        ctx->ginit = (gcm_init_fn)gcm_init_v8;
469
        ctx->gmult = gcm_gmult_v8;
470
        ctx->ghash = gcm_ghash_v8;
471
    }
472
# elif defined(NEON_CAPABLE)
473
    if (NEON_CAPABLE) {
474
        ctx->ginit = gcm_init_neon;
475
        ctx->gmult = gcm_gmult_neon;
476
        ctx->ghash = gcm_ghash_neon;
477
    }
478
# endif
479
    return;
480
#elif defined(GHASH_ASM_SPARC)
481
    /* SPARC defaults */
482
    ctx->gmult = gcm_gmult_4bit;
483
    ctx->ghash = gcm_ghash_4bit;
484
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
485
        ctx->ginit = gcm_init_vis3;
486
        ctx->gmult = gcm_gmult_vis3;
487
        ctx->ghash = gcm_ghash_vis3;
488
    }
489
    return;
490
#elif defined(GHASH_ASM_PPC)
491
    /* PowerPC does not define GHASH_ASM; defaults set above */
492
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
493
        ctx->ginit = gcm_init_p8;
494
        ctx->gmult = gcm_gmult_p8;
495
        ctx->ghash = gcm_ghash_p8;
496
    }
497
    return;
498
#elif defined(GHASH_ASM_RISCV) && __riscv_xlen == 64
499
    /* RISC-V defaults; gmult already set above */
500
    ctx->ghash = NULL;
501
    if (RISCV_HAS_ZBB() && RISCV_HAS_ZBC()) {
502
        ctx->ginit = gcm_init_clmul_rv64i_zbb_zbc;
503
        ctx->gmult = gcm_gmult_clmul_rv64i_zbb_zbc;
504
    }
505
    return;
506
#elif defined(GHASH_ASM)
507
    /* all other architectures use the generic names */
508
    ctx->gmult = gcm_gmult_4bit;
509
    ctx->ghash = gcm_ghash_4bit;
510
    return;
511
#endif
512
251k
}
513
514
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
515
251k
{
516
251k
    DECLARE_IS_ENDIAN;
517
518
251k
    memset(ctx, 0, sizeof(*ctx));
519
251k
    ctx->block = block;
520
251k
    ctx->key = key;
521
522
251k
    (*block) (ctx->H.c, ctx->H.c, key);
523
524
251k
    if (IS_LITTLE_ENDIAN) {
525
        /* H is stored in host byte order */
526
#ifdef BSWAP8
527
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
528
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
529
#else
530
251k
        u8 *p = ctx->H.c;
531
251k
        u64 hi, lo;
532
251k
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
533
251k
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
534
251k
        ctx->H.u[0] = hi;
535
251k
        ctx->H.u[1] = lo;
536
251k
#endif
537
251k
    }
538
539
251k
    gcm_get_funcs(&ctx->funcs);
540
251k
    ctx->funcs.ginit(ctx->Htable, ctx->H.u);
541
251k
}
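/*
 * Editor's note -- a hypothetical caller's sketch, not in the original
 * source.  In OpenSSL 3.x these CRYPTO_gcm128_* entry points are
 * internal to libcrypto (applications normally use EVP with
 * EVP_aes_128_gcm()), so this only illustrates the intended call
 * sequence; the (block128_f)AES_encrypt cast mirrors what OpenSSL's own
 * AES-GCM glue does.
 */
#include <openssl/aes.h>

static int gcm_seal_sketch(const unsigned char key[16],
                           const unsigned char iv[12],
                           const unsigned char *aad, size_t aadlen,
                           const unsigned char *pt, unsigned char *ct,
                           size_t len, unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT *gcm;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return -1;
    if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
        return -1;
    CRYPTO_gcm128_setiv(gcm, iv, 12);
    if (CRYPTO_gcm128_aad(gcm, aad, aadlen) != 0
            || CRYPTO_gcm128_encrypt(gcm, pt, ct, len) != 0) {
        CRYPTO_gcm128_release(gcm);
        return -1;
    }
    CRYPTO_gcm128_tag(gcm, tag, 16);
    CRYPTO_gcm128_release(gcm);
    return 0;
}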
542
543
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
544
                         size_t len)
545
969k
{
546
969k
    DECLARE_IS_ENDIAN;
547
969k
    unsigned int ctr;
548
549
969k
    ctx->len.u[0] = 0;          /* AAD length */
550
969k
    ctx->len.u[1] = 0;          /* message length */
551
969k
    ctx->ares = 0;
552
969k
    ctx->mres = 0;
553
554
969k
    if (len == 12) {
555
969k
        memcpy(ctx->Yi.c, iv, 12);
556
969k
        ctx->Yi.c[12] = 0;
557
969k
        ctx->Yi.c[13] = 0;
558
969k
        ctx->Yi.c[14] = 0;
559
969k
        ctx->Yi.c[15] = 1;
560
969k
        ctr = 1;
561
969k
    } else {
562
0
        size_t i;
563
0
        u64 len0 = len;
564
565
        /* Borrow ctx->Xi to calculate initial Yi */
566
0
        ctx->Xi.u[0] = 0;
567
0
        ctx->Xi.u[1] = 0;
568
569
0
        while (len >= 16) {
570
0
            for (i = 0; i < 16; ++i)
571
0
                ctx->Xi.c[i] ^= iv[i];
572
0
            GCM_MUL(ctx);
573
0
            iv += 16;
574
0
            len -= 16;
575
0
        }
576
0
        if (len) {
577
0
            for (i = 0; i < len; ++i)
578
0
                ctx->Xi.c[i] ^= iv[i];
579
0
            GCM_MUL(ctx);
580
0
        }
581
0
        len0 <<= 3;
582
0
        if (IS_LITTLE_ENDIAN) {
583
#ifdef BSWAP8
584
            ctx->Xi.u[1] ^= BSWAP8(len0);
585
#else
586
0
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
587
0
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
588
0
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
589
0
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
590
0
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
591
0
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
592
0
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
593
0
            ctx->Xi.c[15] ^= (u8)(len0);
594
0
#endif
595
0
        } else {
596
0
            ctx->Xi.u[1] ^= len0;
597
0
        }
598
599
0
        GCM_MUL(ctx);
600
601
0
        if (IS_LITTLE_ENDIAN)
602
#ifdef BSWAP4
603
            ctr = BSWAP4(ctx->Xi.d[3]);
604
#else
605
0
            ctr = GETU32(ctx->Xi.c + 12);
606
0
#endif
607
0
        else
608
0
            ctr = ctx->Xi.d[3];
609
610
        /* Copy borrowed Xi to Yi */
611
0
        ctx->Yi.u[0] = ctx->Xi.u[0];
612
0
        ctx->Yi.u[1] = ctx->Xi.u[1];
613
0
    }
614
615
969k
    ctx->Xi.u[0] = 0;
616
969k
    ctx->Xi.u[1] = 0;
617
618
969k
    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
619
969k
    ++ctr;
620
969k
    if (IS_LITTLE_ENDIAN)
621
#ifdef BSWAP4
622
        ctx->Yi.d[3] = BSWAP4(ctr);
623
#else
624
969k
        PUTU32(ctx->Yi.c + 12, ctr);
625
0
#endif
626
0
    else
627
0
        ctx->Yi.d[3] = ctr;
628
969k
}
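/*
 * Editor's note, not in the original source: this is the J0 derivation
 * from NIST SP 800-38D.  For a 96-bit IV (the len == 12 fast path),
 *
 *     J0 = IV || 0^31 || 1
 *
 * otherwise
 *
 *     J0 = GHASH_H(IV || 0-pad || [0]_64 || [len(IV)]_64)
 *
 * which is what the Xi-borrowing branch computes before the 32-bit
 * counter ctr is extracted from J0's last four bytes.
 */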
629
630
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
631
                      size_t len)
632
1.12M
{
633
1.12M
    size_t i;
634
1.12M
    unsigned int n;
635
1.12M
    u64 alen = ctx->len.u[0];
636
637
1.12M
    if (ctx->len.u[1])
638
0
        return -2;
639
640
1.12M
    alen += len;
641
1.12M
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
642
0
        return -1;
643
1.12M
    ctx->len.u[0] = alen;
644
645
1.12M
    n = ctx->ares;
646
1.12M
    if (n) {
647
394k
        while (n && len) {
648
252k
            ctx->Xi.c[n] ^= *(aad++);
649
252k
            --len;
650
252k
            n = (n + 1) % 16;
651
252k
        }
652
141k
        if (n == 0)
653
9.30k
            GCM_MUL(ctx);
654
132k
        else {
655
132k
            ctx->ares = n;
656
132k
            return 0;
657
132k
        }
658
141k
    }
659
990k
#ifdef GHASH
660
990k
    if ((i = (len & (size_t)-16))) {
661
463k
        GHASH(ctx, aad, i);
662
463k
        aad += i;
663
463k
        len -= i;
664
463k
    }
665
#else
666
    while (len >= 16) {
667
        for (i = 0; i < 16; ++i)
668
            ctx->Xi.c[i] ^= aad[i];
669
        GCM_MUL(ctx);
670
        aad += 16;
671
        len -= 16;
672
    }
673
#endif
674
990k
    if (len) {
675
881k
        n = (unsigned int)len;
676
9.80M
        for (i = 0; i < len; ++i)
677
8.91M
            ctx->Xi.c[i] ^= aad[i];
678
881k
    }
679
680
990k
    ctx->ares = n;
681
990k
    return 0;
682
1.12M
}
683
684
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
685
                          const unsigned char *in, unsigned char *out,
686
                          size_t len)
687
302k
{
688
302k
    DECLARE_IS_ENDIAN;
689
302k
    unsigned int n, ctr, mres;
690
302k
    size_t i;
691
302k
    u64 mlen = ctx->len.u[1];
692
302k
    block128_f block = ctx->block;
693
302k
    void *key = ctx->key;
694
695
302k
    mlen += len;
696
302k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
697
0
        return -1;
698
302k
    ctx->len.u[1] = mlen;
699
700
302k
    mres = ctx->mres;
701
702
302k
    if (ctx->ares) {
703
        /* First call to encrypt finalizes GHASH(AAD) */
704
4.37k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
705
4.37k
        if (len == 0) {
706
3.93k
            GCM_MUL(ctx);
707
3.93k
            ctx->ares = 0;
708
3.93k
            return 0;
709
3.93k
        }
710
438
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
711
438
        ctx->Xi.u[0] = 0;
712
438
        ctx->Xi.u[1] = 0;
713
438
        mres = sizeof(ctx->Xi);
714
#else
715
        GCM_MUL(ctx);
716
#endif
717
438
        ctx->ares = 0;
718
438
    }
719
720
298k
    if (IS_LITTLE_ENDIAN)
721
#ifdef BSWAP4
722
        ctr = BSWAP4(ctx->Yi.d[3]);
723
#else
724
298k
        ctr = GETU32(ctx->Yi.c + 12);
725
0
#endif
726
0
    else
727
0
        ctr = ctx->Yi.d[3];
728
729
298k
    n = mres % 16;
730
298k
#if !defined(OPENSSL_SMALL_FOOTPRINT)
731
298k
    if (16 % sizeof(size_t) == 0) { /* always true actually */
732
298k
        do {
733
298k
            if (n) {
734
292k
# if defined(GHASH)
735
4.12M
                while (n && len) {
736
3.83M
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
737
3.83M
                    --len;
738
3.83M
                    n = (n + 1) % 16;
739
3.83M
                }
740
292k
                if (n == 0) {
741
292k
                    GHASH(ctx, ctx->Xn, mres);
742
292k
                    mres = 0;
743
292k
                } else {
744
0
                    ctx->mres = mres;
745
0
                    return 0;
746
0
                }
747
# else
748
                while (n && len) {
749
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
750
                    --len;
751
                    n = (n + 1) % 16;
752
                }
753
                if (n == 0) {
754
                    GCM_MUL(ctx);
755
                    mres = 0;
756
                } else {
757
                    ctx->mres = n;
758
                    return 0;
759
                }
760
# endif
761
292k
            }
762
298k
# if defined(STRICT_ALIGNMENT)
763
298k
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
764
296k
                break;
765
2.23k
# endif
766
2.23k
# if defined(GHASH)
767
2.23k
            if (len >= 16 && mres) {
768
228
                GHASH(ctx, ctx->Xn, mres);
769
228
                mres = 0;
770
228
            }
771
2.23k
#  if defined(GHASH_CHUNK)
772
2.23k
            while (len >= GHASH_CHUNK) {
773
0
                size_t j = GHASH_CHUNK;
774
775
0
                while (j) {
776
0
                    size_t_aX *out_t = (size_t_aX *)out;
777
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
778
779
0
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
780
0
                    ++ctr;
781
0
                    if (IS_LITTLE_ENDIAN)
782
#   ifdef BSWAP4
783
                        ctx->Yi.d[3] = BSWAP4(ctr);
784
#   else
785
0
                        PUTU32(ctx->Yi.c + 12, ctr);
786
0
#   endif
787
0
                    else
788
0
                        ctx->Yi.d[3] = ctr;
789
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
790
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
791
0
                    out += 16;
792
0
                    in += 16;
793
0
                    j -= 16;
794
0
                }
795
0
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
796
0
                len -= GHASH_CHUNK;
797
0
            }
798
2.23k
#  endif
799
2.23k
            if ((i = (len & (size_t)-16))) {
800
228
                size_t j = i;
801
802
456
                while (len >= 16) {
803
228
                    size_t_aX *out_t = (size_t_aX *)out;
804
228
                    const size_t_aX *in_t = (const size_t_aX *)in;
805
806
228
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
807
228
                    ++ctr;
808
228
                    if (IS_LITTLE_ENDIAN)
809
#  ifdef BSWAP4
810
                        ctx->Yi.d[3] = BSWAP4(ctr);
811
#  else
812
228
                        PUTU32(ctx->Yi.c + 12, ctr);
813
0
#  endif
814
0
                    else
815
0
                        ctx->Yi.d[3] = ctr;
816
684
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
817
456
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
818
228
                    out += 16;
819
228
                    in += 16;
820
228
                    len -= 16;
821
228
                }
822
228
                GHASH(ctx, out - j, j);
823
228
            }
824
# else
825
            while (len >= 16) {
826
                size_t *out_t = (size_t *)out;
827
                const size_t *in_t = (const size_t *)in;
828
829
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
830
                ++ctr;
831
                if (IS_LITTLE_ENDIAN)
832
#  ifdef BSWAP4
833
                    ctx->Yi.d[3] = BSWAP4(ctr);
834
#  else
835
                    PUTU32(ctx->Yi.c + 12, ctr);
836
#  endif
837
                else
838
                    ctx->Yi.d[3] = ctr;
839
                for (i = 0; i < 16 / sizeof(size_t); ++i)
840
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
841
                GCM_MUL(ctx);
842
                out += 16;
843
                in += 16;
844
                len -= 16;
845
            }
846
# endif
847
2.23k
            if (len) {
848
210
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
849
210
                ++ctr;
850
210
                if (IS_LITTLE_ENDIAN)
851
# ifdef BSWAP4
852
                    ctx->Yi.d[3] = BSWAP4(ctr);
853
# else
854
210
                    PUTU32(ctx->Yi.c + 12, ctr);
855
0
# endif
856
0
                else
857
0
                    ctx->Yi.d[3] = ctr;
858
210
# if defined(GHASH)
859
630
                while (len--) {
860
420
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
861
420
                    ++n;
862
420
                }
863
# else
864
                while (len--) {
865
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
866
                    ++n;
867
                }
868
                mres = n;
869
# endif
870
210
            }
871
872
2.23k
            ctx->mres = mres;
873
2.23k
            return 0;
874
298k
        } while (0);
875
298k
    }
876
296k
#endif
877
296k
    for (i = 0; i < len; ++i) {
878
0
        if (n == 0) {
879
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
880
0
            ++ctr;
881
0
            if (IS_LITTLE_ENDIAN)
882
#ifdef BSWAP4
883
                ctx->Yi.d[3] = BSWAP4(ctr);
884
#else
885
0
                PUTU32(ctx->Yi.c + 12, ctr);
886
0
#endif
887
0
            else
888
0
                ctx->Yi.d[3] = ctr;
889
0
        }
890
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
891
0
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
892
0
        n = (n + 1) % 16;
893
0
        if (mres == sizeof(ctx->Xn)) {
894
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
895
0
            mres = 0;
896
0
        }
897
#else
898
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
899
        mres = n = (n + 1) % 16;
900
        if (n == 0)
901
            GCM_MUL(ctx);
902
#endif
903
0
    }
904
905
296k
    ctx->mres = mres;
906
296k
    return 0;
907
298k
}
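/*
 * Editor's note, not in the original source: in the GHASH builds this
 * function defers hashing rather than folding ciphertext into Xi byte by
 * byte.  Output accumulates in ctx->Xn (mres counts the buffered bytes,
 * and the buffer is seeded with the finalized Xi when pending AAD is
 * flushed), then whole batches are handed to GHASH, keeping the
 * assembler routines on full blocks where they are fastest.
 */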
908
909
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
910
                          const unsigned char *in, unsigned char *out,
911
                          size_t len)
912
168k
{
913
168k
    DECLARE_IS_ENDIAN;
914
168k
    unsigned int n, ctr, mres;
915
168k
    size_t i;
916
168k
    u64 mlen = ctx->len.u[1];
917
168k
    block128_f block = ctx->block;
918
168k
    void *key = ctx->key;
919
920
168k
    mlen += len;
921
168k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
922
0
        return -1;
923
168k
    ctx->len.u[1] = mlen;
924
925
168k
    mres = ctx->mres;
926
927
168k
    if (ctx->ares) {
928
        /* First call to decrypt finalizes GHASH(AAD) */
929
83.4k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
930
83.4k
        if (len == 0) {
931
78.1k
            GCM_MUL(ctx);
932
78.1k
            ctx->ares = 0;
933
78.1k
            return 0;
934
78.1k
        }
935
5.25k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
936
5.25k
        ctx->Xi.u[0] = 0;
937
5.25k
        ctx->Xi.u[1] = 0;
938
5.25k
        mres = sizeof(ctx->Xi);
939
#else
940
        GCM_MUL(ctx);
941
#endif
942
5.25k
        ctx->ares = 0;
943
5.25k
    }
944
945
90.5k
    if (IS_LITTLE_ENDIAN)
946
#ifdef BSWAP4
947
        ctr = BSWAP4(ctx->Yi.d[3]);
948
#else
949
90.5k
        ctr = GETU32(ctx->Yi.c + 12);
950
0
#endif
951
0
    else
952
0
        ctr = ctx->Yi.d[3];
953
954
90.5k
    n = mres % 16;
955
90.5k
#if !defined(OPENSSL_SMALL_FOOTPRINT)
956
90.5k
    if (16 % sizeof(size_t) == 0) { /* always true actually */
957
90.5k
        do {
958
90.5k
            if (n) {
959
0
# if defined(GHASH)
960
0
                while (n && len) {
961
0
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
962
0
                    --len;
963
0
                    n = (n + 1) % 16;
964
0
                }
965
0
                if (n == 0) {
966
0
                    GHASH(ctx, ctx->Xn, mres);
967
0
                    mres = 0;
968
0
                } else {
969
0
                    ctx->mres = mres;
970
0
                    return 0;
971
0
                }
972
# else
973
                while (n && len) {
974
                    u8 c = *(in++);
975
                    *(out++) = c ^ ctx->EKi.c[n];
976
                    ctx->Xi.c[n] ^= c;
977
                    --len;
978
                    n = (n + 1) % 16;
979
                }
980
                if (n == 0) {
981
                    GCM_MUL(ctx);
982
                    mres = 0;
983
                } else {
984
                    ctx->mres = n;
985
                    return 0;
986
                }
987
# endif
988
0
            }
989
90.5k
# if defined(STRICT_ALIGNMENT)
990
90.5k
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
991
976
                break;
992
89.5k
# endif
993
89.5k
# if defined(GHASH)
994
89.5k
            if (len >= 16 && mres) {
995
1.26k
                GHASH(ctx, ctx->Xn, mres);
996
1.26k
                mres = 0;
997
1.26k
            }
998
89.5k
#  if defined(GHASH_CHUNK)
999
90.9k
            while (len >= GHASH_CHUNK) {
1000
1.37k
                size_t j = GHASH_CHUNK;
1001
1002
1.37k
                GHASH(ctx, in, GHASH_CHUNK);
1003
265k
                while (j) {
1004
264k
                    size_t_aX *out_t = (size_t_aX *)out;
1005
264k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1006
1007
264k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1008
264k
                    ++ctr;
1009
264k
                    if (IS_LITTLE_ENDIAN)
1010
#   ifdef BSWAP4
1011
                        ctx->Yi.d[3] = BSWAP4(ctr);
1012
#   else
1013
264k
                        PUTU32(ctx->Yi.c + 12, ctr);
1014
0
#   endif
1015
0
                    else
1016
0
                        ctx->Yi.d[3] = ctr;
1017
793k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1018
528k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1019
264k
                    out += 16;
1020
264k
                    in += 16;
1021
264k
                    j -= 16;
1022
264k
                }
1023
1.37k
                len -= GHASH_CHUNK;
1024
1.37k
            }
1025
89.5k
#  endif
1026
89.5k
            if ((i = (len & (size_t)-16))) {
1027
1.25k
                GHASH(ctx, in, i);
1028
22.9k
                while (len >= 16) {
1029
21.7k
                    size_t_aX *out_t = (size_t_aX *)out;
1030
21.7k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1031
1032
21.7k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1033
21.7k
                    ++ctr;
1034
21.7k
                    if (IS_LITTLE_ENDIAN)
1035
#  ifdef BSWAP4
1036
                        ctx->Yi.d[3] = BSWAP4(ctr);
1037
#  else
1038
21.7k
                        PUTU32(ctx->Yi.c + 12, ctr);
1039
0
#  endif
1040
0
                    else
1041
0
                        ctx->Yi.d[3] = ctr;
1042
65.1k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1043
43.4k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1044
21.7k
                    out += 16;
1045
21.7k
                    in += 16;
1046
21.7k
                    len -= 16;
1047
21.7k
                }
1048
1.25k
            }
1049
# else
1050
            while (len >= 16) {
1051
                size_t *out_t = (size_t *)out;
1052
                const size_t *in_t = (const size_t *)in;
1053
1054
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1055
                ++ctr;
1056
                if (IS_LITTLE_ENDIAN)
1057
#  ifdef BSWAP4
1058
                    ctx->Yi.d[3] = BSWAP4(ctr);
1059
#  else
1060
                    PUTU32(ctx->Yi.c + 12, ctr);
1061
#  endif
1062
                else
1063
                    ctx->Yi.d[3] = ctr;
1064
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1065
                    size_t c = in_t[i];
1066
                    out_t[i] = c ^ ctx->EKi.t[i];
1067
                    ctx->Xi.t[i] ^= c;
1068
                }
1069
                GCM_MUL(ctx);
1070
                out += 16;
1071
                in += 16;
1072
                len -= 16;
1073
            }
1074
# endif
1075
89.5k
            if (len) {
1076
4.95k
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1077
4.95k
                ++ctr;
1078
4.95k
                if (IS_LITTLE_ENDIAN)
1079
# ifdef BSWAP4
1080
                    ctx->Yi.d[3] = BSWAP4(ctr);
1081
# else
1082
4.95k
                    PUTU32(ctx->Yi.c + 12, ctr);
1083
0
# endif
1084
0
                else
1085
0
                    ctx->Yi.d[3] = ctr;
1086
4.95k
# if defined(GHASH)
1087
49.0k
                while (len--) {
1088
44.1k
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1089
44.1k
                    ++n;
1090
44.1k
                }
1091
# else
1092
                while (len--) {
1093
                    u8 c = in[n];
1094
                    ctx->Xi.c[n] ^= c;
1095
                    out[n] = c ^ ctx->EKi.c[n];
1096
                    ++n;
1097
                }
1098
                mres = n;
1099
# endif
1100
4.95k
            }
1101
1102
89.5k
            ctx->mres = mres;
1103
89.5k
            return 0;
1104
90.5k
        } while (0);
1105
90.5k
    }
1106
976
#endif
1107
976
    for (i = 0; i < len; ++i) {
1108
0
        u8 c;
1109
0
        if (n == 0) {
1110
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1111
0
            ++ctr;
1112
0
            if (IS_LITTLE_ENDIAN)
1113
#ifdef BSWAP4
1114
                ctx->Yi.d[3] = BSWAP4(ctr);
1115
#else
1116
0
                PUTU32(ctx->Yi.c + 12, ctr);
1117
0
#endif
1118
0
            else
1119
0
                ctx->Yi.d[3] = ctr;
1120
0
        }
1121
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1122
0
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1123
0
        n = (n + 1) % 16;
1124
0
        if (mres == sizeof(ctx->Xn)) {
1125
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1126
0
            mres = 0;
1127
0
        }
1128
#else
1129
        c = in[i];
1130
        out[i] = c ^ ctx->EKi.c[n];
1131
        ctx->Xi.c[n] ^= c;
1132
        mres = n = (n + 1) % 16;
1133
        if (n == 0)
1134
            GCM_MUL(ctx);
1135
#endif
1136
0
    }
1137
1138
976
    ctx->mres = mres;
1139
976
    return 0;
1140
90.5k
}
1141
1142
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1143
                                const unsigned char *in, unsigned char *out,
1144
                                size_t len, ctr128_f stream)
1145
891k
{
1146
#if defined(OPENSSL_SMALL_FOOTPRINT)
1147
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1148
#else
1149
891k
    DECLARE_IS_ENDIAN;
1150
891k
    unsigned int n, ctr, mres;
1151
891k
    size_t i;
1152
891k
    u64 mlen = ctx->len.u[1];
1153
891k
    void *key = ctx->key;
1154
1155
891k
    mlen += len;
1156
891k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1157
0
        return -1;
1158
891k
    ctx->len.u[1] = mlen;
1159
1160
891k
    mres = ctx->mres;
1161
1162
891k
    if (ctx->ares) {
1163
        /* First call to encrypt finalizes GHASH(AAD) */
1164
429k
#if defined(GHASH)
1165
429k
        if (len == 0) {
1166
0
            GCM_MUL(ctx);
1167
0
            ctx->ares = 0;
1168
0
            return 0;
1169
0
        }
1170
429k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1171
429k
        ctx->Xi.u[0] = 0;
1172
429k
        ctx->Xi.u[1] = 0;
1173
429k
        mres = sizeof(ctx->Xi);
1174
#else
1175
        GCM_MUL(ctx);
1176
#endif
1177
429k
        ctx->ares = 0;
1178
429k
    }
1179
1180
891k
    if (IS_LITTLE_ENDIAN)
1181
# ifdef BSWAP4
1182
        ctr = BSWAP4(ctx->Yi.d[3]);
1183
# else
1184
891k
        ctr = GETU32(ctx->Yi.c + 12);
1185
0
# endif
1186
0
    else
1187
0
        ctr = ctx->Yi.d[3];
1188
1189
891k
    n = mres % 16;
1190
891k
    if (n) {
1191
150k
# if defined(GHASH)
1192
999k
        while (n && len) {
1193
849k
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1194
849k
            --len;
1195
849k
            n = (n + 1) % 16;
1196
849k
        }
1197
150k
        if (n == 0) {
1198
76.9k
            GHASH(ctx, ctx->Xn, mres);
1199
76.9k
            mres = 0;
1200
76.9k
        } else {
1201
73.2k
            ctx->mres = mres;
1202
73.2k
            return 0;
1203
73.2k
        }
1204
# else
1205
        while (n && len) {
1206
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1207
            --len;
1208
            n = (n + 1) % 16;
1209
        }
1210
        if (n == 0) {
1211
            GCM_MUL(ctx);
1212
            mres = 0;
1213
        } else {
1214
            ctx->mres = n;
1215
            return 0;
1216
        }
1217
# endif
1218
150k
    }
1219
817k
# if defined(GHASH)
1220
817k
    if (len >= 16 && mres) {
1221
26.7k
        GHASH(ctx, ctx->Xn, mres);
1222
26.7k
        mres = 0;
1223
26.7k
    }
1224
817k
#  if defined(GHASH_CHUNK)
1225
817k
    while (len >= GHASH_CHUNK) {
1226
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1227
0
        ctr += GHASH_CHUNK / 16;
1228
0
        if (IS_LITTLE_ENDIAN)
1229
#   ifdef BSWAP4
1230
            ctx->Yi.d[3] = BSWAP4(ctr);
1231
#   else
1232
0
            PUTU32(ctx->Yi.c + 12, ctr);
1233
0
#   endif
1234
0
        else
1235
0
            ctx->Yi.d[3] = ctr;
1236
0
        GHASH(ctx, out, GHASH_CHUNK);
1237
0
        out += GHASH_CHUNK;
1238
0
        in += GHASH_CHUNK;
1239
0
        len -= GHASH_CHUNK;
1240
0
    }
1241
817k
#  endif
1242
817k
# endif
1243
817k
    if ((i = (len & (size_t)-16))) {
1244
155k
        size_t j = i / 16;
1245
1246
155k
        (*stream) (in, out, j, key, ctx->Yi.c);
1247
155k
        ctr += (unsigned int)j;
1248
155k
        if (IS_LITTLE_ENDIAN)
1249
# ifdef BSWAP4
1250
            ctx->Yi.d[3] = BSWAP4(ctr);
1251
# else
1252
155k
            PUTU32(ctx->Yi.c + 12, ctr);
1253
0
# endif
1254
0
        else
1255
0
            ctx->Yi.d[3] = ctr;
1256
155k
        in += i;
1257
155k
        len -= i;
1258
155k
# if defined(GHASH)
1259
155k
        GHASH(ctx, out, i);
1260
155k
        out += i;
1261
# else
1262
        while (j--) {
1263
            for (i = 0; i < 16; ++i)
1264
                ctx->Xi.c[i] ^= out[i];
1265
            GCM_MUL(ctx);
1266
            out += 16;
1267
        }
1268
# endif
1269
155k
    }
1270
817k
    if (len) {
1271
801k
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1272
801k
        ++ctr;
1273
801k
        if (IS_LITTLE_ENDIAN)
1274
# ifdef BSWAP4
1275
            ctx->Yi.d[3] = BSWAP4(ctr);
1276
# else
1277
801k
            PUTU32(ctx->Yi.c + 12, ctr);
1278
0
# endif
1279
0
        else
1280
0
            ctx->Yi.d[3] = ctr;
1281
3.73M
        while (len--) {
1282
2.93M
# if defined(GHASH)
1283
2.93M
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1284
# else
1285
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1286
# endif
1287
2.93M
            ++n;
1288
2.93M
        }
1289
801k
    }
1290
1291
817k
    ctx->mres = mres;
1292
817k
    return 0;
1293
891k
#endif
1294
891k
}
1295
1296
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1297
                                const unsigned char *in, unsigned char *out,
1298
                                size_t len, ctr128_f stream)
1299
374k
{
1300
#if defined(OPENSSL_SMALL_FOOTPRINT)
1301
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1302
#else
1303
374k
    DECLARE_IS_ENDIAN;
1304
374k
    unsigned int n, ctr, mres;
1305
374k
    size_t i;
1306
374k
    u64 mlen = ctx->len.u[1];
1307
374k
    void *key = ctx->key;
1308
1309
374k
    mlen += len;
1310
374k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1311
0
        return -1;
1312
374k
    ctx->len.u[1] = mlen;
1313
1314
374k
    mres = ctx->mres;
1315
1316
374k
    if (ctx->ares) {
1317
        /* First call to decrypt finalizes GHASH(AAD) */
1318
211k
# if defined(GHASH)
1319
211k
        if (len == 0) {
1320
283
            GCM_MUL(ctx);
1321
283
            ctx->ares = 0;
1322
283
            return 0;
1323
283
        }
1324
210k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1325
210k
        ctx->Xi.u[0] = 0;
1326
210k
        ctx->Xi.u[1] = 0;
1327
210k
        mres = sizeof(ctx->Xi);
1328
# else
1329
        GCM_MUL(ctx);
1330
# endif
1331
210k
        ctx->ares = 0;
1332
210k
    }
1333
1334
374k
    if (IS_LITTLE_ENDIAN)
1335
# ifdef BSWAP4
1336
        ctr = BSWAP4(ctx->Yi.d[3]);
1337
# else
1338
374k
        ctr = GETU32(ctx->Yi.c + 12);
1339
0
# endif
1340
0
    else
1341
0
        ctr = ctx->Yi.d[3];
1342
1343
374k
    n = mres % 16;
1344
374k
    if (n) {
1345
0
# if defined(GHASH)
1346
0
        while (n && len) {
1347
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1348
0
            --len;
1349
0
            n = (n + 1) % 16;
1350
0
        }
1351
0
        if (n == 0) {
1352
0
            GHASH(ctx, ctx->Xn, mres);
1353
0
            mres = 0;
1354
0
        } else {
1355
0
            ctx->mres = mres;
1356
0
            return 0;
1357
0
        }
1358
# else
1359
        while (n && len) {
1360
            u8 c = *(in++);
1361
            *(out++) = c ^ ctx->EKi.c[n];
1362
            ctx->Xi.c[n] ^= c;
1363
            --len;
1364
            n = (n + 1) % 16;
1365
        }
1366
        if (n == 0) {
1367
            GCM_MUL(ctx);
1368
            mres = 0;
1369
        } else {
1370
            ctx->mres = n;
1371
            return 0;
1372
        }
1373
# endif
1374
0
    }
1375
374k
# if defined(GHASH)
1376
374k
    if (len >= 16 && mres) {
1377
0
        GHASH(ctx, ctx->Xn, mres);
1378
0
        mres = 0;
1379
0
    }
1380
374k
#  if defined(GHASH_CHUNK)
1381
374k
    while (len >= GHASH_CHUNK) {
1382
0
        GHASH(ctx, in, GHASH_CHUNK);
1383
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1384
0
        ctr += GHASH_CHUNK / 16;
1385
0
        if (IS_LITTLE_ENDIAN)
1386
#   ifdef BSWAP4
1387
            ctx->Yi.d[3] = BSWAP4(ctr);
1388
#   else
1389
0
            PUTU32(ctx->Yi.c + 12, ctr);
1390
0
#   endif
1391
0
        else
1392
0
            ctx->Yi.d[3] = ctr;
1393
0
        out += GHASH_CHUNK;
1394
0
        in += GHASH_CHUNK;
1395
0
        len -= GHASH_CHUNK;
1396
0
    }
1397
374k
#  endif
1398
374k
# endif
1399
374k
    if ((i = (len & (size_t)-16))) {
1400
139k
        size_t j = i / 16;
1401
1402
139k
# if defined(GHASH)
1403
139k
        GHASH(ctx, in, i);
1404
# else
1405
        while (j--) {
1406
            size_t k;
1407
            for (k = 0; k < 16; ++k)
1408
                ctx->Xi.c[k] ^= in[k];
1409
            GCM_MUL(ctx);
1410
            in += 16;
1411
        }
1412
        j = i / 16;
1413
        in -= i;
1414
# endif
1415
139k
        (*stream) (in, out, j, key, ctx->Yi.c);
1416
139k
        ctr += (unsigned int)j;
1417
139k
        if (IS_LITTLE_ENDIAN)
1418
# ifdef BSWAP4
1419
            ctx->Yi.d[3] = BSWAP4(ctr);
1420
# else
1421
139k
            PUTU32(ctx->Yi.c + 12, ctr);
1422
0
# endif
1423
0
        else
1424
0
            ctx->Yi.d[3] = ctr;
1425
139k
        out += i;
1426
139k
        in += i;
1427
139k
        len -= i;
1428
139k
    }
1429
374k
    if (len) {
1430
360k
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1431
360k
        ++ctr;
1432
360k
        if (IS_LITTLE_ENDIAN)
1433
# ifdef BSWAP4
1434
            ctx->Yi.d[3] = BSWAP4(ctr);
1435
# else
1436
360k
            PUTU32(ctx->Yi.c + 12, ctr);
1437
0
# endif
1438
0
        else
1439
0
            ctx->Yi.d[3] = ctr;
1440
1.52M
        while (len--) {
1441
1.16M
# if defined(GHASH)
1442
1.16M
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1443
# else
1444
            u8 c = in[n];
1445
            ctx->Xi.c[mres++] ^= c;
1446
            out[n] = c ^ ctx->EKi.c[n];
1447
# endif
1448
1.16M
            ++n;
1449
1.16M
        }
1450
360k
    }
1451
1452
374k
    ctx->mres = mres;
1453
374k
    return 0;
1454
374k
#endif
1455
374k
}
1456
1457
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1458
                         size_t len)
1459
969k
{
1460
969k
    DECLARE_IS_ENDIAN;
1461
969k
    u64 alen = ctx->len.u[0] << 3;
1462
969k
    u64 clen = ctx->len.u[1] << 3;
1463
1464
969k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1465
969k
    u128 bitlen;
1466
969k
    unsigned int mres = ctx->mres;
1467
1468
969k
    if (mres) {
1469
796k
        unsigned blocks = (mres + 15) & -16;
1470
1471
796k
        memset(ctx->Xn + mres, 0, blocks - mres);
1472
796k
        mres = blocks;
1473
796k
        if (mres == sizeof(ctx->Xn)) {
1474
0
            GHASH(ctx, ctx->Xn, mres);
1475
0
            mres = 0;
1476
0
        }
1477
796k
    } else if (ctx->ares) {
1478
143k
        GCM_MUL(ctx);
1479
143k
    }
1480
#else
1481
    if (ctx->mres || ctx->ares)
1482
        GCM_MUL(ctx);
1483
#endif
1484
1485
969k
    if (IS_LITTLE_ENDIAN) {
1486
#ifdef BSWAP8
1487
        alen = BSWAP8(alen);
1488
        clen = BSWAP8(clen);
1489
#else
1490
969k
        u8 *p = ctx->len.c;
1491
1492
969k
        ctx->len.u[0] = alen;
1493
969k
        ctx->len.u[1] = clen;
1494
1495
969k
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1496
969k
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1497
969k
#endif
1498
969k
    }
1499
1500
969k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1501
969k
    bitlen.hi = alen;
1502
969k
    bitlen.lo = clen;
1503
969k
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1504
969k
    mres += sizeof(bitlen);
1505
969k
    GHASH(ctx, ctx->Xn, mres);
1506
#else
1507
    ctx->Xi.u[0] ^= alen;
1508
    ctx->Xi.u[1] ^= clen;
1509
    GCM_MUL(ctx);
1510
#endif
1511
1512
969k
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1513
969k
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1514
1515
969k
    if (tag && len <= sizeof(ctx->Xi))
1516
380k
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1517
589k
    else
1518
589k
        return -1;
1519
969k
}
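/*
 * Editor's note, not in the original source: the tail hashed here is the
 * final GHASH block [len(A)]_64 || [len(C)]_64, completing
 *
 *     S = GHASH_H(A || 0-pad || C || 0-pad || [len(A)]_64 || [len(C)]_64)
 *
 * and the tag is T = E_K(J0) ^ S -- EK0 was captured in
 * CRYPTO_gcm128_setiv, and CRYPTO_memcmp keeps the tag comparison
 * constant-time.
 */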
1520
1521
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1522
589k
{
1523
589k
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1524
589k
    memcpy(tag, ctx->Xi.c,
1525
589k
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1526
589k
}
1527
1528
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1529
0
{
1530
0
    GCM128_CONTEXT *ret;
1531
1532
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1533
0
        CRYPTO_gcm128_init(ret, key, block);
1534
1535
0
    return ret;
1536
0
}
1537
1538
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1539
0
{
1540
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1541
0
}