Coverage Report

Created: 2024-05-21 06:33

/src/openssl/crypto/modes/gcm128.c
Line | Count | Source
1
/*
2
 * Copyright 2010-2024 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <string.h>
11
#include <openssl/crypto.h>
12
#include "internal/cryptlib.h"
13
#include "internal/endian.h"
14
#include "crypto/modes.h"
15
16
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17
typedef size_t size_t_aX __attribute((__aligned__(1)));
18
#else
19
typedef size_t size_t_aX;
20
#endif
21
22
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23
/* redefine, because alignment is ensured */
24
# undef  GETU32
25
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
26
# undef  PUTU32
27
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
28
#endif
29
30
/* RISC-V uses C implementation as a fallback. */
31
#if defined(__riscv)
32
# define INCLUDE_C_GMULT_4BIT
33
# define INCLUDE_C_GHASH_4BIT
34
#endif
35
36
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
37
0
#define REDUCE1BIT(V)   do { \
38
0
        if (sizeof(size_t)==8) { \
39
0
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
40
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
41
0
                V.hi  = (V.hi>>1 )^T; \
42
0
        } \
43
0
        else { \
44
0
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
45
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
46
0
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
47
0
        } \
48
0
} while(0)
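/*
 * In effect REDUCE1BIT multiplies the field element V by x in GF(2^128)
 * modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1.  In GCM's
 * reflected bit order that reduction shows up as the constant 0xE1 at
 * the top of the high word, hence the conditional XOR with
 * U64(0xe100000000000000) after the one-bit right shift.
 */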
49
50
/*-
51
 *
52
 * NOTE: TABLE_BITS and all non-4bit implementations have been removed in 3.1.
53
 *
54
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
55
 * never be set to 8. 8 is effectively reserved for testing purposes.
56
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
57
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
58
 * the whole spectrum of possible table-driven implementations. Why? In
59
 * the non-"Shoup's" case the memory access pattern is segmented in such a
60
 * manner that it's trivial to see that cache-timing information can
61
 * reveal a fair portion of the intermediate hash value. Given that the
62
 * ciphertext is always available to the attacker, they can attempt to
63
 * deduce the secret parameter H and, if successful, tamper with messages
64
 * [which is entirely trivial in CTR mode]. In "Shoup's" case it's
65
 * not as trivial, but there is no reason to believe that it's resistant
66
 * to cache-timing attacks. As for the "8-bit" implementation, it
67
 * consumes 16 (sixteen) times more memory, 4KB per individual
68
 * key + 1KB shared. On the plus side it should be twice as fast as the
69
 * "4-bit" version, and for gcc-generated x86[_64] code the "8-bit" version
70
 * was observed to run ~75% faster, closer to 100% with commercial
71
 * compilers... Yet the "4-bit" procedure is preferred, because it's
72
 * believed to provide better security-performance balance and adequate
73
 * all-round performance. "All-round" refers to things like:
74
 *
75
 * - shorter setup time effectively improves overall timing for
76
 *   handling short messages;
77
 * - larger table allocation can become unbearable because of VM
78
 *   subsystem penalties (for example on Windows a large enough free()
79
 *   results in VM working set trimming, meaning that a subsequent
80
 *   malloc() would immediately incur working set expansion);
81
 * - larger table has larger cache footprint, which can affect
82
 *   performance of other code paths (not necessarily even from same
83
 *   thread in Hyper-Threading world);
84
 *
85
 * Value of 1 is not appropriate for performance reasons.
86
 */
87
88
static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
89
0
{
90
0
    u128 V;
91
# if defined(OPENSSL_SMALL_FOOTPRINT)
92
    int i;
93
# endif
94
95
0
    Htable[0].hi = 0;
96
0
    Htable[0].lo = 0;
97
0
    V.hi = H[0];
98
0
    V.lo = H[1];
99
100
# if defined(OPENSSL_SMALL_FOOTPRINT)
101
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
102
        REDUCE1BIT(V);
103
        Htable[i] = V;
104
    }
105
106
    for (i = 2; i < 16; i <<= 1) {
107
        u128 *Hi = Htable + i;
108
        int j;
109
        for (V = *Hi, j = 1; j < i; ++j) {
110
            Hi[j].hi = V.hi ^ Htable[j].hi;
111
            Hi[j].lo = V.lo ^ Htable[j].lo;
112
        }
113
    }
114
# else
115
0
    Htable[8] = V;
116
0
    REDUCE1BIT(V);
117
0
    Htable[4] = V;
118
0
    REDUCE1BIT(V);
119
0
    Htable[2] = V;
120
0
    REDUCE1BIT(V);
121
0
    Htable[1] = V;
122
0
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
123
0
    V = Htable[4];
124
0
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
125
0
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
126
0
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
127
0
    V = Htable[8];
128
0
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
129
0
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
130
0
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
131
0
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
132
0
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
133
0
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
134
0
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
135
0
# endif
136
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
137
    /*
138
     * ARM assembler expects specific dword order in Htable.
139
     */
140
    {
141
        int j;
142
        DECLARE_IS_ENDIAN;
143
144
        if (IS_LITTLE_ENDIAN)
145
            for (j = 0; j < 16; ++j) {
146
                V = Htable[j];
147
                Htable[j].hi = V.lo;
148
                Htable[j].lo = V.hi;
149
        } else
150
            for (j = 0; j < 16; ++j) {
151
                V = Htable[j];
152
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
153
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
154
            }
155
    }
156
# endif
157
0
}
158
159
# if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
160
static const size_t rem_4bit[16] = {
161
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
162
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
163
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
164
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
165
};
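/*
 * Roughly: rem_4bit[n] is the reduction term for the four bits that a
 * 4-bit right shift pushes out of the low end of the accumulator;
 * PACK() parks it in the top 16 bits of a size_t so it can be XORed
 * straight into the top of Z.hi.  The 4-bit routines below consume Xi
 * one nibble at a time, combining Htable[] lookups with this table.
 */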
166
167
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
168
0
{
169
0
    u128 Z;
170
0
    int cnt = 15;
171
0
    size_t rem, nlo, nhi;
172
0
    DECLARE_IS_ENDIAN;
173
174
0
    nlo = ((const u8 *)Xi)[15];
175
0
    nhi = nlo >> 4;
176
0
    nlo &= 0xf;
177
178
0
    Z.hi = Htable[nlo].hi;
179
0
    Z.lo = Htable[nlo].lo;
180
181
0
    while (1) {
182
0
        rem = (size_t)Z.lo & 0xf;
183
0
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
184
0
        Z.hi = (Z.hi >> 4);
185
0
        if (sizeof(size_t) == 8)
186
0
            Z.hi ^= rem_4bit[rem];
187
0
        else
188
0
            Z.hi ^= (u64)rem_4bit[rem] << 32;
189
190
0
        Z.hi ^= Htable[nhi].hi;
191
0
        Z.lo ^= Htable[nhi].lo;
192
193
0
        if (--cnt < 0)
194
0
            break;
195
196
0
        nlo = ((const u8 *)Xi)[cnt];
197
0
        nhi = nlo >> 4;
198
0
        nlo &= 0xf;
199
200
0
        rem = (size_t)Z.lo & 0xf;
201
0
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
202
0
        Z.hi = (Z.hi >> 4);
203
0
        if (sizeof(size_t) == 8)
204
0
            Z.hi ^= rem_4bit[rem];
205
0
        else
206
0
            Z.hi ^= (u64)rem_4bit[rem] << 32;
207
208
0
        Z.hi ^= Htable[nlo].hi;
209
0
        Z.lo ^= Htable[nlo].lo;
210
0
    }
211
212
0
    if (IS_LITTLE_ENDIAN) {
213
#  ifdef BSWAP8
214
        Xi[0] = BSWAP8(Z.hi);
215
        Xi[1] = BSWAP8(Z.lo);
216
#  else
217
0
        u8 *p = (u8 *)Xi;
218
0
        u32 v;
219
0
        v = (u32)(Z.hi >> 32);
220
0
        PUTU32(p, v);
221
0
        v = (u32)(Z.hi);
222
0
        PUTU32(p + 4, v);
223
0
        v = (u32)(Z.lo >> 32);
224
0
        PUTU32(p + 8, v);
225
0
        v = (u32)(Z.lo);
226
0
        PUTU32(p + 12, v);
227
0
#  endif
228
0
    } else {
229
0
        Xi[0] = Z.hi;
230
0
        Xi[1] = Z.lo;
231
0
    }
232
0
}
233
234
# endif
235
236
# if !defined(GHASH_ASM) || defined(INCLUDE_C_GHASH_4BIT)
237
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
238
/*
239
 * Streamed variant of gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
240
 * details... Compiler-generated code doesn't seem to give any
241
 * performance improvement, at least not on x86[_64]. It's here
242
 * mostly as a reference and a placeholder for possible future
243
 * non-trivial optimization[s]...
244
 */
245
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
246
                           const u8 *inp, size_t len)
247
0
{
248
0
    u128 Z;
249
0
    int cnt;
250
0
    size_t rem, nlo, nhi;
251
0
    DECLARE_IS_ENDIAN;
252
253
0
    do {
254
0
        cnt = 15;
255
0
        nlo = ((const u8 *)Xi)[15];
256
0
        nlo ^= inp[15];
257
0
        nhi = nlo >> 4;
258
0
        nlo &= 0xf;
259
260
0
        Z.hi = Htable[nlo].hi;
261
0
        Z.lo = Htable[nlo].lo;
262
263
0
        while (1) {
264
0
            rem = (size_t)Z.lo & 0xf;
265
0
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
266
0
            Z.hi = (Z.hi >> 4);
267
0
            if (sizeof(size_t) == 8)
268
0
                Z.hi ^= rem_4bit[rem];
269
0
            else
270
0
                Z.hi ^= (u64)rem_4bit[rem] << 32;
271
272
0
            Z.hi ^= Htable[nhi].hi;
273
0
            Z.lo ^= Htable[nhi].lo;
274
275
0
            if (--cnt < 0)
276
0
                break;
277
278
0
            nlo = ((const u8 *)Xi)[cnt];
279
0
            nlo ^= inp[cnt];
280
0
            nhi = nlo >> 4;
281
0
            nlo &= 0xf;
282
283
0
            rem = (size_t)Z.lo & 0xf;
284
0
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
285
0
            Z.hi = (Z.hi >> 4);
286
0
            if (sizeof(size_t) == 8)
287
0
                Z.hi ^= rem_4bit[rem];
288
0
            else
289
0
                Z.hi ^= (u64)rem_4bit[rem] << 32;
290
291
0
            Z.hi ^= Htable[nlo].hi;
292
0
            Z.lo ^= Htable[nlo].lo;
293
0
        }
294
295
0
        if (IS_LITTLE_ENDIAN) {
296
#   ifdef BSWAP8
297
            Xi[0] = BSWAP8(Z.hi);
298
            Xi[1] = BSWAP8(Z.lo);
299
#   else
300
0
            u8 *p = (u8 *)Xi;
301
0
            u32 v;
302
0
            v = (u32)(Z.hi >> 32);
303
0
            PUTU32(p, v);
304
0
            v = (u32)(Z.hi);
305
0
            PUTU32(p + 4, v);
306
0
            v = (u32)(Z.lo >> 32);
307
0
            PUTU32(p + 8, v);
308
0
            v = (u32)(Z.lo);
309
0
            PUTU32(p + 12, v);
310
0
#   endif
311
0
        } else {
312
0
            Xi[0] = Z.hi;
313
0
            Xi[1] = Z.lo;
314
0
        }
315
316
0
        inp += 16;
317
        /* Block size is 128 bits so len is a multiple of 16 */
318
0
        len -= 16;
319
0
    } while (len > 0);
320
0
}
321
#  endif
322
# else
323
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
324
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
325
                    size_t len);
326
# endif
327
328
0
# define GCM_MUL(ctx)      ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
329
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
330
0
#  define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
331
/*
332
 * GHASH_CHUNK is a "stride parameter" intended to mitigate the cache
333
 * thrashing effect. In other words, the idea is to hash data while it is
334
 * still in the L1 cache after the encryption pass...
335
 */
336
0
#  define GHASH_CHUNK       (3*1024)
337
# endif
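/*
 * In short: GCM_MUL(ctx) folds one 16-byte block (already XORed into
 * ctx->Xi) into the running hash by multiplying Xi by H, while GHASH()
 * hashes a whole buffer per call.  GHASH_CHUNK bounds how much freshly
 * written ciphertext is hashed per pass so it is still in L1 when the
 * hash routine reads it back.
 */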
338
339
#if     (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
340
# if    !defined(I386_ONLY) && \
341
        (defined(__i386)        || defined(__i386__)    || \
342
         defined(__x86_64)      || defined(__x86_64__)  || \
343
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
344
#  define GHASH_ASM_X86_OR_64
345
346
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
347
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
348
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
349
                     size_t len);
350
351
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
352
#   define gcm_init_avx   gcm_init_clmul
353
#   define gcm_gmult_avx  gcm_gmult_clmul
354
#   define gcm_ghash_avx  gcm_ghash_clmul
355
#  else
356
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
357
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
358
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
359
                   size_t len);
360
#  endif
361
362
#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
363
#   define GHASH_ASM_X86
364
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
365
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
366
                        size_t len);
367
368
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
369
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
370
                        size_t len);
371
#  endif
372
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(_M_ARM64)
373
#  include "arm_arch.h"
374
#  if __ARM_MAX_ARCH__>=7
375
#   define GHASH_ASM_ARM
376
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
377
#   if defined(__arm__) || defined(__arm)
378
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
379
#   endif
380
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
381
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
382
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
383
                    size_t len);
384
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
385
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
386
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
387
                  size_t len);
388
#  endif
389
# elif defined(__sparc__) || defined(__sparc)
390
#  include "crypto/sparc_arch.h"
391
#  define GHASH_ASM_SPARC
392
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
393
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
394
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
395
                    size_t len);
396
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__POWERPC__) || defined(_ARCH_PPC))
397
#  include "crypto/ppc_arch.h"
398
#  define GHASH_ASM_PPC
399
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
400
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
401
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
402
                  size_t len);
403
# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
404
#  include "crypto/riscv_arch.h"
405
#  define GHASH_ASM_RV64I
406
/* Zbc/Zbkc (scalar crypto with clmul) based routines. */
407
void gcm_init_rv64i_zbc(u128 Htable[16], const u64 Xi[2]);
408
void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 Xi[2]);
409
void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 Xi[2]);
410
void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
411
void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
412
void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
413
                         const u8 *inp, size_t len);
414
void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
415
                               const u8 *inp, size_t len);
416
/* Zvkb/Zvbc (vector crypto with vclmul) based routines. */
417
void gcm_init_rv64i_zvkb_zvbc(u128 Htable[16], const u64 Xi[2]);
418
void gcm_gmult_rv64i_zvkb_zvbc(u64 Xi[2], const u128 Htable[16]);
419
void gcm_ghash_rv64i_zvkb_zvbc(u64 Xi[2], const u128 Htable[16],
420
                               const u8 *inp, size_t len);
421
/* Zvkg (vector crypto with vgmul.vv and vghsh.vv). */
422
void gcm_init_rv64i_zvkg(u128 Htable[16], const u64 Xi[2]);
423
void gcm_init_rv64i_zvkg_zvkb(u128 Htable[16], const u64 Xi[2]);
424
void gcm_gmult_rv64i_zvkg(u64 Xi[2], const u128 Htable[16]);
425
void gcm_ghash_rv64i_zvkg(u64 Xi[2], const u128 Htable[16],
426
                          const u8 *inp, size_t len);
427
# endif
428
#endif
429
430
static void gcm_get_funcs(struct gcm_funcs_st *ctx)
431
0
{
432
    /* set defaults -- overridden below as needed */
433
0
    ctx->ginit = gcm_init_4bit;
434
0
#if !defined(GHASH_ASM)
435
0
    ctx->gmult = gcm_gmult_4bit;
436
#else
437
    ctx->gmult = NULL;
438
#endif
439
0
#if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
440
0
    ctx->ghash = gcm_ghash_4bit;
441
#else
442
    ctx->ghash = NULL;
443
#endif
444
445
#if defined(GHASH_ASM_X86_OR_64)
446
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
447
    /* x86_64 */
448
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
449
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
450
            ctx->ginit = gcm_init_avx;
451
            ctx->gmult = gcm_gmult_avx;
452
            ctx->ghash = gcm_ghash_avx;
453
        } else {
454
            ctx->ginit = gcm_init_clmul;
455
            ctx->gmult = gcm_gmult_clmul;
456
            ctx->ghash = gcm_ghash_clmul;
457
        }
458
        return;
459
    }
460
# endif
461
# if defined(GHASH_ASM_X86)
462
    /* x86 only */
463
#  if defined(OPENSSL_IA32_SSE2)
464
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
465
        ctx->gmult = gcm_gmult_4bit_mmx;
466
        ctx->ghash = gcm_ghash_4bit_mmx;
467
        return;
468
    }
469
#  else
470
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
471
        ctx->gmult = gcm_gmult_4bit_mmx;
472
        ctx->ghash = gcm_ghash_4bit_mmx;
473
        return;
474
    }
475
#  endif
476
    ctx->gmult = gcm_gmult_4bit_x86;
477
    ctx->ghash = gcm_ghash_4bit_x86;
478
    return;
479
# else
480
    /* x86_64 fallback defaults */
481
    ctx->gmult = gcm_gmult_4bit;
482
    ctx->ghash = gcm_ghash_4bit;
483
    return;
484
# endif
485
#elif defined(GHASH_ASM_ARM)
486
    /* ARM defaults */
487
    ctx->gmult = gcm_gmult_4bit;
488
    ctx->ghash = gcm_ghash_4bit;
489
# ifdef PMULL_CAPABLE
490
    if (PMULL_CAPABLE) {
491
        ctx->ginit = (gcm_init_fn)gcm_init_v8;
492
        ctx->gmult = gcm_gmult_v8;
493
        ctx->ghash = gcm_ghash_v8;
494
    }
495
# elif defined(NEON_CAPABLE)
496
    if (NEON_CAPABLE) {
497
        ctx->ginit = gcm_init_neon;
498
        ctx->gmult = gcm_gmult_neon;
499
        ctx->ghash = gcm_ghash_neon;
500
    }
501
# endif
502
    return;
503
#elif defined(GHASH_ASM_SPARC)
504
    /* SPARC defaults */
505
    ctx->gmult = gcm_gmult_4bit;
506
    ctx->ghash = gcm_ghash_4bit;
507
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
508
        ctx->ginit = gcm_init_vis3;
509
        ctx->gmult = gcm_gmult_vis3;
510
        ctx->ghash = gcm_ghash_vis3;
511
    }
512
    return;
513
#elif defined(GHASH_ASM_PPC)
514
    /* PowerPC does not define GHASH_ASM; defaults set above */
515
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
516
        ctx->ginit = gcm_init_p8;
517
        ctx->gmult = gcm_gmult_p8;
518
        ctx->ghash = gcm_ghash_p8;
519
    }
520
    return;
521
#elif defined(GHASH_ASM_RV64I)
522
    /* RISC-V defaults */
523
    ctx->gmult = gcm_gmult_4bit;
524
    ctx->ghash = gcm_ghash_4bit;
525
526
    if (RISCV_HAS_ZVKG() && riscv_vlen() >= 128) {
527
        if (RISCV_HAS_ZVKB())
528
            ctx->ginit = gcm_init_rv64i_zvkg_zvkb;
529
        else
530
            ctx->ginit = gcm_init_rv64i_zvkg;
531
        ctx->gmult = gcm_gmult_rv64i_zvkg;
532
        ctx->ghash = gcm_ghash_rv64i_zvkg;
533
    } else if (RISCV_HAS_ZVKB() && RISCV_HAS_ZVBC() && riscv_vlen() >= 128) {
534
        ctx->ginit = gcm_init_rv64i_zvkb_zvbc;
535
        ctx->gmult = gcm_gmult_rv64i_zvkb_zvbc;
536
        ctx->ghash = gcm_ghash_rv64i_zvkb_zvbc;
537
    } else if (RISCV_HAS_ZBC()) {
538
        if (RISCV_HAS_ZBKB()) {
539
            ctx->ginit = gcm_init_rv64i_zbc__zbkb;
540
            ctx->gmult = gcm_gmult_rv64i_zbc__zbkb;
541
            ctx->ghash = gcm_ghash_rv64i_zbc__zbkb;
542
        } else if (RISCV_HAS_ZBB()) {
543
            ctx->ginit = gcm_init_rv64i_zbc__zbb;
544
            ctx->gmult = gcm_gmult_rv64i_zbc;
545
            ctx->ghash = gcm_ghash_rv64i_zbc;
546
        } else {
547
            ctx->ginit = gcm_init_rv64i_zbc;
548
            ctx->gmult = gcm_gmult_rv64i_zbc;
549
            ctx->ghash = gcm_ghash_rv64i_zbc;
550
        }
551
    }
552
    return;
553
#elif defined(GHASH_ASM)
554
    /* all other architectures use the generic names */
555
    ctx->gmult = gcm_gmult_4bit;
556
    ctx->ghash = gcm_ghash_4bit;
557
    return;
558
#endif
559
0
}
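/*
 * gcm_get_funcs() picks the fastest gmult/ghash pair available at run
 * time from the CPU capability bits; ghash may legitimately stay NULL
 * (e.g. small-footprint builds without assembler), which the helpers
 * below handle by falling back to gmult one 16-byte block at a time.
 */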
560
561
void ossl_gcm_init_4bit(u128 Htable[16], const u64 H[2])
562
0
{
563
0
    struct gcm_funcs_st funcs;
564
565
0
    gcm_get_funcs(&funcs);
566
0
    funcs.ginit(Htable, H);
567
0
}
568
569
void ossl_gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
570
0
{
571
0
    struct gcm_funcs_st funcs;
572
573
0
    gcm_get_funcs(&funcs);
574
0
    funcs.gmult(Xi, Htable);
575
0
}
576
577
void ossl_gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
578
                         const u8 *inp, size_t len)
579
0
{
580
0
    struct gcm_funcs_st funcs;
581
0
    u64 tmp[2];
582
0
    size_t i;
583
584
0
    gcm_get_funcs(&funcs);
585
0
    if (funcs.ghash != NULL) {
586
0
        funcs.ghash(Xi, Htable, inp, len);
587
0
    } else {
588
        /* Emulate ghash if needed */
589
0
        for (i = 0; i < len; i += 16) {
590
0
            memcpy(tmp, &inp[i], sizeof(tmp));
591
0
            Xi[0] ^= tmp[0];
592
0
            Xi[1] ^= tmp[1];
593
0
            funcs.gmult(Xi, Htable);
594
0
        }
595
0
    }
596
0
}
597
598
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
599
0
{
600
0
    DECLARE_IS_ENDIAN;
601
602
0
    memset(ctx, 0, sizeof(*ctx));
603
0
    ctx->block = block;
604
0
    ctx->key = key;
605
606
0
    (*block) (ctx->H.c, ctx->H.c, key);
607
608
0
    if (IS_LITTLE_ENDIAN) {
609
        /* H is stored in host byte order */
610
#ifdef BSWAP8
611
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
612
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
613
#else
614
0
        u8 *p = ctx->H.c;
615
0
        u64 hi, lo;
616
0
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
617
0
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
618
0
        ctx->H.u[0] = hi;
619
0
        ctx->H.u[1] = lo;
620
0
#endif
621
0
    }
622
623
0
    gcm_get_funcs(&ctx->funcs);
624
0
    ctx->funcs.ginit(ctx->Htable, ctx->H.u);
625
0
}
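/*
 * Since the context was just zeroed, the block() call above computes
 * H = E_K(0^128); on little-endian targets H is then byte-swapped so the
 * GHASH code can work on host-order 64-bit halves before Htable is
 * derived from it.
 */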
626
627
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
628
                         size_t len)
629
0
{
630
0
    DECLARE_IS_ENDIAN;
631
0
    unsigned int ctr;
632
633
0
    ctx->len.u[0] = 0;          /* AAD length */
634
0
    ctx->len.u[1] = 0;          /* message length */
635
0
    ctx->ares = 0;
636
0
    ctx->mres = 0;
637
638
0
    if (len == 12) {
639
0
        memcpy(ctx->Yi.c, iv, 12);
640
0
        ctx->Yi.c[12] = 0;
641
0
        ctx->Yi.c[13] = 0;
642
0
        ctx->Yi.c[14] = 0;
643
0
        ctx->Yi.c[15] = 1;
644
0
        ctr = 1;
645
0
    } else {
646
0
        size_t i;
647
0
        u64 len0 = len;
648
649
        /* Borrow ctx->Xi to calculate initial Yi */
650
0
        ctx->Xi.u[0] = 0;
651
0
        ctx->Xi.u[1] = 0;
652
653
0
        while (len >= 16) {
654
0
            for (i = 0; i < 16; ++i)
655
0
                ctx->Xi.c[i] ^= iv[i];
656
0
            GCM_MUL(ctx);
657
0
            iv += 16;
658
0
            len -= 16;
659
0
        }
660
0
        if (len) {
661
0
            for (i = 0; i < len; ++i)
662
0
                ctx->Xi.c[i] ^= iv[i];
663
0
            GCM_MUL(ctx);
664
0
        }
665
0
        len0 <<= 3;
666
0
        if (IS_LITTLE_ENDIAN) {
667
#ifdef BSWAP8
668
            ctx->Xi.u[1] ^= BSWAP8(len0);
669
#else
670
0
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
671
0
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
672
0
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
673
0
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
674
0
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
675
0
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
676
0
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
677
0
            ctx->Xi.c[15] ^= (u8)(len0);
678
0
#endif
679
0
        } else {
680
0
            ctx->Xi.u[1] ^= len0;
681
0
        }
682
683
0
        GCM_MUL(ctx);
684
685
0
        if (IS_LITTLE_ENDIAN)
686
#ifdef BSWAP4
687
            ctr = BSWAP4(ctx->Xi.d[3]);
688
#else
689
0
            ctr = GETU32(ctx->Xi.c + 12);
690
0
#endif
691
0
        else
692
0
            ctr = ctx->Xi.d[3];
693
694
        /* Copy borrowed Xi to Yi */
695
0
        ctx->Yi.u[0] = ctx->Xi.u[0];
696
0
        ctx->Yi.u[1] = ctx->Xi.u[1];
697
0
    }
698
699
0
    ctx->Xi.u[0] = 0;
700
0
    ctx->Xi.u[1] = 0;
701
702
0
    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
703
0
    ++ctr;
704
0
    if (IS_LITTLE_ENDIAN)
705
#ifdef BSWAP4
706
        ctx->Yi.d[3] = BSWAP4(ctr);
707
#else
708
0
        PUTU32(ctx->Yi.c + 12, ctr);
709
0
#endif
710
0
    else
711
0
        ctx->Yi.d[3] = ctr;
712
0
}
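/*
 * Per NIST SP 800-38D, a 96-bit IV is used directly as
 * Y0 = IV || 0^31 || 1, while any other IV length is hashed first:
 * Y0 = GHASH(IV padded to a block boundary || [bit length of IV]_64).
 * That is what the two branches above implement.
 */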
713
714
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
715
                      size_t len)
716
0
{
717
0
    size_t i;
718
0
    unsigned int n;
719
0
    u64 alen = ctx->len.u[0];
720
721
0
    if (ctx->len.u[1])
722
0
        return -2;
723
724
0
    alen += len;
725
0
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
726
0
        return -1;
727
0
    ctx->len.u[0] = alen;
728
729
0
    n = ctx->ares;
730
0
    if (n) {
731
0
        while (n && len) {
732
0
            ctx->Xi.c[n] ^= *(aad++);
733
0
            --len;
734
0
            n = (n + 1) % 16;
735
0
        }
736
0
        if (n == 0)
737
0
            GCM_MUL(ctx);
738
0
        else {
739
0
            ctx->ares = n;
740
0
            return 0;
741
0
        }
742
0
    }
743
0
#ifdef GHASH
744
0
    if ((i = (len & (size_t)-16))) {
745
0
        GHASH(ctx, aad, i);
746
0
        aad += i;
747
0
        len -= i;
748
0
    }
749
#else
750
    while (len >= 16) {
751
        for (i = 0; i < 16; ++i)
752
            ctx->Xi.c[i] ^= aad[i];
753
        GCM_MUL(ctx);
754
        aad += 16;
755
        len -= 16;
756
    }
757
#endif
758
0
    if (len) {
759
0
        n = (unsigned int)len;
760
0
        for (i = 0; i < len; ++i)
761
0
            ctx->Xi.c[i] ^= aad[i];
762
0
    }
763
764
0
    ctx->ares = n;
765
0
    return 0;
766
0
}
767
768
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
769
                          const unsigned char *in, unsigned char *out,
770
                          size_t len)
771
0
{
772
0
    DECLARE_IS_ENDIAN;
773
0
    unsigned int n, ctr, mres;
774
0
    size_t i;
775
0
    u64 mlen = ctx->len.u[1];
776
0
    block128_f block = ctx->block;
777
0
    void *key = ctx->key;
778
779
0
    mlen += len;
780
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
781
0
        return -1;
782
0
    ctx->len.u[1] = mlen;
783
784
0
    mres = ctx->mres;
785
786
0
    if (ctx->ares) {
787
        /* First call to encrypt finalizes GHASH(AAD) */
788
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
789
0
        if (len == 0) {
790
0
            GCM_MUL(ctx);
791
0
            ctx->ares = 0;
792
0
            return 0;
793
0
        }
794
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
795
0
        ctx->Xi.u[0] = 0;
796
0
        ctx->Xi.u[1] = 0;
797
0
        mres = sizeof(ctx->Xi);
798
#else
799
        GCM_MUL(ctx);
800
#endif
801
0
        ctx->ares = 0;
802
0
    }
803
804
0
    if (IS_LITTLE_ENDIAN)
805
#ifdef BSWAP4
806
        ctr = BSWAP4(ctx->Yi.d[3]);
807
#else
808
0
        ctr = GETU32(ctx->Yi.c + 12);
809
0
#endif
810
0
    else
811
0
        ctr = ctx->Yi.d[3];
812
813
0
    n = mres % 16;
814
0
#if !defined(OPENSSL_SMALL_FOOTPRINT)
815
0
    if (16 % sizeof(size_t) == 0) { /* always true actually */
816
0
        do {
817
0
            if (n) {
818
0
# if defined(GHASH)
819
0
                while (n && len) {
820
0
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
821
0
                    --len;
822
0
                    n = (n + 1) % 16;
823
0
                }
824
0
                if (n == 0) {
825
0
                    GHASH(ctx, ctx->Xn, mres);
826
0
                    mres = 0;
827
0
                } else {
828
0
                    ctx->mres = mres;
829
0
                    return 0;
830
0
                }
831
# else
832
                while (n && len) {
833
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
834
                    --len;
835
                    n = (n + 1) % 16;
836
                }
837
                if (n == 0) {
838
                    GCM_MUL(ctx);
839
                    mres = 0;
840
                } else {
841
                    ctx->mres = n;
842
                    return 0;
843
                }
844
# endif
845
0
            }
846
# if defined(STRICT_ALIGNMENT)
847
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
848
                break;
849
# endif
850
0
# if defined(GHASH)
851
0
            if (len >= 16 && mres) {
852
0
                GHASH(ctx, ctx->Xn, mres);
853
0
                mres = 0;
854
0
            }
855
0
#  if defined(GHASH_CHUNK)
856
0
            while (len >= GHASH_CHUNK) {
857
0
                size_t j = GHASH_CHUNK;
858
859
0
                while (j) {
860
0
                    size_t_aX *out_t = (size_t_aX *)out;
861
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
862
863
0
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
864
0
                    ++ctr;
865
0
                    if (IS_LITTLE_ENDIAN)
866
#   ifdef BSWAP4
867
                        ctx->Yi.d[3] = BSWAP4(ctr);
868
#   else
869
0
                        PUTU32(ctx->Yi.c + 12, ctr);
870
0
#   endif
871
0
                    else
872
0
                        ctx->Yi.d[3] = ctr;
873
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
874
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
875
0
                    out += 16;
876
0
                    in += 16;
877
0
                    j -= 16;
878
0
                }
879
0
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
880
0
                len -= GHASH_CHUNK;
881
0
            }
882
0
#  endif
883
0
            if ((i = (len & (size_t)-16))) {
884
0
                size_t j = i;
885
886
0
                while (len >= 16) {
887
0
                    size_t_aX *out_t = (size_t_aX *)out;
888
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
889
890
0
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
891
0
                    ++ctr;
892
0
                    if (IS_LITTLE_ENDIAN)
893
#  ifdef BSWAP4
894
                        ctx->Yi.d[3] = BSWAP4(ctr);
895
#  else
896
0
                        PUTU32(ctx->Yi.c + 12, ctr);
897
0
#  endif
898
0
                    else
899
0
                        ctx->Yi.d[3] = ctr;
900
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
901
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
902
0
                    out += 16;
903
0
                    in += 16;
904
0
                    len -= 16;
905
0
                }
906
0
                GHASH(ctx, out - j, j);
907
0
            }
908
# else
909
            while (len >= 16) {
910
                size_t *out_t = (size_t *)out;
911
                const size_t *in_t = (const size_t *)in;
912
913
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
914
                ++ctr;
915
                if (IS_LITTLE_ENDIAN)
916
#  ifdef BSWAP4
917
                    ctx->Yi.d[3] = BSWAP4(ctr);
918
#  else
919
                    PUTU32(ctx->Yi.c + 12, ctr);
920
#  endif
921
                else
922
                    ctx->Yi.d[3] = ctr;
923
                for (i = 0; i < 16 / sizeof(size_t); ++i)
924
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
925
                GCM_MUL(ctx);
926
                out += 16;
927
                in += 16;
928
                len -= 16;
929
            }
930
# endif
931
0
            if (len) {
932
0
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
933
0
                ++ctr;
934
0
                if (IS_LITTLE_ENDIAN)
935
# ifdef BSWAP4
936
                    ctx->Yi.d[3] = BSWAP4(ctr);
937
# else
938
0
                    PUTU32(ctx->Yi.c + 12, ctr);
939
0
# endif
940
0
                else
941
0
                    ctx->Yi.d[3] = ctr;
942
0
# if defined(GHASH)
943
0
                while (len--) {
944
0
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
945
0
                    ++n;
946
0
                }
947
# else
948
                while (len--) {
949
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
950
                    ++n;
951
                }
952
                mres = n;
953
# endif
954
0
            }
955
956
0
            ctx->mres = mres;
957
0
            return 0;
958
0
        } while (0);
959
0
    }
960
0
#endif
961
0
    for (i = 0; i < len; ++i) {
962
0
        if (n == 0) {
963
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
964
0
            ++ctr;
965
0
            if (IS_LITTLE_ENDIAN)
966
#ifdef BSWAP4
967
                ctx->Yi.d[3] = BSWAP4(ctr);
968
#else
969
0
                PUTU32(ctx->Yi.c + 12, ctr);
970
0
#endif
971
0
            else
972
0
                ctx->Yi.d[3] = ctr;
973
0
        }
974
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
975
0
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
976
0
        n = (n + 1) % 16;
977
0
        if (mres == sizeof(ctx->Xn)) {
978
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
979
0
            mres = 0;
980
0
        }
981
#else
982
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
983
        mres = n = (n + 1) % 16;
984
        if (n == 0)
985
            GCM_MUL(ctx);
986
#endif
987
0
    }
988
989
0
    ctx->mres = mres;
990
0
    return 0;
991
0
}
992
993
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
994
                          const unsigned char *in, unsigned char *out,
995
                          size_t len)
996
0
{
997
0
    DECLARE_IS_ENDIAN;
998
0
    unsigned int n, ctr, mres;
999
0
    size_t i;
1000
0
    u64 mlen = ctx->len.u[1];
1001
0
    block128_f block = ctx->block;
1002
0
    void *key = ctx->key;
1003
1004
0
    mlen += len;
1005
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1006
0
        return -1;
1007
0
    ctx->len.u[1] = mlen;
1008
1009
0
    mres = ctx->mres;
1010
1011
0
    if (ctx->ares) {
1012
        /* First call to decrypt finalizes GHASH(AAD) */
1013
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1014
0
        if (len == 0) {
1015
0
            GCM_MUL(ctx);
1016
0
            ctx->ares = 0;
1017
0
            return 0;
1018
0
        }
1019
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1020
0
        ctx->Xi.u[0] = 0;
1021
0
        ctx->Xi.u[1] = 0;
1022
0
        mres = sizeof(ctx->Xi);
1023
#else
1024
        GCM_MUL(ctx);
1025
#endif
1026
0
        ctx->ares = 0;
1027
0
    }
1028
1029
0
    if (IS_LITTLE_ENDIAN)
1030
#ifdef BSWAP4
1031
        ctr = BSWAP4(ctx->Yi.d[3]);
1032
#else
1033
0
        ctr = GETU32(ctx->Yi.c + 12);
1034
0
#endif
1035
0
    else
1036
0
        ctr = ctx->Yi.d[3];
1037
1038
0
    n = mres % 16;
1039
0
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1040
0
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1041
0
        do {
1042
0
            if (n) {
1043
0
# if defined(GHASH)
1044
0
                while (n && len) {
1045
0
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1046
0
                    --len;
1047
0
                    n = (n + 1) % 16;
1048
0
                }
1049
0
                if (n == 0) {
1050
0
                    GHASH(ctx, ctx->Xn, mres);
1051
0
                    mres = 0;
1052
0
                } else {
1053
0
                    ctx->mres = mres;
1054
0
                    return 0;
1055
0
                }
1056
# else
1057
                while (n && len) {
1058
                    u8 c = *(in++);
1059
                    *(out++) = c ^ ctx->EKi.c[n];
1060
                    ctx->Xi.c[n] ^= c;
1061
                    --len;
1062
                    n = (n + 1) % 16;
1063
                }
1064
                if (n == 0) {
1065
                    GCM_MUL(ctx);
1066
                    mres = 0;
1067
                } else {
1068
                    ctx->mres = n;
1069
                    return 0;
1070
                }
1071
# endif
1072
0
            }
1073
# if defined(STRICT_ALIGNMENT)
1074
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1075
                break;
1076
# endif
1077
0
# if defined(GHASH)
1078
0
            if (len >= 16 && mres) {
1079
0
                GHASH(ctx, ctx->Xn, mres);
1080
0
                mres = 0;
1081
0
            }
1082
0
#  if defined(GHASH_CHUNK)
1083
0
            while (len >= GHASH_CHUNK) {
1084
0
                size_t j = GHASH_CHUNK;
1085
1086
0
                GHASH(ctx, in, GHASH_CHUNK);
1087
0
                while (j) {
1088
0
                    size_t_aX *out_t = (size_t_aX *)out;
1089
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
1090
1091
0
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1092
0
                    ++ctr;
1093
0
                    if (IS_LITTLE_ENDIAN)
1094
#   ifdef BSWAP4
1095
                        ctx->Yi.d[3] = BSWAP4(ctr);
1096
#   else
1097
0
                        PUTU32(ctx->Yi.c + 12, ctr);
1098
0
#   endif
1099
0
                    else
1100
0
                        ctx->Yi.d[3] = ctr;
1101
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1102
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1103
0
                    out += 16;
1104
0
                    in += 16;
1105
0
                    j -= 16;
1106
0
                }
1107
0
                len -= GHASH_CHUNK;
1108
0
            }
1109
0
#  endif
1110
0
            if ((i = (len & (size_t)-16))) {
1111
0
                GHASH(ctx, in, i);
1112
0
                while (len >= 16) {
1113
0
                    size_t_aX *out_t = (size_t_aX *)out;
1114
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
1115
1116
0
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1117
0
                    ++ctr;
1118
0
                    if (IS_LITTLE_ENDIAN)
1119
#  ifdef BSWAP4
1120
                        ctx->Yi.d[3] = BSWAP4(ctr);
1121
#  else
1122
0
                        PUTU32(ctx->Yi.c + 12, ctr);
1123
0
#  endif
1124
0
                    else
1125
0
                        ctx->Yi.d[3] = ctr;
1126
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1127
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1128
0
                    out += 16;
1129
0
                    in += 16;
1130
0
                    len -= 16;
1131
0
                }
1132
0
            }
1133
# else
1134
            while (len >= 16) {
1135
                size_t *out_t = (size_t *)out;
1136
                const size_t *in_t = (const size_t *)in;
1137
1138
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1139
                ++ctr;
1140
                if (IS_LITTLE_ENDIAN)
1141
#  ifdef BSWAP4
1142
                    ctx->Yi.d[3] = BSWAP4(ctr);
1143
#  else
1144
                    PUTU32(ctx->Yi.c + 12, ctr);
1145
#  endif
1146
                else
1147
                    ctx->Yi.d[3] = ctr;
1148
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1149
                    size_t c = in_t[i];
1150
                    out_t[i] = c ^ ctx->EKi.t[i];
1151
                    ctx->Xi.t[i] ^= c;
1152
                }
1153
                GCM_MUL(ctx);
1154
                out += 16;
1155
                in += 16;
1156
                len -= 16;
1157
            }
1158
# endif
1159
0
            if (len) {
1160
0
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1161
0
                ++ctr;
1162
0
                if (IS_LITTLE_ENDIAN)
1163
# ifdef BSWAP4
1164
                    ctx->Yi.d[3] = BSWAP4(ctr);
1165
# else
1166
0
                    PUTU32(ctx->Yi.c + 12, ctr);
1167
0
# endif
1168
0
                else
1169
0
                    ctx->Yi.d[3] = ctr;
1170
0
# if defined(GHASH)
1171
0
                while (len--) {
1172
0
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1173
0
                    ++n;
1174
0
                }
1175
# else
1176
                while (len--) {
1177
                    u8 c = in[n];
1178
                    ctx->Xi.c[n] ^= c;
1179
                    out[n] = c ^ ctx->EKi.c[n];
1180
                    ++n;
1181
                }
1182
                mres = n;
1183
# endif
1184
0
            }
1185
1186
0
            ctx->mres = mres;
1187
0
            return 0;
1188
0
        } while (0);
1189
0
    }
1190
0
#endif
1191
0
    for (i = 0; i < len; ++i) {
1192
0
        u8 c;
1193
0
        if (n == 0) {
1194
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1195
0
            ++ctr;
1196
0
            if (IS_LITTLE_ENDIAN)
1197
#ifdef BSWAP4
1198
                ctx->Yi.d[3] = BSWAP4(ctr);
1199
#else
1200
0
                PUTU32(ctx->Yi.c + 12, ctr);
1201
0
#endif
1202
0
            else
1203
0
                ctx->Yi.d[3] = ctr;
1204
0
        }
1205
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1206
0
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1207
0
        n = (n + 1) % 16;
1208
0
        if (mres == sizeof(ctx->Xn)) {
1209
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1210
0
            mres = 0;
1211
0
        }
1212
#else
1213
        c = in[i];
1214
        out[i] = c ^ ctx->EKi.c[n];
1215
        ctx->Xi.c[n] ^= c;
1216
        mres = n = (n + 1) % 16;
1217
        if (n == 0)
1218
            GCM_MUL(ctx);
1219
#endif
1220
0
    }
1221
1222
0
    ctx->mres = mres;
1223
0
    return 0;
1224
0
}
1225
1226
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1227
                                const unsigned char *in, unsigned char *out,
1228
                                size_t len, ctr128_f stream)
1229
0
{
1230
#if defined(OPENSSL_SMALL_FOOTPRINT)
1231
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1232
#else
1233
0
    DECLARE_IS_ENDIAN;
1234
0
    unsigned int n, ctr, mres;
1235
0
    size_t i;
1236
0
    u64 mlen = ctx->len.u[1];
1237
0
    void *key = ctx->key;
1238
1239
0
    mlen += len;
1240
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1241
0
        return -1;
1242
0
    ctx->len.u[1] = mlen;
1243
1244
0
    mres = ctx->mres;
1245
1246
0
    if (ctx->ares) {
1247
        /* First call to encrypt finalizes GHASH(AAD) */
1248
0
#if defined(GHASH)
1249
0
        if (len == 0) {
1250
0
            GCM_MUL(ctx);
1251
0
            ctx->ares = 0;
1252
0
            return 0;
1253
0
        }
1254
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1255
0
        ctx->Xi.u[0] = 0;
1256
0
        ctx->Xi.u[1] = 0;
1257
0
        mres = sizeof(ctx->Xi);
1258
#else
1259
        GCM_MUL(ctx);
1260
#endif
1261
0
        ctx->ares = 0;
1262
0
    }
1263
1264
0
    if (IS_LITTLE_ENDIAN)
1265
# ifdef BSWAP4
1266
        ctr = BSWAP4(ctx->Yi.d[3]);
1267
# else
1268
0
        ctr = GETU32(ctx->Yi.c + 12);
1269
0
# endif
1270
0
    else
1271
0
        ctr = ctx->Yi.d[3];
1272
1273
0
    n = mres % 16;
1274
0
    if (n) {
1275
0
# if defined(GHASH)
1276
0
        while (n && len) {
1277
0
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1278
0
            --len;
1279
0
            n = (n + 1) % 16;
1280
0
        }
1281
0
        if (n == 0) {
1282
0
            GHASH(ctx, ctx->Xn, mres);
1283
0
            mres = 0;
1284
0
        } else {
1285
0
            ctx->mres = mres;
1286
0
            return 0;
1287
0
        }
1288
# else
1289
        while (n && len) {
1290
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1291
            --len;
1292
            n = (n + 1) % 16;
1293
        }
1294
        if (n == 0) {
1295
            GCM_MUL(ctx);
1296
            mres = 0;
1297
        } else {
1298
            ctx->mres = n;
1299
            return 0;
1300
        }
1301
# endif
1302
0
    }
1303
0
# if defined(GHASH)
1304
0
        if (len >= 16 && mres) {
1305
0
            GHASH(ctx, ctx->Xn, mres);
1306
0
            mres = 0;
1307
0
        }
1308
0
#  if defined(GHASH_CHUNK)
1309
0
    while (len >= GHASH_CHUNK) {
1310
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1311
0
        ctr += GHASH_CHUNK / 16;
1312
0
        if (IS_LITTLE_ENDIAN)
1313
#   ifdef BSWAP4
1314
            ctx->Yi.d[3] = BSWAP4(ctr);
1315
#   else
1316
0
            PUTU32(ctx->Yi.c + 12, ctr);
1317
0
#   endif
1318
0
        else
1319
0
            ctx->Yi.d[3] = ctr;
1320
0
        GHASH(ctx, out, GHASH_CHUNK);
1321
0
        out += GHASH_CHUNK;
1322
0
        in += GHASH_CHUNK;
1323
0
        len -= GHASH_CHUNK;
1324
0
    }
1325
0
#  endif
1326
0
# endif
1327
0
    if ((i = (len & (size_t)-16))) {
1328
0
        size_t j = i / 16;
1329
1330
0
        (*stream) (in, out, j, key, ctx->Yi.c);
1331
0
        ctr += (unsigned int)j;
1332
0
        if (IS_LITTLE_ENDIAN)
1333
# ifdef BSWAP4
1334
            ctx->Yi.d[3] = BSWAP4(ctr);
1335
# else
1336
0
            PUTU32(ctx->Yi.c + 12, ctr);
1337
0
# endif
1338
0
        else
1339
0
            ctx->Yi.d[3] = ctr;
1340
0
        in += i;
1341
0
        len -= i;
1342
0
# if defined(GHASH)
1343
0
        GHASH(ctx, out, i);
1344
0
        out += i;
1345
# else
1346
        while (j--) {
1347
            for (i = 0; i < 16; ++i)
1348
                ctx->Xi.c[i] ^= out[i];
1349
            GCM_MUL(ctx);
1350
            out += 16;
1351
        }
1352
# endif
1353
0
    }
1354
0
    if (len) {
1355
0
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1356
0
        ++ctr;
1357
0
        if (IS_LITTLE_ENDIAN)
1358
# ifdef BSWAP4
1359
            ctx->Yi.d[3] = BSWAP4(ctr);
1360
# else
1361
0
            PUTU32(ctx->Yi.c + 12, ctr);
1362
0
# endif
1363
0
        else
1364
0
            ctx->Yi.d[3] = ctr;
1365
0
        while (len--) {
1366
0
# if defined(GHASH)
1367
0
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1368
# else
1369
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1370
# endif
1371
0
            ++n;
1372
0
        }
1373
0
    }
1374
1375
0
    ctx->mres = mres;
1376
0
    return 0;
1377
0
#endif
1378
0
}
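/*
 * The *_ctr32 entry points take a ctr128_f "stream" routine that
 * encrypts many counter blocks per call (e.g. an AES-NI CTR kernel), so
 * whole GHASH_CHUNK-sized spans can be encrypted first and then hashed
 * in bulk with GHASH(), rather than one block at a time.
 */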
1379
1380
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1381
                                const unsigned char *in, unsigned char *out,
1382
                                size_t len, ctr128_f stream)
1383
0
{
1384
#if defined(OPENSSL_SMALL_FOOTPRINT)
1385
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1386
#else
1387
0
    DECLARE_IS_ENDIAN;
1388
0
    unsigned int n, ctr, mres;
1389
0
    size_t i;
1390
0
    u64 mlen = ctx->len.u[1];
1391
0
    void *key = ctx->key;
1392
1393
0
    mlen += len;
1394
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1395
0
        return -1;
1396
0
    ctx->len.u[1] = mlen;
1397
1398
0
    mres = ctx->mres;
1399
1400
0
    if (ctx->ares) {
1401
        /* First call to decrypt finalizes GHASH(AAD) */
1402
0
# if defined(GHASH)
1403
0
        if (len == 0) {
1404
0
            GCM_MUL(ctx);
1405
0
            ctx->ares = 0;
1406
0
            return 0;
1407
0
        }
1408
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1409
0
        ctx->Xi.u[0] = 0;
1410
0
        ctx->Xi.u[1] = 0;
1411
0
        mres = sizeof(ctx->Xi);
1412
# else
1413
        GCM_MUL(ctx);
1414
# endif
1415
0
        ctx->ares = 0;
1416
0
    }
1417
1418
0
    if (IS_LITTLE_ENDIAN)
1419
# ifdef BSWAP4
1420
        ctr = BSWAP4(ctx->Yi.d[3]);
1421
# else
1422
0
        ctr = GETU32(ctx->Yi.c + 12);
1423
0
# endif
1424
0
    else
1425
0
        ctr = ctx->Yi.d[3];
1426
1427
0
    n = mres % 16;
1428
0
    if (n) {
1429
0
# if defined(GHASH)
1430
0
        while (n && len) {
1431
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1432
0
            --len;
1433
0
            n = (n + 1) % 16;
1434
0
        }
1435
0
        if (n == 0) {
1436
0
            GHASH(ctx, ctx->Xn, mres);
1437
0
            mres = 0;
1438
0
        } else {
1439
0
            ctx->mres = mres;
1440
0
            return 0;
1441
0
        }
1442
# else
1443
        while (n && len) {
1444
            u8 c = *(in++);
1445
            *(out++) = c ^ ctx->EKi.c[n];
1446
            ctx->Xi.c[n] ^= c;
1447
            --len;
1448
            n = (n + 1) % 16;
1449
        }
1450
        if (n == 0) {
1451
            GCM_MUL(ctx);
1452
            mres = 0;
1453
        } else {
1454
            ctx->mres = n;
1455
            return 0;
1456
        }
1457
# endif
1458
0
    }
1459
0
# if defined(GHASH)
1460
0
    if (len >= 16 && mres) {
1461
0
        GHASH(ctx, ctx->Xn, mres);
1462
0
        mres = 0;
1463
0
    }
1464
0
#  if defined(GHASH_CHUNK)
1465
0
    while (len >= GHASH_CHUNK) {
1466
0
        GHASH(ctx, in, GHASH_CHUNK);
1467
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1468
0
        ctr += GHASH_CHUNK / 16;
1469
0
        if (IS_LITTLE_ENDIAN)
1470
#   ifdef BSWAP4
1471
            ctx->Yi.d[3] = BSWAP4(ctr);
1472
#   else
1473
0
            PUTU32(ctx->Yi.c + 12, ctr);
1474
0
#   endif
1475
0
        else
1476
0
            ctx->Yi.d[3] = ctr;
1477
0
        out += GHASH_CHUNK;
1478
0
        in += GHASH_CHUNK;
1479
0
        len -= GHASH_CHUNK;
1480
0
    }
1481
0
#  endif
1482
0
# endif
1483
0
    if ((i = (len & (size_t)-16))) {
1484
0
        size_t j = i / 16;
1485
1486
0
# if defined(GHASH)
1487
0
        GHASH(ctx, in, i);
1488
# else
1489
        while (j--) {
1490
            size_t k;
1491
            for (k = 0; k < 16; ++k)
1492
                ctx->Xi.c[k] ^= in[k];
1493
            GCM_MUL(ctx);
1494
            in += 16;
1495
        }
1496
        j = i / 16;
1497
        in -= i;
1498
# endif
1499
0
        (*stream) (in, out, j, key, ctx->Yi.c);
1500
0
        ctr += (unsigned int)j;
1501
0
        if (IS_LITTLE_ENDIAN)
1502
# ifdef BSWAP4
1503
            ctx->Yi.d[3] = BSWAP4(ctr);
1504
# else
1505
0
            PUTU32(ctx->Yi.c + 12, ctr);
1506
0
# endif
1507
0
        else
1508
0
            ctx->Yi.d[3] = ctr;
1509
0
        out += i;
1510
0
        in += i;
1511
0
        len -= i;
1512
0
    }
1513
0
    if (len) {
1514
0
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1515
0
        ++ctr;
1516
0
        if (IS_LITTLE_ENDIAN)
1517
# ifdef BSWAP4
1518
            ctx->Yi.d[3] = BSWAP4(ctr);
1519
# else
1520
0
            PUTU32(ctx->Yi.c + 12, ctr);
1521
0
# endif
1522
0
        else
1523
0
            ctx->Yi.d[3] = ctr;
1524
0
        while (len--) {
1525
0
# if defined(GHASH)
1526
0
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1527
# else
1528
            u8 c = in[n];
1529
            ctx->Xi.c[mres++] ^= c;
1530
            out[n] = c ^ ctx->EKi.c[n];
1531
# endif
1532
0
            ++n;
1533
0
        }
1534
0
    }
1535
1536
0
    ctx->mres = mres;
1537
0
    return 0;
1538
0
#endif
1539
0
}
1540
1541
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1542
                         size_t len)
1543
0
{
1544
0
    DECLARE_IS_ENDIAN;
1545
0
    u64 alen = ctx->len.u[0] << 3;
1546
0
    u64 clen = ctx->len.u[1] << 3;
1547
1548
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1549
0
    u128 bitlen;
1550
0
    unsigned int mres = ctx->mres;
1551
1552
0
    if (mres) {
1553
0
        unsigned blocks = (mres + 15) & -16;
1554
1555
0
        memset(ctx->Xn + mres, 0, blocks - mres);
1556
0
        mres = blocks;
1557
0
        if (mres == sizeof(ctx->Xn)) {
1558
0
            GHASH(ctx, ctx->Xn, mres);
1559
0
            mres = 0;
1560
0
        }
1561
0
    } else if (ctx->ares) {
1562
0
        GCM_MUL(ctx);
1563
0
    }
1564
#else
1565
    if (ctx->mres || ctx->ares)
1566
        GCM_MUL(ctx);
1567
#endif
1568
1569
0
    if (IS_LITTLE_ENDIAN) {
1570
#ifdef BSWAP8
1571
        alen = BSWAP8(alen);
1572
        clen = BSWAP8(clen);
1573
#else
1574
0
        u8 *p = ctx->len.c;
1575
1576
0
        ctx->len.u[0] = alen;
1577
0
        ctx->len.u[1] = clen;
1578
1579
0
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1580
0
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1581
0
#endif
1582
0
    }
1583
1584
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1585
0
    bitlen.hi = alen;
1586
0
    bitlen.lo = clen;
1587
0
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1588
0
    mres += sizeof(bitlen);
1589
0
    GHASH(ctx, ctx->Xn, mres);
1590
#else
1591
    ctx->Xi.u[0] ^= alen;
1592
    ctx->Xi.u[1] ^= clen;
1593
    GCM_MUL(ctx);
1594
#endif
1595
1596
0
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1597
0
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1598
1599
0
    if (tag && len <= sizeof(ctx->Xi))
1600
0
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1601
0
    else
1602
0
        return -1;
1603
0
}
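/*
 * The tag computed above is
 *   T = E_K(Y0) XOR GHASH_H(A || C || [len(A)]_64 || [len(C)]_64),
 * and CRYPTO_memcmp keeps the comparison against the caller-supplied
 * tag constant-time.
 */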
1604
1605
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1606
0
{
1607
0
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1608
0
    memcpy(tag, ctx->Xi.c,
1609
0
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1610
0
}
1611
1612
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1613
0
{
1614
0
    GCM128_CONTEXT *ret;
1615
1616
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1617
0
        CRYPTO_gcm128_init(ret, key, block);
1618
1619
0
    return ret;
1620
0
}
1621
1622
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1623
0
{
1624
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1625
0
}
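A minimal usage sketch follows, assuming the legacy AES block API from <openssl/aes.h> is available to supply the block128_f callback and <openssl/modes.h> for the public declarations; the helper name gcm128_encrypt_oneshot is illustrative and not part of the file above.

#include <openssl/aes.h>
#include <openssl/modes.h>

/* Illustrative one-shot GCM encryption driven through the functions above. */
static int gcm128_encrypt_oneshot(const unsigned char key[16],
                                  const unsigned char iv[12],
                                  const unsigned char *aad, size_t aad_len,
                                  const unsigned char *pt, size_t pt_len,
                                  unsigned char *ct, unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT *gcm;
    int ok = 0;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return 0;

    /*
     * CRYPTO_gcm128_new computes H = E_K(0^128) and builds the Htable;
     * the cast mirrors how OpenSSL itself passes AES_encrypt as a block128_f.
     */
    gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt);
    if (gcm == NULL)
        return 0;

    CRYPTO_gcm128_setiv(gcm, iv, 12);                /* 96-bit IV fast path */

    if (CRYPTO_gcm128_aad(gcm, aad, aad_len) == 0
        && CRYPTO_gcm128_encrypt(gcm, pt, ct, pt_len) == 0) {
        CRYPTO_gcm128_tag(gcm, tag, 16);             /* emit the 128-bit tag */
        ok = 1;
    }

    CRYPTO_gcm128_release(gcm);
    return ok;
}

Decryption is symmetric: CRYPTO_gcm128_decrypt replaces CRYPTO_gcm128_encrypt, and CRYPTO_gcm128_finish compares the received tag in constant time instead of emitting one.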