Coverage Report

Created: 2025-06-13 06:58

/src/openssl30/crypto/modes/gcm128.c
Line| Count|Source
   1|      |/*
   2|      | * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
   3|      | *
   4|      | * Licensed under the Apache License 2.0 (the "License").  You may not use
   5|      | * this file except in compliance with the License.  You can obtain a copy
   6|      | * in the file LICENSE in the source distribution or at
   7|      | * https://www.openssl.org/source/license.html
   8|      | */
   9|      |
  10|      |#include <string.h>
  11|      |#include <openssl/crypto.h>
  12|      |#include "internal/cryptlib.h"
  13|      |#include "internal/endian.h"
  14|      |#include "crypto/modes.h"
  15|      |
  16|      |#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
  17|      |typedef size_t size_t_aX __attribute((__aligned__(1)));
  18|      |#else
  19|      |typedef size_t size_t_aX;
  20|      |#endif
  21|      |
  22|      |#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
  23|      |/* redefine, because alignment is ensured */
  24|      |# undef  GETU32
  25|      |# define GETU32(p)       BSWAP4(*(const u32 *)(p))
  26|      |# undef  PUTU32
  27|      |# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
  28|      |#endif
  29|      |
  30|      |#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
  31|     0|#define REDUCE1BIT(V)   do { \
  32|     0|        if (sizeof(size_t)==8) { \
  33|     0|                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
  34|     0|                V.lo  = (V.hi<<63)|(V.lo>>1); \
  35|     0|                V.hi  = (V.hi>>1 )^T; \
  36|     0|        } \
  37|     0|        else { \
  38|     0|                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
  39|     0|                V.lo  = (V.hi<<63)|(V.lo>>1); \
  40|     0|                V.hi  = (V.hi>>1 )^((u64)T<<32); \
  41|     0|        } \
  42|     0|} while(0)
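
REDUCE1BIT divides V by x once in GCM's bit-reflected GF(2^128): a right shift plus a conditional XOR of the reduction constant 0xE1... (the polynomial x^128 + x^7 + x^2 + x + 1). A minimal standalone sketch of the full field multiplication built from that single step, assuming nothing from this file beyond standard C:

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } u128x;

    /* Z = X * H in GCM's bit-reflected GF(2^128). Each iteration consumes
     * one bit of X (most significant first) and performs one
     * REDUCE1BIT-style step on V: shift right, conditionally XOR the
     * 0xE1... reduction constant into the top word. */
    static u128x gf128_mul(u128x X, u128x H)
    {
        u128x Z = { 0, 0 }, V = H;
        int i;

        for (i = 0; i < 128; i++) {
            uint64_t bit = (i < 64) ? (X.hi >> (63 - i)) & 1
                                    : (X.lo >> (127 - i)) & 1;
            if (bit) {
                Z.hi ^= V.hi;
                Z.lo ^= V.lo;
            }
            uint64_t carry = V.lo & 1;          /* bit shifted out of V */
            V.lo = (V.hi << 63) | (V.lo >> 1);
            V.hi = (V.hi >> 1) ^ (0xe100000000000000ULL & (0 - carry));
        }
        return Z;
    }

This mirrors gcm_gmult_1bit() further down in the file, minus its endianness handling.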
  43|      |
  44|      |/*-
  45|      | * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
  46|      | * never be set to 8. 8 is effectively reserved for testing purposes.
  47|      | * TABLE_BITS>1 are lookup-table-driven implementations referred to as
  48|      | * "Shoup's" in GCM specification. In other words OpenSSL does not cover
  49|      | * whole spectrum of possible table driven implementations. Why? In
  50|      | * non-"Shoup's" case memory access pattern is segmented in such manner,
  51|      | * that it's trivial to see that cache timing information can reveal
  52|      | * fair portion of intermediate hash value. Given that ciphertext is
  53|      | * always available to attacker, it's possible for him to attempt to
  54|      | * deduce secret parameter H and if successful, tamper with messages
  55|      | * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
  56|      | * not as trivial, but there is no reason to believe that it's resistant
  57|      | * to cache-timing attack. And the thing about "8-bit" implementation is
  58|      | * that it consumes 16 (sixteen) times more memory, 4KB per individual
  59|      | * key + 1KB shared. Well, on pros side it should be twice as fast as
  60|      | * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
  61|      | * was observed to run ~75% faster, closer to 100% for commercial
  62|      | * compilers... Yet "4-bit" procedure is preferred, because it's
  63|      | * believed to provide better security-performance balance and adequate
  64|      | * all-round performance. "All-round" refers to things like:
  65|      | *
  66|      | * - shorter setup time effectively improves overall timing for
  67|      | *   handling short messages;
  68|      | * - larger table allocation can become unbearable because of VM
  69|      | *   subsystem penalties (for example on Windows large enough free
  70|      | *   results in VM working set trimming, meaning that consequent
  71|      | *   malloc would immediately incur working set expansion);
  72|      | * - larger table has larger cache footprint, which can affect
  73|      | *   performance of other code paths (not necessarily even from same
  74|      | *   thread in Hyper-Threading world);
  75|      | *
  76|      | * Value of 1 is not appropriate for performance reasons.
  77|      | */
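
The footprint numbers quoted above are easy to verify: a u128 entry is 16 bytes, so the per-key tables compare as follows (a quick check, not code from this file):

    #include <stdio.h>

    int main(void)
    {
        size_t entry = 16;                        /* sizeof(u128): two u64 */
        printf("4-bit Htable: %3d entries = %5zu bytes per key\n",
               16, 16 * entry);                   /* 256 bytes  */
        printf("8-bit Htable: %3d entries = %5zu bytes per key\n",
               256, 256 * entry);                 /* 4096 bytes */
        return 0;
    }

which reproduces the sixteen-fold difference (256 B vs 4 KB per key) that motivates the 4-bit default.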
  78|      |#if     TABLE_BITS==8
  79|      |
  80|      |static void gcm_init_8bit(u128 Htable[256], u64 H[2])
  81|      |{
  82|      |    int i, j;
  83|      |    u128 V;
  84|      |
  85|      |    Htable[0].hi = 0;
  86|      |    Htable[0].lo = 0;
  87|      |    V.hi = H[0];
  88|      |    V.lo = H[1];
  89|      |
  90|      |    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
  91|      |        REDUCE1BIT(V);
  92|      |        Htable[i] = V;
  93|      |    }
  94|      |
  95|      |    for (i = 2; i < 256; i <<= 1) {
  96|      |        u128 *Hi = Htable + i, H0 = *Hi;
  97|      |        for (j = 1; j < i; ++j) {
  98|      |            Hi[j].hi = H0.hi ^ Htable[j].hi;
  99|      |            Hi[j].lo = H0.lo ^ Htable[j].lo;
 100|      |        }
 101|      |    }
 102|      |}
 103|      |
 104|      |static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
 105|      |{
 106|      |    u128 Z = { 0, 0 };
 107|      |    const u8 *xi = (const u8 *)Xi + 15;
 108|      |    size_t rem, n = *xi;
 109|      |    DECLARE_IS_ENDIAN;
 110|      |    static const size_t rem_8bit[256] = {
 111|      |        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
 112|      |        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
 113|      |        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
 114|      |        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
 115|      |        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
 116|      |        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
 117|      |        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
 118|      |        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
 119|      |        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
 120|      |        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
 121|      |        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
 122|      |        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
 123|      |        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
 124|      |        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
 125|      |        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
 126|      |        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
 127|      |        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
 128|      |        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
 129|      |        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
 130|      |        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
 131|      |        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
 132|      |        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
 133|      |        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
 134|      |        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
 135|      |        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
 136|      |        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
 137|      |        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
 138|      |        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
 139|      |        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
 140|      |        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
 141|      |        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
 142|      |        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
 143|      |        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
 144|      |        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
 145|      |        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
 146|      |        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
 147|      |        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
 148|      |        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
 149|      |        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
 150|      |        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
 151|      |        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
 152|      |        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
 153|      |        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
 154|      |        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
 155|      |        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
 156|      |        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
 157|      |        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
 158|      |        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
 159|      |        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
 160|      |        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
 161|      |        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
 162|      |        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
 163|      |        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
 164|      |        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
 165|      |        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
 166|      |        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
 167|      |        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
 168|      |        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
 169|      |        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
 170|      |        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
 171|      |        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
 172|      |        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
 173|      |        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
 174|      |        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
 175|      |    };
 176|      |
 177|      |    while (1) {
 178|      |        Z.hi ^= Htable[n].hi;
 179|      |        Z.lo ^= Htable[n].lo;
 180|      |
 181|      |        if ((u8 *)Xi == xi)
 182|      |            break;
 183|      |
 184|      |        n = *(--xi);
 185|      |
 186|      |        rem = (size_t)Z.lo & 0xff;
 187|      |        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
 188|      |        Z.hi = (Z.hi >> 8);
 189|      |        if (sizeof(size_t) == 8)
 190|      |            Z.hi ^= rem_8bit[rem];
 191|      |        else
 192|      |            Z.hi ^= (u64)rem_8bit[rem] << 32;
 193|      |    }
 194|      |
 195|      |    if (IS_LITTLE_ENDIAN) {
 196|      |# ifdef BSWAP8
 197|      |        Xi[0] = BSWAP8(Z.hi);
 198|      |        Xi[1] = BSWAP8(Z.lo);
 199|      |# else
 200|      |        u8 *p = (u8 *)Xi;
 201|      |        u32 v;
 202|      |        v = (u32)(Z.hi >> 32);
 203|      |        PUTU32(p, v);
 204|      |        v = (u32)(Z.hi);
 205|      |        PUTU32(p + 4, v);
 206|      |        v = (u32)(Z.lo >> 32);
 207|      |        PUTU32(p + 8, v);
 208|      |        v = (u32)(Z.lo);
 209|      |        PUTU32(p + 12, v);
 210|      |# endif
 211|      |    } else {
 212|      |        Xi[0] = Z.hi;
 213|      |        Xi[1] = Z.lo;
 214|      |    }
 215|      |}
 216|      |
 217|      |# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
 218|      |
 219|      |#elif   TABLE_BITS==4
 220|      |
 221|      |static void gcm_init_4bit(u128 Htable[16], u64 H[2])
 222|     0|{
 223|     0|    u128 V;
 224|      |# if defined(OPENSSL_SMALL_FOOTPRINT)
 225|      |    int i;
 226|      |# endif
 227|      |
 228|     0|    Htable[0].hi = 0;
 229|     0|    Htable[0].lo = 0;
 230|     0|    V.hi = H[0];
 231|     0|    V.lo = H[1];
 232|      |
 233|      |# if defined(OPENSSL_SMALL_FOOTPRINT)
 234|      |    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
 235|      |        REDUCE1BIT(V);
 236|      |        Htable[i] = V;
 237|      |    }
 238|      |
 239|      |    for (i = 2; i < 16; i <<= 1) {
 240|      |        u128 *Hi = Htable + i;
 241|      |        int j;
 242|      |        for (V = *Hi, j = 1; j < i; ++j) {
 243|      |            Hi[j].hi = V.hi ^ Htable[j].hi;
 244|      |            Hi[j].lo = V.lo ^ Htable[j].lo;
 245|      |        }
 246|      |    }
 247|      |# else
 248|     0|    Htable[8] = V;
 249|     0|    REDUCE1BIT(V);
 250|     0|    Htable[4] = V;
 251|     0|    REDUCE1BIT(V);
 252|     0|    Htable[2] = V;
 253|     0|    REDUCE1BIT(V);
 254|     0|    Htable[1] = V;
 255|     0|    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
 256|     0|    V = Htable[4];
 257|     0|    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
 258|     0|    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
 259|     0|    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
 260|     0|    V = Htable[8];
 261|     0|    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
 262|     0|    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
 263|     0|    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
 264|     0|    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
 265|     0|    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
 266|     0|    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
 267|     0|    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
 268|     0|# endif
 269|      |# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
 270|      |    /*
 271|      |     * ARM assembler expects specific dword order in Htable.
 272|      |     */
 273|      |    {
 274|      |        int j;
 275|      |        DECLARE_IS_ENDIAN;
 276|      |
 277|      |        if (IS_LITTLE_ENDIAN)
 278|      |            for (j = 0; j < 16; ++j) {
 279|      |                V = Htable[j];
 280|      |                Htable[j].hi = V.lo;
 281|      |                Htable[j].lo = V.hi;
 282|      |        } else
 283|      |            for (j = 0; j < 16; ++j) {
 284|      |                V = Htable[j];
 285|      |                Htable[j].hi = V.lo << 32 | V.lo >> 32;
 286|      |                Htable[j].lo = V.hi << 32 | V.hi >> 32;
 287|      |            }
 288|      |    }
 289|      |# endif
 290|     0|}
 291|      |
 292|      |# ifndef GHASH_ASM
 293|      |static const size_t rem_4bit[16] = {
 294|      |    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
 295|      |    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
 296|      |    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
 297|      |    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
 298|      |};
 299|      |
 300|      |static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
 301|      |{
 302|      |    u128 Z;
 303|      |    int cnt = 15;
 304|      |    size_t rem, nlo, nhi;
 305|      |    DECLARE_IS_ENDIAN;
 306|      |
 307|      |    nlo = ((const u8 *)Xi)[15];
 308|      |    nhi = nlo >> 4;
 309|      |    nlo &= 0xf;
 310|      |
 311|      |    Z.hi = Htable[nlo].hi;
 312|      |    Z.lo = Htable[nlo].lo;
 313|      |
 314|      |    while (1) {
 315|      |        rem = (size_t)Z.lo & 0xf;
 316|      |        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
 317|      |        Z.hi = (Z.hi >> 4);
 318|      |        if (sizeof(size_t) == 8)
 319|      |            Z.hi ^= rem_4bit[rem];
 320|      |        else
 321|      |            Z.hi ^= (u64)rem_4bit[rem] << 32;
 322|      |
 323|      |        Z.hi ^= Htable[nhi].hi;
 324|      |        Z.lo ^= Htable[nhi].lo;
 325|      |
 326|      |        if (--cnt < 0)
 327|      |            break;
 328|      |
 329|      |        nlo = ((const u8 *)Xi)[cnt];
 330|      |        nhi = nlo >> 4;
 331|      |        nlo &= 0xf;
 332|      |
 333|      |        rem = (size_t)Z.lo & 0xf;
 334|      |        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
 335|      |        Z.hi = (Z.hi >> 4);
 336|      |        if (sizeof(size_t) == 8)
 337|      |            Z.hi ^= rem_4bit[rem];
 338|      |        else
 339|      |            Z.hi ^= (u64)rem_4bit[rem] << 32;
 340|      |
 341|      |        Z.hi ^= Htable[nlo].hi;
 342|      |        Z.lo ^= Htable[nlo].lo;
 343|      |    }
 344|      |
 345|      |    if (IS_LITTLE_ENDIAN) {
 346|      |#  ifdef BSWAP8
 347|      |        Xi[0] = BSWAP8(Z.hi);
 348|      |        Xi[1] = BSWAP8(Z.lo);
 349|      |#  else
 350|      |        u8 *p = (u8 *)Xi;
 351|      |        u32 v;
 352|      |        v = (u32)(Z.hi >> 32);
 353|      |        PUTU32(p, v);
 354|      |        v = (u32)(Z.hi);
 355|      |        PUTU32(p + 4, v);
 356|      |        v = (u32)(Z.lo >> 32);
 357|      |        PUTU32(p + 8, v);
 358|      |        v = (u32)(Z.lo);
 359|      |        PUTU32(p + 12, v);
 360|      |#  endif
 361|      |    } else {
 362|      |        Xi[0] = Z.hi;
 363|      |        Xi[1] = Z.lo;
 364|      |    }
 365|      |}
 366|      |
 367|      |#  if !defined(OPENSSL_SMALL_FOOTPRINT)
 368|      |/*
 369|      | * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 370|      | * details... Compiler-generated code doesn't seem to give any
 371|      | * performance improvement, at least not on x86[_64]. It's here
 372|      | * mostly as reference and a placeholder for possible future
 373|      | * non-trivial optimization[s]...
 374|      | */
 375|      |static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
 376|      |                           const u8 *inp, size_t len)
 377|      |{
 378|      |    u128 Z;
 379|      |    int cnt;
 380|      |    size_t rem, nlo, nhi;
 381|      |    DECLARE_IS_ENDIAN;
 382|      |
 383|      |#   if 1
 384|      |    do {
 385|      |        cnt = 15;
 386|      |        nlo = ((const u8 *)Xi)[15];
 387|      |        nlo ^= inp[15];
 388|      |        nhi = nlo >> 4;
 389|      |        nlo &= 0xf;
 390|      |
 391|      |        Z.hi = Htable[nlo].hi;
 392|      |        Z.lo = Htable[nlo].lo;
 393|      |
 394|      |        while (1) {
 395|      |            rem = (size_t)Z.lo & 0xf;
 396|      |            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
 397|      |            Z.hi = (Z.hi >> 4);
 398|      |            if (sizeof(size_t) == 8)
 399|      |                Z.hi ^= rem_4bit[rem];
 400|      |            else
 401|      |                Z.hi ^= (u64)rem_4bit[rem] << 32;
 402|      |
 403|      |            Z.hi ^= Htable[nhi].hi;
 404|      |            Z.lo ^= Htable[nhi].lo;
 405|      |
 406|      |            if (--cnt < 0)
 407|      |                break;
 408|      |
 409|      |            nlo = ((const u8 *)Xi)[cnt];
 410|      |            nlo ^= inp[cnt];
 411|      |            nhi = nlo >> 4;
 412|      |            nlo &= 0xf;
 413|      |
 414|      |            rem = (size_t)Z.lo & 0xf;
 415|      |            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
 416|      |            Z.hi = (Z.hi >> 4);
 417|      |            if (sizeof(size_t) == 8)
 418|      |                Z.hi ^= rem_4bit[rem];
 419|      |            else
 420|      |                Z.hi ^= (u64)rem_4bit[rem] << 32;
 421|      |
 422|      |            Z.hi ^= Htable[nlo].hi;
 423|      |            Z.lo ^= Htable[nlo].lo;
 424|      |        }
 425|      |#   else
 426|      |    /*
 427|      |     * Extra 256+16 bytes per-key plus 512 bytes shared tables
 428|      |     * [should] give ~50% improvement... One could have PACK()-ed
 429|      |     * the rem_8bit even here, but the priority is to minimize
 430|      |     * cache footprint...
 431|      |     */
 432|      |    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
 433|      |    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
 434|      |    static const unsigned short rem_8bit[256] = {
 435|      |        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
 436|      |        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
 437|      |        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
 438|      |        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
 439|      |        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
 440|      |        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
 441|      |        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
 442|      |        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
 443|      |        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
 444|      |        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
 445|      |        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
 446|      |        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
 447|      |        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
 448|      |        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
 449|      |        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
 450|      |        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
 451|      |        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
 452|      |        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
 453|      |        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
 454|      |        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
 455|      |        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
 456|      |        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
 457|      |        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
 458|      |        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
 459|      |        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
 460|      |        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
 461|      |        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
 462|      |        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
 463|      |        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
 464|      |        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
 465|      |        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
 466|      |        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
 467|      |    };
 468|      |    /*
 469|      |     * This pre-processing phase slows down procedure by approximately
 470|      |     * same time as it makes each loop spin faster. In other words
 471|      |     * single block performance is approximately same as straightforward
 472|      |     * "4-bit" implementation, and then it goes only faster...
 473|      |     */
 474|      |    for (cnt = 0; cnt < 16; ++cnt) {
 475|      |        Z.hi = Htable[cnt].hi;
 476|      |        Z.lo = Htable[cnt].lo;
 477|      |        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
 478|      |        Hshr4[cnt].hi = (Z.hi >> 4);
 479|      |        Hshl4[cnt] = (u8)(Z.lo << 4);
 480|      |    }
 481|      |
 482|      |    do {
 483|      |        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
 484|      |            nlo = ((const u8 *)Xi)[cnt];
 485|      |            nlo ^= inp[cnt];
 486|      |            nhi = nlo >> 4;
 487|      |            nlo &= 0xf;
 488|      |
 489|      |            Z.hi ^= Htable[nlo].hi;
 490|      |            Z.lo ^= Htable[nlo].lo;
 491|      |
 492|      |            rem = (size_t)Z.lo & 0xff;
 493|      |
 494|      |            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
 495|      |            Z.hi = (Z.hi >> 8);
 496|      |
 497|      |            Z.hi ^= Hshr4[nhi].hi;
 498|      |            Z.lo ^= Hshr4[nhi].lo;
 499|      |            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
 500|      |        }
 501|      |
 502|      |        nlo = ((const u8 *)Xi)[0];
 503|      |        nlo ^= inp[0];
 504|      |        nhi = nlo >> 4;
 505|      |        nlo &= 0xf;
 506|      |
 507|      |        Z.hi ^= Htable[nlo].hi;
 508|      |        Z.lo ^= Htable[nlo].lo;
 509|      |
 510|      |        rem = (size_t)Z.lo & 0xf;
 511|      |
 512|      |        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
 513|      |        Z.hi = (Z.hi >> 4);
 514|      |
 515|      |        Z.hi ^= Htable[nhi].hi;
 516|      |        Z.lo ^= Htable[nhi].lo;
 517|      |        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
 518|      |#   endif
 519|      |
 520|      |        if (IS_LITTLE_ENDIAN) {
 521|      |#   ifdef BSWAP8
 522|      |            Xi[0] = BSWAP8(Z.hi);
 523|      |            Xi[1] = BSWAP8(Z.lo);
 524|      |#   else
 525|      |            u8 *p = (u8 *)Xi;
 526|      |            u32 v;
 527|      |            v = (u32)(Z.hi >> 32);
 528|      |            PUTU32(p, v);
 529|      |            v = (u32)(Z.hi);
 530|      |            PUTU32(p + 4, v);
 531|      |            v = (u32)(Z.lo >> 32);
 532|      |            PUTU32(p + 8, v);
 533|      |            v = (u32)(Z.lo);
 534|      |            PUTU32(p + 12, v);
 535|      |#   endif
 536|      |        } else {
 537|      |            Xi[0] = Z.hi;
 538|      |            Xi[1] = Z.lo;
 539|      |        }
 540|      |    } while (inp += 16, len -= 16);
 541|      |}
366
367
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
368
/*
369
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
370
 * details... Compiler-generated code doesn't seem to give any
371
 * performance improvement, at least not on x86[_64]. It's here
372
 * mostly as reference and a placeholder for possible future
373
 * non-trivial optimization[s]...
374
 */
375
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
376
                           const u8 *inp, size_t len)
377
{
378
    u128 Z;
379
    int cnt;
380
    size_t rem, nlo, nhi;
381
    DECLARE_IS_ENDIAN;
382
383
#   if 1
384
    do {
385
        cnt = 15;
386
        nlo = ((const u8 *)Xi)[15];
387
        nlo ^= inp[15];
388
        nhi = nlo >> 4;
389
        nlo &= 0xf;
390
391
        Z.hi = Htable[nlo].hi;
392
        Z.lo = Htable[nlo].lo;
393
394
        while (1) {
395
            rem = (size_t)Z.lo & 0xf;
396
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
397
            Z.hi = (Z.hi >> 4);
398
            if (sizeof(size_t) == 8)
399
                Z.hi ^= rem_4bit[rem];
400
            else
401
                Z.hi ^= (u64)rem_4bit[rem] << 32;
402
403
            Z.hi ^= Htable[nhi].hi;
404
            Z.lo ^= Htable[nhi].lo;
405
406
            if (--cnt < 0)
407
                break;
408
409
            nlo = ((const u8 *)Xi)[cnt];
410
            nlo ^= inp[cnt];
411
            nhi = nlo >> 4;
412
            nlo &= 0xf;
413
414
            rem = (size_t)Z.lo & 0xf;
415
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
416
            Z.hi = (Z.hi >> 4);
417
            if (sizeof(size_t) == 8)
418
                Z.hi ^= rem_4bit[rem];
419
            else
420
                Z.hi ^= (u64)rem_4bit[rem] << 32;
421
422
            Z.hi ^= Htable[nlo].hi;
423
            Z.lo ^= Htable[nlo].lo;
424
        }
425
#   else
426
    /*
427
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
428
     * [should] give ~50% improvement... One could have PACK()-ed
429
     * the rem_8bit even here, but the priority is to minimize
430
     * cache footprint...
431
     */
432
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
433
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
434
    static const unsigned short rem_8bit[256] = {
435
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
436
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
437
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
438
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
439
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
440
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
441
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
442
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
443
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
444
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
445
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
446
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
447
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
448
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
449
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
450
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
451
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
452
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
453
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
454
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
455
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
456
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
457
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
458
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
459
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
460
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
461
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
462
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
463
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
464
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
465
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
466
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
467
    };
468
    /*
469
     * This pre-processing phase slows down procedure by approximately
470
     * same time as it makes each loop spin faster. In other words
471
     * single block performance is approximately same as straightforward
472
     * "4-bit" implementation, and then it goes only faster...
473
     */
474
    for (cnt = 0; cnt < 16; ++cnt) {
475
        Z.hi = Htable[cnt].hi;
476
        Z.lo = Htable[cnt].lo;
477
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
478
        Hshr4[cnt].hi = (Z.hi >> 4);
479
        Hshl4[cnt] = (u8)(Z.lo << 4);
480
    }
481
482
    do {
483
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
484
            nlo = ((const u8 *)Xi)[cnt];
485
            nlo ^= inp[cnt];
486
            nhi = nlo >> 4;
487
            nlo &= 0xf;
488
489
            Z.hi ^= Htable[nlo].hi;
490
            Z.lo ^= Htable[nlo].lo;
491
492
            rem = (size_t)Z.lo & 0xff;
493
494
            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
495
            Z.hi = (Z.hi >> 8);
496
497
            Z.hi ^= Hshr4[nhi].hi;
498
            Z.lo ^= Hshr4[nhi].lo;
499
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
500
        }
501
502
        nlo = ((const u8 *)Xi)[0];
503
        nlo ^= inp[0];
504
        nhi = nlo >> 4;
505
        nlo &= 0xf;
506
507
        Z.hi ^= Htable[nlo].hi;
508
        Z.lo ^= Htable[nlo].lo;
509
510
        rem = (size_t)Z.lo & 0xf;
511
512
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
513
        Z.hi = (Z.hi >> 4);
514
515
        Z.hi ^= Htable[nhi].hi;
516
        Z.lo ^= Htable[nhi].lo;
517
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
518
#   endif
519
520
        if (IS_LITTLE_ENDIAN) {
521
#   ifdef BSWAP8
522
            Xi[0] = BSWAP8(Z.hi);
523
            Xi[1] = BSWAP8(Z.lo);
524
#   else
525
            u8 *p = (u8 *)Xi;
526
            u32 v;
527
            v = (u32)(Z.hi >> 32);
528
            PUTU32(p, v);
529
            v = (u32)(Z.hi);
530
            PUTU32(p + 4, v);
531
            v = (u32)(Z.lo >> 32);
532
            PUTU32(p + 8, v);
533
            v = (u32)(Z.lo);
534
            PUTU32(p + 12, v);
535
#   endif
536
        } else {
537
            Xi[0] = Z.hi;
538
            Xi[1] = Z.lo;
539
        }
540
    } while (inp += 16, len -= 16);
541
}
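
Stripped of the fused nibble lookups, gcm_ghash_4bit() above is behaviourally just "fold each input block into Xi, then one gmult", exactly as the OPENSSL_SMALL_FOOTPRINT paths elsewhere in this file do it. A reference sketch using the file's own types and gcm_gmult_4bit():

    static void gcm_ghash_ref(u64 Xi[2], const u128 Htable[16],
                              const u8 *inp, size_t len)
    {
        size_t i;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ((u8 *)Xi)[i] ^= inp[i];    /* Xi ^= block */
            gcm_gmult_4bit(Xi, Htable);     /* Xi = Xi * H */
            inp += 16;
            len -= 16;
        }
    }

The streamed version saves the per-block call and store/reload overhead by folding the input XOR directly into the nibble extraction.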
 542|      |#  endif
 543|      |# else
 544|      |void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
 545|      |void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
 546|      |                    size_t len);
 547|      |# endif
 548|      |
 549|      |# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
 550|      |# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
 551|      |#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
 552|      |/*
 553|      | * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
 554|      | * effect. In other words idea is to hash data while it's still in L1 cache
 555|      | * after encryption pass...
 556|      | */
 557| 2.24M|#  define GHASH_CHUNK       (3*1024)
 558|      |# endif
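
The processing pattern this stride enables in CRYPTO_gcm128_encrypt() further down looks roughly like the following (a sketch only; ctr_encrypt() is a hypothetical stand-in for the inline CTR loop):

    while (len >= GHASH_CHUNK) {
        ctr_encrypt(ctx, in, out, GHASH_CHUNK);   /* encryption pass     */
        GHASH(ctx, out, GHASH_CHUNK);             /* hash while in L1    */
        in  += GHASH_CHUNK;
        out += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }

A 3 KB stride fits comfortably in a typical 32 KB L1 data cache alongside the GHASH tables, so the second pass over the ciphertext hits cache instead of memory.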
 559|      |
 560|      |#else                           /* TABLE_BITS */
 561|      |
 562|      |static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
 563|      |{
 564|      |    u128 V, Z = { 0, 0 };
 565|      |    long X;
 566|      |    int i, j;
 567|      |    const long *xi = (const long *)Xi;
 568|      |    DECLARE_IS_ENDIAN;
 569|      |
 570|      |    V.hi = H[0];                /* H is in host byte order, no byte swapping */
 571|      |    V.lo = H[1];
 572|      |
 573|      |    for (j = 0; j < 16 / sizeof(long); ++j) {
 574|      |        if (IS_LITTLE_ENDIAN) {
 575|      |            if (sizeof(long) == 8) {
 576|      |# ifdef BSWAP8
 577|      |                X = (long)(BSWAP8(xi[j]));
 578|      |# else
 579|      |                const u8 *p = (const u8 *)(xi + j);
 580|      |                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
 581|      |# endif
 582|      |            } else {
 583|      |                const u8 *p = (const u8 *)(xi + j);
 584|      |                X = (long)GETU32(p);
 585|      |            }
 586|      |        } else
 587|      |            X = xi[j];
 588|      |
 589|      |        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
 590|      |            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
 591|      |            Z.hi ^= V.hi & M;
 592|      |            Z.lo ^= V.lo & M;
 593|      |
 594|      |            REDUCE1BIT(V);
 595|      |        }
 596|      |    }
 597|      |
 598|      |    if (IS_LITTLE_ENDIAN) {
 599|      |# ifdef BSWAP8
 600|      |        Xi[0] = BSWAP8(Z.hi);
 601|      |        Xi[1] = BSWAP8(Z.lo);
 602|      |# else
 603|      |        u8 *p = (u8 *)Xi;
 604|      |        u32 v;
 605|      |        v = (u32)(Z.hi >> 32);
 606|      |        PUTU32(p, v);
 607|      |        v = (u32)(Z.hi);
 608|      |        PUTU32(p + 4, v);
 609|      |        v = (u32)(Z.lo >> 32);
 610|      |        PUTU32(p + 8, v);
 611|      |        v = (u32)(Z.lo);
 612|      |        PUTU32(p + 12, v);
 613|      |# endif
 614|      |    } else {
 615|      |        Xi[0] = Z.hi;
 616|      |        Xi[1] = Z.lo;
 617|      |    }
 618|      |}
 619|      |
 620|      |# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
 621|      |
 622|      |#endif
 623|      |
 624|      |#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
 625|      |# if    !defined(I386_ONLY) && \
 626|      |        (defined(__i386)        || defined(__i386__)    || \
 627|      |         defined(__x86_64)      || defined(__x86_64__)  || \
 628|      |         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
 629|      |#  define GHASH_ASM_X86_OR_64
 630|      |#  define GCM_FUNCREF_4BIT
 631|      |
 632|      |void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
 633|      |void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
 634|      |void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
 635|      |                     size_t len);
 636|      |
 637|      |#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
 638|      |#   define gcm_init_avx   gcm_init_clmul
 639|      |#   define gcm_gmult_avx  gcm_gmult_clmul
 640|      |#   define gcm_ghash_avx  gcm_ghash_clmul
 641|      |#  else
 642|      |void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
 643|      |void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
 644|      |void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
 645|      |                   size_t len);
 646|      |#  endif
 647|      |
 648|      |#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
 649|      |#   define GHASH_ASM_X86
 650|      |void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
 651|      |void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
 652|      |                        size_t len);
 653|      |
 654|      |void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
 655|      |void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
 656|      |                        size_t len);
 657|      |#  endif
 658|      |# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
 659|      |#  include "arm_arch.h"
 660|      |#  if __ARM_MAX_ARCH__>=7
 661|      |#   define GHASH_ASM_ARM
 662|      |#   define GCM_FUNCREF_4BIT
 663|      |#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
 664|      |#   if defined(__arm__) || defined(__arm)
 665|      |#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
 666|      |#   endif
 667|      |void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
 668|      |void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
 669|      |void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
 670|      |                    size_t len);
 671|      |void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
 672|      |void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
 673|      |void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
 674|      |                  size_t len);
 675|      |#  endif
 676|      |# elif defined(__sparc__) || defined(__sparc)
 677|      |#  include "crypto/sparc_arch.h"
 678|      |#  define GHASH_ASM_SPARC
 679|      |#  define GCM_FUNCREF_4BIT
 680|      |void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
 681|      |void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
 682|      |void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
 683|      |                    size_t len);
 684|      |# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
 685|      |#  include "crypto/ppc_arch.h"
 686|      |#  define GHASH_ASM_PPC
 687|      |#  define GCM_FUNCREF_4BIT
 688|      |void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
 689|      |void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
 690|      |void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
 691|      |                  size_t len);
 692|      |# endif
 693|      |#endif
 694|      |
 695|      |#ifdef GCM_FUNCREF_4BIT
 696|      |# undef  GCM_MUL
 697|  196k|# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
 698|      |# ifdef GHASH
 699|      |#  undef  GHASH
 700| 3.19M|#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
 701|      |# endif
 702|      |#endif
 703|      |
 704|      |void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
 705|   792|{
 706|   792|    DECLARE_IS_ENDIAN;
 707|      |
 708|   792|    memset(ctx, 0, sizeof(*ctx));
 709|   792|    ctx->block = block;
 710|   792|    ctx->key = key;
 711|      |
 712|   792|    (*block) (ctx->H.c, ctx->H.c, key);
 713|      |
 714|   792|    if (IS_LITTLE_ENDIAN) {
 715|      |        /* H is stored in host byte order */
 716|      |#ifdef BSWAP8
 717|      |        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
 718|      |        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
 719|      |#else
 720|   792|        u8 *p = ctx->H.c;
 721|   792|        u64 hi, lo;
 722|   792|        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
 723|   792|        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
 724|   792|        ctx->H.u[0] = hi;
 725|   792|        ctx->H.u[1] = lo;
 726|   792|#endif
 727|   792|    }
 728|      |#if     TABLE_BITS==8
 729|      |    gcm_init_8bit(ctx->Htable, ctx->H.u);
 730|      |#elif   TABLE_BITS==4
 731|      |# if    defined(GHASH)
 732|   792|#  define CTX__GHASH(f) (ctx->ghash = (f))
 733|      |# else
 734|      |#  define CTX__GHASH(f) (ctx->ghash = NULL)
 735|      |# endif
 736|   792|# if    defined(GHASH_ASM_X86_OR_64)
 737|   792|#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
 738|   792|    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
 739|   792|        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
 740|   792|            gcm_init_avx(ctx->Htable, ctx->H.u);
 741|   792|            ctx->gmult = gcm_gmult_avx;
 742|   792|            CTX__GHASH(gcm_ghash_avx);
 743|   792|        } else {
 744|     0|            gcm_init_clmul(ctx->Htable, ctx->H.u);
 745|     0|            ctx->gmult = gcm_gmult_clmul;
 746|     0|            CTX__GHASH(gcm_ghash_clmul);
 747|     0|        }
 748|   792|        return;
 749|   792|    }
 750|     0|#  endif
 751|     0|    gcm_init_4bit(ctx->Htable, ctx->H.u);
 752|      |#  if   defined(GHASH_ASM_X86)  /* x86 only */
 753|      |#   if  defined(OPENSSL_IA32_SSE2)
 754|      |    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
 755|      |#   else
 756|      |    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
 757|      |#   endif
 758|      |        ctx->gmult = gcm_gmult_4bit_mmx;
 759|      |        CTX__GHASH(gcm_ghash_4bit_mmx);
 760|      |    } else {
 761|      |        ctx->gmult = gcm_gmult_4bit_x86;
 762|      |        CTX__GHASH(gcm_ghash_4bit_x86);
 763|      |    }
 764|      |#  else
 765|     0|    ctx->gmult = gcm_gmult_4bit;
 766|     0|    CTX__GHASH(gcm_ghash_4bit);
 767|     0|#  endif
 768|      |# elif  defined(GHASH_ASM_ARM)
 769|      |#  ifdef PMULL_CAPABLE
 770|      |    if (PMULL_CAPABLE) {
 771|      |        gcm_init_v8(ctx->Htable, ctx->H.u);
 772|      |        ctx->gmult = gcm_gmult_v8;
 773|      |        CTX__GHASH(gcm_ghash_v8);
 774|      |    } else
 775|      |#  endif
 776|      |#  ifdef NEON_CAPABLE
 777|      |    if (NEON_CAPABLE) {
 778|      |        gcm_init_neon(ctx->Htable, ctx->H.u);
 779|      |        ctx->gmult = gcm_gmult_neon;
 780|      |        CTX__GHASH(gcm_ghash_neon);
 781|      |    } else
 782|      |#  endif
 783|      |    {
 784|      |        gcm_init_4bit(ctx->Htable, ctx->H.u);
 785|      |        ctx->gmult = gcm_gmult_4bit;
 786|      |        CTX__GHASH(gcm_ghash_4bit);
 787|      |    }
 788|      |# elif  defined(GHASH_ASM_SPARC)
 789|      |    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
 790|      |        gcm_init_vis3(ctx->Htable, ctx->H.u);
 791|      |        ctx->gmult = gcm_gmult_vis3;
 792|      |        CTX__GHASH(gcm_ghash_vis3);
 793|      |    } else {
 794|      |        gcm_init_4bit(ctx->Htable, ctx->H.u);
 795|      |        ctx->gmult = gcm_gmult_4bit;
 796|      |        CTX__GHASH(gcm_ghash_4bit);
 797|      |    }
 798|      |# elif  defined(GHASH_ASM_PPC)
 799|      |    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
 800|      |        gcm_init_p8(ctx->Htable, ctx->H.u);
 801|      |        ctx->gmult = gcm_gmult_p8;
 802|      |        CTX__GHASH(gcm_ghash_p8);
 803|      |    } else {
 804|      |        gcm_init_4bit(ctx->Htable, ctx->H.u);
 805|      |        ctx->gmult = gcm_gmult_4bit;
 806|      |        CTX__GHASH(gcm_ghash_4bit);
 807|      |    }
 808|      |# else
 809|      |    gcm_init_4bit(ctx->Htable, ctx->H.u);
 810|      |# endif
 811|     0|# undef CTX__GHASH
 812|     0|#endif
 813|     0|}
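
A minimal end-to-end sketch of driving this API, using the deprecated-but-still-exported low-level AES block function as the block128_f (illustrative only; CRYPTO_gcm128_tag() is defined later in this file, and real applications should go through EVP instead):

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    static void gcm_encrypt_oneshot(const unsigned char key[16],
                                    const unsigned char iv[12],
                                    const unsigned char *pt, size_t len,
                                    unsigned char *ct, unsigned char tag[16])
    {
        AES_KEY aes;
        GCM128_CONTEXT ctx;

        AES_set_encrypt_key(key, 128, &aes);
        CRYPTO_gcm128_init(&ctx, &aes, (block128_f)AES_encrypt);
        CRYPTO_gcm128_setiv(&ctx, iv, 12);        /* 96-bit IV: fast path */
        CRYPTO_gcm128_encrypt(&ctx, pt, ct, len);
        CRYPTO_gcm128_tag(&ctx, tag, 16);
    }

Note how init pre-selects ctx->gmult/ctx->ghash once per key, so the per-call paths below only chase the stored function pointers.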
 814|      |
 815|      |void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
 816|      |                         size_t len)
 817| 1.50M|{
 818| 1.50M|    DECLARE_IS_ENDIAN;
 819| 1.50M|    unsigned int ctr;
 820| 1.50M|#ifdef GCM_FUNCREF_4BIT
 821| 1.50M|    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
 822| 1.50M|#endif
 823|      |
 824| 1.50M|    ctx->len.u[0] = 0;          /* AAD length */
 825| 1.50M|    ctx->len.u[1] = 0;          /* message length */
 826| 1.50M|    ctx->ares = 0;
 827| 1.50M|    ctx->mres = 0;
 828|      |
 829| 1.50M|    if (len == 12) {
 830| 1.50M|        memcpy(ctx->Yi.c, iv, 12);
 831| 1.50M|        ctx->Yi.c[12] = 0;
 832| 1.50M|        ctx->Yi.c[13] = 0;
 833| 1.50M|        ctx->Yi.c[14] = 0;
 834| 1.50M|        ctx->Yi.c[15] = 1;
 835| 1.50M|        ctr = 1;
 836| 1.50M|    } else {
 837|     0|        size_t i;
 838|     0|        u64 len0 = len;
 839|      |
 840|      |        /* Borrow ctx->Xi to calculate initial Yi */
 841|     0|        ctx->Xi.u[0] = 0;
 842|     0|        ctx->Xi.u[1] = 0;
 843|      |
 844|     0|        while (len >= 16) {
 845|     0|            for (i = 0; i < 16; ++i)
 846|     0|                ctx->Xi.c[i] ^= iv[i];
 847|     0|            GCM_MUL(ctx);
 848|     0|            iv += 16;
 849|     0|            len -= 16;
 850|     0|        }
 851|     0|        if (len) {
 852|     0|            for (i = 0; i < len; ++i)
 853|     0|                ctx->Xi.c[i] ^= iv[i];
 854|     0|            GCM_MUL(ctx);
 855|     0|        }
 856|     0|        len0 <<= 3;
 857|     0|        if (IS_LITTLE_ENDIAN) {
 858|      |#ifdef BSWAP8
 859|      |            ctx->Xi.u[1] ^= BSWAP8(len0);
 860|      |#else
 861|     0|            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
 862|     0|            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
 863|     0|            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
 864|     0|            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
 865|     0|            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
 866|     0|            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
 867|     0|            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
 868|     0|            ctx->Xi.c[15] ^= (u8)(len0);
 869|     0|#endif
 870|     0|        } else {
 871|     0|            ctx->Xi.u[1] ^= len0;
 872|     0|        }
 873|      |
 874|     0|        GCM_MUL(ctx);
 875|      |
 876|     0|        if (IS_LITTLE_ENDIAN)
 877|      |#ifdef BSWAP4
 878|      |            ctr = BSWAP4(ctx->Xi.d[3]);
 879|      |#else
 880|     0|            ctr = GETU32(ctx->Xi.c + 12);
 881|     0|#endif
 882|     0|        else
 883|     0|            ctr = ctx->Xi.d[3];
 884|      |
 885|      |        /* Copy borrowed Xi to Yi */
 886|     0|        ctx->Yi.u[0] = ctx->Xi.u[0];
 887|     0|        ctx->Yi.u[1] = ctx->Xi.u[1];
 888|     0|    }
 889|      |
 890| 1.50M|    ctx->Xi.u[0] = 0;
 891| 1.50M|    ctx->Xi.u[1] = 0;
 892|      |
 893| 1.50M|    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
 894| 1.50M|    ++ctr;
 895| 1.50M|    if (IS_LITTLE_ENDIAN)
 896|      |#ifdef BSWAP4
 897|      |        ctx->Yi.d[3] = BSWAP4(ctr);
 898|      |#else
 899| 1.50M|        PUTU32(ctx->Yi.c + 12, ctr);
 900|     0|#endif
 901|     0|    else
 902|     0|        ctx->Yi.d[3] = ctr;
 903| 1.50M|}
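
The two branches above implement the IV handling of NIST SP 800-38D (the notation below is the spec's, not this file's): for a 96-bit IV the pre-counter block is formed directly, otherwise the IV is GHASHed together with its length,

    J_0 = \mathrm{IV} \,\|\, 0^{31} \,\|\, 1                                          if len(IV) = 96
    J_0 = \mathrm{GHASH}_H\bigl(\mathrm{IV} \,\|\, 0^{s+64} \,\|\, [\mathrm{len}(\mathrm{IV})]_{64}\bigr),
          \quad s = 128\lceil \mathrm{len}(\mathrm{IV})/128 \rceil - \mathrm{len}(\mathrm{IV})   otherwise

after which Yi holds inc_32(J_0) and EK0 holds E_K(J_0) for the final tag computation.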
 904|      |
 905|      |int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
 906|      |                      size_t len)
 907| 1.60M|{
 908| 1.60M|    size_t i;
 909| 1.60M|    unsigned int n;
 910| 1.60M|    u64 alen = ctx->len.u[0];
 911| 1.60M|#ifdef GCM_FUNCREF_4BIT
 912| 1.60M|    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
 913| 1.60M|# ifdef GHASH
 914| 1.60M|    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
 915| 1.60M|                         const u8 *inp, size_t len) = ctx->ghash;
 916| 1.60M|# endif
 917| 1.60M|#endif
 918|      |
 919| 1.60M|    if (ctx->len.u[1])
 920|     0|        return -2;
 921|      |
 922| 1.60M|    alen += len;
 923| 1.60M|    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
 924|     0|        return -1;
 925| 1.60M|    ctx->len.u[0] = alen;
 926|      |
 927| 1.60M|    n = ctx->ares;
 928| 1.60M|    if (n) {
 929|  452k|        while (n && len) {
 930|  358k|            ctx->Xi.c[n] ^= *(aad++);
 931|  358k|            --len;
 932|  358k|            n = (n + 1) % 16;
 933|  358k|        }
 934| 94.8k|        if (n == 0)
 935| 9.12k|            GCM_MUL(ctx);
 936| 85.7k|        else {
 937| 85.7k|            ctx->ares = n;
 938| 85.7k|            return 0;
 939| 85.7k|        }
 940| 94.8k|    }
 941| 1.51M|#ifdef GHASH
 942| 1.51M|    if ((i = (len & (size_t)-16))) {
 943|  529k|        GHASH(ctx, aad, i);
 944|  529k|        aad += i;
 945|  529k|        len -= i;
 946|  529k|    }
 947|      |#else
 948|      |    while (len >= 16) {
 949|      |        for (i = 0; i < 16; ++i)
 950|      |            ctx->Xi.c[i] ^= aad[i];
 951|      |        GCM_MUL(ctx);
 952|      |        aad += 16;
 953|      |        len -= 16;
 954|      |    }
 955|      |#endif
 956| 1.51M|    if (len) {
 957| 1.34M|        n = (unsigned int)len;
 958| 15.6M|        for (i = 0; i < len; ++i)
 959| 14.2M|            ctx->Xi.c[i] ^= aad[i];
 960| 1.34M|    }
 961|      |
 962| 1.51M|    ctx->ares = n;
 963| 1.51M|    return 0;
 964| 1.60M|}
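
The -2 return above encodes an ordering contract: all AAD must be supplied before the first encrypt/decrypt call, because a non-zero message length rejects further AAD. Sketch of a conforming call sequence (fragment; ctx, iv, aad and buffers assumed from context):

    CRYPTO_gcm128_setiv(&ctx, iv, iv_len);
    if (CRYPTO_gcm128_aad(&ctx, aad, aad_len) != 0)
        abort();                                  /* too long, or out of order */
    CRYPTO_gcm128_encrypt(&ctx, pt, ct, pt_len);  /* AAD is frozen from here   */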
 965|      |
 966|      |int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
 967|      |                          const unsigned char *in, unsigned char *out,
 968|      |                          size_t len)
 969|  611k|{
 970|  611k|    DECLARE_IS_ENDIAN;
 971|  611k|    unsigned int n, ctr, mres;
 972|  611k|    size_t i;
 973|  611k|    u64 mlen = ctx->len.u[1];
 974|  611k|    block128_f block = ctx->block;
 975|  611k|    void *key = ctx->key;
 976|  611k|#ifdef GCM_FUNCREF_4BIT
 977|  611k|    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
 978|  611k|# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
 979|  611k|    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
 980|  611k|                         const u8 *inp, size_t len) = ctx->ghash;
 981|  611k|# endif
 982|  611k|#endif
 983|      |
 984|  611k|    mlen += len;
 985|  611k|    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
 986|     0|        return -1;
 987|  611k|    ctx->len.u[1] = mlen;
 988|      |
 989|  611k|    mres = ctx->mres;
 990|      |
 991|  611k|    if (ctx->ares) {
 992|      |        /* First call to encrypt finalizes GHASH(AAD) */
 993| 4.90k|#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
 994| 4.90k|        if (len == 0) {
 995| 4.45k|            GCM_MUL(ctx);
 996| 4.45k|            ctx->ares = 0;
 997| 4.45k|            return 0;
 998| 4.45k|        }
 999|   449|        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1000|   449|        ctx->Xi.u[0] = 0;
1001|   449|        ctx->Xi.u[1] = 0;
1002|   449|        mres = sizeof(ctx->Xi);
1003|      |#else
1004|      |        GCM_MUL(ctx);
1005|      |#endif
1006|   449|        ctx->ares = 0;
1007|   449|    }
1008|      |
1009|  606k|    if (IS_LITTLE_ENDIAN)
1010|      |#ifdef BSWAP4
1011|      |        ctr = BSWAP4(ctx->Yi.d[3]);
1012|      |#else
1013|  606k|        ctr = GETU32(ctx->Yi.c + 12);
1014|     0|#endif
1015|     0|    else
1016|     0|        ctr = ctx->Yi.d[3];
1017|      |
1018|  606k|    n = mres % 16;
1019|  606k|#if !defined(OPENSSL_SMALL_FOOTPRINT)
1020|  606k|    if (16 % sizeof(size_t) == 0) { /* always true actually */
1021|  606k|        do {
1022|  606k|            if (n) {
1023|  596k|# if defined(GHASH)
1024| 8.82M|                while (n && len) {
1025| 8.23M|                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1026| 8.23M|                    --len;
1027| 8.23M|                    n = (n + 1) % 16;
1028| 8.23M|                }
1029|  596k|                if (n == 0) {
1030|  596k|                    GHASH(ctx, ctx->Xn, mres);
1031|  596k|                    mres = 0;
1032|  596k|                } else {
1033|     0|                    ctx->mres = mres;
1034|     0|                    return 0;
1035|     0|                }
1036|      |# else
1037|      |                while (n && len) {
1038|      |                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1039|      |                    --len;
1040|      |                    n = (n + 1) % 16;
1041|      |                }
1042|      |                if (n == 0) {
1043|      |                    GCM_MUL(ctx);
1044|      |                    mres = 0;
1045|      |                } else {
1046|      |                    ctx->mres = n;
1047|      |                    return 0;
1048|      |                }
1049|      |# endif
1050|  596k|            }
1051|  606k|# if defined(STRICT_ALIGNMENT)
1052|  606k|            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1053|  603k|                break;
1054| 3.16k|# endif
1055| 3.16k|# if defined(GHASH)
1056| 3.16k|            if (len >= 16 && mres) {
1057|   234|                GHASH(ctx, ctx->Xn, mres);
1058|   234|                mres = 0;
1059|   234|            }
1060| 3.16k|#  if defined(GHASH_CHUNK)
1061| 3.16k|            while (len >= GHASH_CHUNK) {
1062|     0|                size_t j = GHASH_CHUNK;
1063|      |
1064|     0|                while (j) {
1065|     0|                    size_t_aX *out_t = (size_t_aX *)out;
1066|     0|                    const size_t_aX *in_t = (const size_t_aX *)in;
1067|      |
1068|     0|                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1069|     0|                    ++ctr;
1070|     0|                    if (IS_LITTLE_ENDIAN)
1071|      |#   ifdef BSWAP4
1072|      |                        ctx->Yi.d[3] = BSWAP4(ctr);
1073|      |#   else
1074|     0|                        PUTU32(ctx->Yi.c + 12, ctr);
1075|     0|#   endif
1076|     0|                    else
1077|     0|                        ctx->Yi.d[3] = ctr;
1078|     0|                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1079|     0|                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1080|     0|                    out += 16;
1081|     0|                    in += 16;
1082|     0|                    j -= 16;
1083|     0|                }
1084|     0|                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1085|     0|                len -= GHASH_CHUNK;
1086|     0|            }
1087| 3.16k|#  endif
1088| 3.16k|            if ((i = (len & (size_t)-16))) {
1089|   234|                size_t j = i;
1090|      |
1091|   468|                while (len >= 16) {
1092|   234|                    size_t_aX *out_t = (size_t_aX *)out;
1093|   234|                    const size_t_aX *in_t = (const size_t_aX *)in;
1094|      |
1095|   234|                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1096|   234|                    ++ctr;
1097|   234|                    if (IS_LITTLE_ENDIAN)
1098|      |#  ifdef BSWAP4
1099|      |                        ctx->Yi.d[3] = BSWAP4(ctr);
1100|      |#  else
1101|   234|                        PUTU32(ctx->Yi.c + 12, ctr);
1102|     0|#  endif
1103|     0|                    else
1104|     0|                        ctx->Yi.d[3] = ctr;
1105|   702|                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1106|   468|                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1107|   234|                    out += 16;
1108|   234|                    in += 16;
1109|   234|                    len -= 16;
1110|   234|                }
1111|   234|                GHASH(ctx, out - j, j);
1112|   234|            }
1113|      |# else
1114|      |            while (len >= 16) {
1115|      |                size_t *out_t = (size_t *)out;
1116|      |                const size_t *in_t = (const size_t *)in;
1117|      |
1118|      |                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1119|      |                ++ctr;
1120|      |                if (IS_LITTLE_ENDIAN)
1121|      |#  ifdef BSWAP4
1122|      |                    ctx->Yi.d[3] = BSWAP4(ctr);
1123|      |#  else
1124|      |                    PUTU32(ctx->Yi.c + 12, ctr);
1125|      |#  endif
1126|      |                else
1127|      |                    ctx->Yi.d[3] = ctr;
1128|      |                for (i = 0; i < 16 / sizeof(size_t); ++i)
1129|      |                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1130|      |                GCM_MUL(ctx);
1131|      |                out += 16;
1132|      |                in += 16;
1133|      |                len -= 16;
1134|      |            }
1135|      |# endif
1136| 3.16k|            if (len) {
1137|   215|                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1138|   215|                ++ctr;
1139|   215|                if (IS_LITTLE_ENDIAN)
1140|      |# ifdef BSWAP4
1141|      |                    ctx->Yi.d[3] = BSWAP4(ctr);
1142|      |# else
1143|   215|                    PUTU32(ctx->Yi.c + 12, ctr);
1144|     0|# endif
1145|     0|                else
1146|     0|                    ctx->Yi.d[3] = ctr;
1147|   215|# if defined(GHASH)
1148|   645|                while (len--) {
1149|   430|                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1150|   430|                    ++n;
1151|   430|                }
1152|      |# else
1153|      |                while (len--) {
1154|      |                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1155|      |                    ++n;
1156|      |                }
1157|      |                mres = n;
1158|      |# endif
1159|   215|            }
1160|      |
1161| 3.16k|            ctx->mres = mres;
1162| 3.16k|            return 0;
1163|  606k|        } while (0);
1164|  606k|    }
1165|  603k|#endif
1166|  603k|    for (i = 0; i < len; ++i) {
1167|     0|        if (n == 0) {
1168|     0|            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1169|     0|            ++ctr;
1170|     0|            if (IS_LITTLE_ENDIAN)
1171|      |#ifdef BSWAP4
1172|      |                ctx->Yi.d[3] = BSWAP4(ctr);
1173|      |#else
1174|     0|                PUTU32(ctx->Yi.c + 12, ctr);
1175|     0|#endif
1176|     0|            else
1177|     0|                ctx->Yi.d[3] = ctr;
1178|     0|        }
1179|     0|#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1180|     0|        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
1181|     0|        n = (n + 1) % 16;
1182|     0|        if (mres == sizeof(ctx->Xn)) {
1183|     0|            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1184|     0|            mres = 0;
1185|     0|        }
1186|      |#else
1187|      |        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1188|      |        mres = n = (n + 1) % 16;
1189|      |        if (n == 0)
1190|      |            GCM_MUL(ctx);
1191|      |#endif
1192|     0|    }
1193|      |
1194|  603k|    ctx->mres = mres;
1195|  603k|    return 0;
1196|  606k|}
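
In SP 800-38D terms (spec notation again, not the file's), the loop bodies above compute, per 16-byte block i,

    Y_i = \mathrm{inc}_{32}(Y_{i-1}), \qquad C_i = P_i \oplus E_K(Y_i), \qquad X_i = (X_{i-1} \oplus C_i) \cdot H

and the rest is bookkeeping: the Xn[] buffer batches consecutive C_i (plus, on the first data call, the not-yet-multiplied GHASH(AAD) state) so that a single GHASH call can absorb them together instead of paying one GCM_MUL per block.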
1197
1198
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1199
                          const unsigned char *in, unsigned char *out,
1200
                          size_t len)
1201
251k
{
1202
251k
    DECLARE_IS_ENDIAN;
1203
251k
    unsigned int n, ctr, mres;
1204
251k
    size_t i;
1205
251k
    u64 mlen = ctx->len.u[1];
1206
251k
    block128_f block = ctx->block;
1207
251k
    void *key = ctx->key;
1208
251k
#ifdef GCM_FUNCREF_4BIT
1209
251k
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1210
251k
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1211
251k
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1212
251k
                         const u8 *inp, size_t len) = ctx->ghash;
1213
251k
# endif
1214
251k
#endif
1215
1216
251k
    mlen += len;
1217
251k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1218
0
        return -1;
1219
251k
    ctx->len.u[1] = mlen;
1220
1221
251k
    mres = ctx->mres;
1222
1223
251k
    if (ctx->ares) {
1224
        /* First call to decrypt finalizes GHASH(AAD) */
1225
95.1k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1226
95.1k
        if (len == 0) {
1227
88.1k
            GCM_MUL(ctx);
1228
88.1k
            ctx->ares = 0;
1229
88.1k
            return 0;
1230
88.1k
        }
1231
7.00k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1232
7.00k
        ctx->Xi.u[0] = 0;
1233
7.00k
        ctx->Xi.u[1] = 0;
1234
7.00k
        mres = sizeof(ctx->Xi);
1235
#else
1236
        GCM_MUL(ctx);
1237
#endif
1238
7.00k
        ctx->ares = 0;
1239
7.00k
    }
1240
1241
163k
    if (IS_LITTLE_ENDIAN)
1242
#ifdef BSWAP4
1243
        ctr = BSWAP4(ctx->Yi.d[3]);
1244
#else
1245
163k
        ctr = GETU32(ctx->Yi.c + 12);
1246
0
#endif
1247
0
    else
1248
0
        ctr = ctx->Yi.d[3];
1249
1250
163k
    n = mres % 16;
1251
163k
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1252
163k
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1253
163k
        do {
1254
163k
            if (n) {
1255
0
# if defined(GHASH)
1256
0
                while (n && len) {
1257
0
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1258
0
                    --len;
1259
0
                    n = (n + 1) % 16;
1260
0
                }
1261
0
                if (n == 0) {
1262
0
                    GHASH(ctx, ctx->Xn, mres);
1263
0
                    mres = 0;
1264
0
                } else {
1265
0
                    ctx->mres = mres;
1266
0
                    return 0;
1267
0
                }
1268
# else
1269
                while (n && len) {
1270
                    u8 c = *(in++);
1271
                    *(out++) = c ^ ctx->EKi.c[n];
1272
                    ctx->Xi.c[n] ^= c;
1273
                    --len;
1274
                    n = (n + 1) % 16;
1275
                }
1276
                if (n == 0) {
1277
                    GCM_MUL(ctx);
1278
                    mres = 0;
1279
                } else {
1280
                    ctx->mres = n;
1281
                    return 0;
1282
                }
1283
# endif
1284
0
            }
1285
163k
# if defined(STRICT_ALIGNMENT)
1286
163k
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1287
391
                break;
1288
163k
# endif
1289
163k
# if defined(GHASH)
1290
163k
            if (len >= 16 && mres) {
1291
1.88k
                GHASH(ctx, ctx->Xn, mres);
1292
1.88k
                mres = 0;
1293
1.88k
            }
1294
163k
#  if defined(GHASH_CHUNK)
1295
164k
            while (len >= GHASH_CHUNK) {
1296
1.27k
                size_t j = GHASH_CHUNK;
1297
1298
1.27k
                GHASH(ctx, in, GHASH_CHUNK);
1299
246k
                while (j) {
1300
245k
                    size_t_aX *out_t = (size_t_aX *)out;
1301
245k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1302
1303
245k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1304
245k
                    ++ctr;
1305
245k
                    if (IS_LITTLE_ENDIAN)
1306
#   ifdef BSWAP4
1307
                        ctx->Yi.d[3] = BSWAP4(ctr);
1308
#   else
1309
245k
                        PUTU32(ctx->Yi.c + 12, ctr);
1310
0
#   endif
1311
0
                    else
1312
0
                        ctx->Yi.d[3] = ctr;
1313
735k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1314
490k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1315
245k
                    out += 16;
1316
245k
                    in += 16;
1317
245k
                    j -= 16;
1318
245k
                }
1319
1.27k
                len -= GHASH_CHUNK;
1320
1.27k
            }
1321
163k
#  endif
1322
163k
            if ((i = (len & (size_t)-16))) {
1323
1.86k
                GHASH(ctx, in, i);
1324
25.0k
                while (len >= 16) {
1325
23.1k
                    size_t_aX *out_t = (size_t_aX *)out;
1326
23.1k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1327
1328
23.1k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1329
23.1k
                    ++ctr;
1330
23.1k
                    if (IS_LITTLE_ENDIAN)
1331
#  ifdef BSWAP4
1332
                        ctx->Yi.d[3] = BSWAP4(ctr);
1333
#  else
1334
23.1k
                        PUTU32(ctx->Yi.c + 12, ctr);
1335
0
#  endif
1336
0
                    else
1337
0
                        ctx->Yi.d[3] = ctr;
1338
69.5k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1339
46.3k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1340
23.1k
                    out += 16;
1341
23.1k
                    in += 16;
1342
23.1k
                    len -= 16;
1343
23.1k
                }
1344
1.86k
            }
1345
# else
1346
            while (len >= 16) {
1347
                size_t *out_t = (size_t *)out;
1348
                const size_t *in_t = (const size_t *)in;
1349
1350
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1351
                ++ctr;
1352
                if (IS_LITTLE_ENDIAN)
1353
#  ifdef BSWAP4
1354
                    ctx->Yi.d[3] = BSWAP4(ctr);
1355
#  else
1356
                    PUTU32(ctx->Yi.c + 12, ctr);
1357
#  endif
1358
                else
1359
                    ctx->Yi.d[3] = ctr;
1360
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1361
                    size_t c = in_t[i];
1362
                    out_t[i] = c ^ ctx->EKi.t[i];
1363
                    ctx->Xi.t[i] ^= c;
1364
                }
1365
                GCM_MUL(ctx);
1366
                out += 16;
1367
                in += 16;
1368
                len -= 16;
1369
            }
1370
# endif
1371
163k
            if (len) {
1372
6.69k
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1373
6.69k
                ++ctr;
1374
6.69k
                if (IS_LITTLE_ENDIAN)
1375
# ifdef BSWAP4
1376
                    ctx->Yi.d[3] = BSWAP4(ctr);
1377
# else
1378
6.69k
                    PUTU32(ctx->Yi.c + 12, ctr);
1379
0
# endif
1380
0
                else
1381
0
                    ctx->Yi.d[3] = ctr;
1382
6.69k
# if defined(GHASH)
1383
65.6k
                while (len--) {
1384
58.9k
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1385
58.9k
                    ++n;
1386
58.9k
                }
1387
# else
1388
                while (len--) {
1389
                    u8 c = in[n];
1390
                    ctx->Xi.c[n] ^= c;
1391
                    out[n] = c ^ ctx->EKi.c[n];
1392
                    ++n;
1393
                }
1394
                mres = n;
1395
# endif
1396
6.69k
            }
1397
1398
163k
            ctx->mres = mres;
1399
163k
            return 0;
1400
163k
        } while (0);
1401
163k
    }
1402
391
#endif
1403
391
    for (i = 0; i < len; ++i) {
1404
0
        u8 c;
1405
0
        if (n == 0) {
1406
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1407
0
            ++ctr;
1408
0
            if (IS_LITTLE_ENDIAN)
1409
#ifdef BSWAP4
1410
                ctx->Yi.d[3] = BSWAP4(ctr);
1411
#else
1412
0
                PUTU32(ctx->Yi.c + 12, ctr);
1413
0
#endif
1414
0
            else
1415
0
                ctx->Yi.d[3] = ctr;
1416
0
        }
1417
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1418
0
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1419
0
        n = (n + 1) % 16;
1420
0
        if (mres == sizeof(ctx->Xn)) {
1421
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1422
0
            mres = 0;
1423
0
        }
1424
#else
1425
        c = in[i];
1426
        out[i] = c ^ ctx->EKi.c[n];
1427
        ctx->Xi.c[n] ^= c;
1428
        mres = n = (n + 1) % 16;
1429
        if (n == 0)
1430
            GCM_MUL(ctx);
1431
#endif
1432
0
    }
1433
1434
391
    ctx->mres = mres;
1435
391
    return 0;
1436
163k
}
1437
1438
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1439
                                const unsigned char *in, unsigned char *out,
1440
                                size_t len, ctr128_f stream)
1441
1.55M
{
1442
#if defined(OPENSSL_SMALL_FOOTPRINT)
1443
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1444
#else
1445
1.55M
    DECLARE_IS_ENDIAN;
1446
1.55M
    unsigned int n, ctr, mres;
1447
1.55M
    size_t i;
1448
1.55M
    u64 mlen = ctx->len.u[1];
1449
1.55M
    void *key = ctx->key;
1450
1.55M
# ifdef GCM_FUNCREF_4BIT
1451
1.55M
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1452
1.55M
#  ifdef GHASH
1453
1.55M
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1454
1.55M
                         const u8 *inp, size_t len) = ctx->ghash;
1455
1.55M
#  endif
1456
1.55M
# endif
1457
1458
1.55M
    mlen += len;
1459
1.55M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1460
0
        return -1;
1461
1.55M
    ctx->len.u[1] = mlen;
1462
1463
1.55M
    mres = ctx->mres;
1464
1465
1.55M
    if (ctx->ares) {
1466
        /* First call to encrypt finalizes GHASH(AAD) */
1467
776k
#if defined(GHASH)
1468
776k
        if (len == 0) {
1469
0
            GCM_MUL(ctx);
1470
0
            ctx->ares = 0;
1471
0
            return 0;
1472
0
        }
1473
776k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1474
776k
        ctx->Xi.u[0] = 0;
1475
776k
        ctx->Xi.u[1] = 0;
1476
776k
        mres = sizeof(ctx->Xi);
1477
#else
1478
        GCM_MUL(ctx);
1479
#endif
1480
776k
        ctx->ares = 0;
1481
776k
    }
1482
1483
1.55M
    if (IS_LITTLE_ENDIAN)
1484
# ifdef BSWAP4
1485
        ctr = BSWAP4(ctx->Yi.d[3]);
1486
# else
1487
1.55M
        ctr = GETU32(ctx->Yi.c + 12);
1488
0
# endif
1489
0
    else
1490
0
        ctr = ctx->Yi.d[3];
1491
1492
1.55M
    n = mres % 16;
1493
1.55M
    if (n) {
1494
154k
# if defined(GHASH)
1495
930k
        while (n && len) {
1496
775k
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1497
775k
            --len;
1498
775k
            n = (n + 1) % 16;
1499
775k
        }
1500
154k
        if (n == 0) {
1501
72.4k
            GHASH(ctx, ctx->Xn, mres);
1502
72.4k
            mres = 0;
1503
82.5k
        } else {
1504
82.5k
            ctx->mres = mres;
1505
82.5k
            return 0;
1506
82.5k
        }
1507
# else
1508
        while (n && len) {
1509
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1510
            --len;
1511
            n = (n + 1) % 16;
1512
        }
1513
        if (n == 0) {
1514
            GCM_MUL(ctx);
1515
            mres = 0;
1516
        } else {
1517
            ctx->mres = n;
1518
            return 0;
1519
        }
1520
# endif
1521
154k
    }
1522
1.46M
# if defined(GHASH)
1523
1.46M
        if (len >= 16 && mres) {
1524
48.2k
            GHASH(ctx, ctx->Xn, mres);
1525
48.2k
            mres = 0;
1526
48.2k
        }
1527
1.46M
#  if defined(GHASH_CHUNK)
1528
1.46M
    while (len >= GHASH_CHUNK) {
1529
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1530
0
        ctr += GHASH_CHUNK / 16;
1531
0
        if (IS_LITTLE_ENDIAN)
1532
#   ifdef BSWAP4
1533
            ctx->Yi.d[3] = BSWAP4(ctr);
1534
#   else
1535
0
            PUTU32(ctx->Yi.c + 12, ctr);
1536
0
#   endif
1537
0
        else
1538
0
            ctx->Yi.d[3] = ctr;
1539
0
        GHASH(ctx, out, GHASH_CHUNK);
1540
0
        out += GHASH_CHUNK;
1541
0
        in += GHASH_CHUNK;
1542
0
        len -= GHASH_CHUNK;
1543
0
    }
1544
1.46M
#  endif
1545
1.46M
# endif
1546
1.46M
    if ((i = (len & (size_t)-16))) {
1547
221k
        size_t j = i / 16;
1548
1549
221k
        (*stream) (in, out, j, key, ctx->Yi.c);
1550
221k
        ctr += (unsigned int)j;
1551
221k
        if (IS_LITTLE_ENDIAN)
1552
# ifdef BSWAP4
1553
            ctx->Yi.d[3] = BSWAP4(ctr);
1554
# else
1555
221k
            PUTU32(ctx->Yi.c + 12, ctr);
1556
0
# endif
1557
0
        else
1558
0
            ctx->Yi.d[3] = ctr;
1559
221k
        in += i;
1560
221k
        len -= i;
1561
221k
# if defined(GHASH)
1562
221k
        GHASH(ctx, out, i);
1563
221k
        out += i;
1564
# else
1565
        while (j--) {
1566
            for (i = 0; i < 16; ++i)
1567
                ctx->Xi.c[i] ^= out[i];
1568
            GCM_MUL(ctx);
1569
            out += 16;
1570
        }
1571
# endif
1572
221k
    }
1573
1.46M
    if (len) {
1574
1.44M
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1575
1.44M
        ++ctr;
1576
1.44M
        if (IS_LITTLE_ENDIAN)
1577
# ifdef BSWAP4
1578
            ctx->Yi.d[3] = BSWAP4(ctr);
1579
# else
1580
1.44M
            PUTU32(ctx->Yi.c + 12, ctr);
1581
0
# endif
1582
0
        else
1583
0
            ctx->Yi.d[3] = ctr;
1584
5.73M
        while (len--) {
1585
4.28M
# if defined(GHASH)
1586
4.28M
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1587
# else
1588
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1589
# endif
1590
4.28M
            ++n;
1591
4.28M
        }
1592
1.44M
    }
1593
1594
1.46M
    ctx->mres = mres;
1595
1.46M
    return 0;
1596
1.55M
#endif
1597
1.55M
}
1598
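Note: the ctr32 variants treat only the low 32 bits of the 16-byte counter block Yi as the running counter, stored big-endian, which is why ctr is byte-swapped (BSWAP4) or rewritten with PUTU32 on little-endian hosts before being stored back. A minimal standalone sketch of that convention follows; ctr32_inc is a hypothetical helper for illustration, not part of the file above.

    #include <stdint.h>

    /* Increment only the last 32 bits of the 16-byte counter block,
     * big-endian, wrapping modulo 2^32 -- the "ctr32" convention the
     * stream callback above relies on. */
    static void ctr32_inc(unsigned char Yi[16], uint32_t blocks)
    {
        uint32_t ctr = ((uint32_t)Yi[12] << 24) | ((uint32_t)Yi[13] << 16) |
                       ((uint32_t)Yi[14] << 8)  |  (uint32_t)Yi[15];

        ctr += blocks;                  /* wraps modulo 2^32 by design */
        Yi[12] = (unsigned char)(ctr >> 24);
        Yi[13] = (unsigned char)(ctr >> 16);
        Yi[14] = (unsigned char)(ctr >> 8);
        Yi[15] = (unsigned char)ctr;
    }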
1599
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1600
                                const unsigned char *in, unsigned char *out,
1601
                                size_t len, ctr128_f stream)
1602
606k
{
1603
#if defined(OPENSSL_SMALL_FOOTPRINT)
1604
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1605
#else
1606
606k
    DECLARE_IS_ENDIAN;
1607
606k
    unsigned int n, ctr, mres;
1608
606k
    size_t i;
1609
606k
    u64 mlen = ctx->len.u[1];
1610
606k
    void *key = ctx->key;
1611
606k
# ifdef GCM_FUNCREF_4BIT
1612
606k
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1613
606k
#  ifdef GHASH
1614
606k
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1615
606k
                         const u8 *inp, size_t len) = ctx->ghash;
1616
606k
#  endif
1617
606k
# endif
1618
1619
606k
    mlen += len;
1620
606k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1621
0
        return -1;
1622
606k
    ctx->len.u[1] = mlen;
1623
1624
606k
    mres = ctx->mres;
1625
1626
606k
    if (ctx->ares) {
1627
        /* First call to decrypt finalizes GHASH(AAD) */
1628
361k
# if defined(GHASH)
1629
361k
        if (len == 0) {
1630
1.43k
            GCM_MUL(ctx);
1631
1.43k
            ctx->ares = 0;
1632
1.43k
            return 0;
1633
1.43k
        }
1634
360k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1635
360k
        ctx->Xi.u[0] = 0;
1636
360k
        ctx->Xi.u[1] = 0;
1637
360k
        mres = sizeof(ctx->Xi);
1638
# else
1639
        GCM_MUL(ctx);
1640
# endif
1641
360k
        ctx->ares = 0;
1642
360k
    }
1643
1644
604k
    if (IS_LITTLE_ENDIAN)
1645
# ifdef BSWAP4
1646
        ctr = BSWAP4(ctx->Yi.d[3]);
1647
# else
1648
604k
        ctr = GETU32(ctx->Yi.c + 12);
1649
0
# endif
1650
0
    else
1651
0
        ctr = ctx->Yi.d[3];
1652
1653
604k
    n = mres % 16;
1654
604k
    if (n) {
1655
0
# if defined(GHASH)
1656
0
        while (n && len) {
1657
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1658
0
            --len;
1659
0
            n = (n + 1) % 16;
1660
0
        }
1661
0
        if (n == 0) {
1662
0
            GHASH(ctx, ctx->Xn, mres);
1663
0
            mres = 0;
1664
0
        } else {
1665
0
            ctx->mres = mres;
1666
0
            return 0;
1667
0
        }
1668
# else
1669
        while (n && len) {
1670
            u8 c = *(in++);
1671
            *(out++) = c ^ ctx->EKi.c[n];
1672
            ctx->Xi.c[n] ^= c;
1673
            --len;
1674
            n = (n + 1) % 16;
1675
        }
1676
        if (n == 0) {
1677
            GCM_MUL(ctx);
1678
            mres = 0;
1679
        } else {
1680
            ctx->mres = n;
1681
            return 0;
1682
        }
1683
# endif
1684
0
    }
1685
604k
# if defined(GHASH)
1686
604k
    if (len >= 16 && mres) {
1687
0
        GHASH(ctx, ctx->Xn, mres);
1688
0
        mres = 0;
1689
0
    }
1690
604k
#  if defined(GHASH_CHUNK)
1691
604k
    while (len >= GHASH_CHUNK) {
1692
0
        GHASH(ctx, in, GHASH_CHUNK);
1693
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1694
0
        ctr += GHASH_CHUNK / 16;
1695
0
        if (IS_LITTLE_ENDIAN)
1696
#   ifdef BSWAP4
1697
            ctx->Yi.d[3] = BSWAP4(ctr);
1698
#   else
1699
0
            PUTU32(ctx->Yi.c + 12, ctr);
1700
0
#   endif
1701
0
        else
1702
0
            ctx->Yi.d[3] = ctr;
1703
0
        out += GHASH_CHUNK;
1704
0
        in += GHASH_CHUNK;
1705
0
        len -= GHASH_CHUNK;
1706
0
    }
1707
604k
#  endif
1708
604k
# endif
1709
604k
    if ((i = (len & (size_t)-16))) {
1710
219k
        size_t j = i / 16;
1711
1712
219k
# if defined(GHASH)
1713
219k
        GHASH(ctx, in, i);
1714
# else
1715
        while (j--) {
1716
            size_t k;
1717
            for (k = 0; k < 16; ++k)
1718
                ctx->Xi.c[k] ^= in[k];
1719
            GCM_MUL(ctx);
1720
            in += 16;
1721
        }
1722
        j = i / 16;
1723
        in -= i;
1724
# endif
1725
219k
        (*stream) (in, out, j, key, ctx->Yi.c);
1726
219k
        ctr += (unsigned int)j;
1727
219k
        if (IS_LITTLE_ENDIAN)
1728
# ifdef BSWAP4
1729
            ctx->Yi.d[3] = BSWAP4(ctr);
1730
# else
1731
219k
            PUTU32(ctx->Yi.c + 12, ctr);
1732
0
# endif
1733
0
        else
1734
0
            ctx->Yi.d[3] = ctr;
1735
219k
        out += i;
1736
219k
        in += i;
1737
219k
        len -= i;
1738
219k
    }
1739
604k
    if (len) {
1740
593k
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1741
593k
        ++ctr;
1742
593k
        if (IS_LITTLE_ENDIAN)
1743
# ifdef BSWAP4
1744
            ctx->Yi.d[3] = BSWAP4(ctr);
1745
# else
1746
593k
            PUTU32(ctx->Yi.c + 12, ctr);
1747
0
# endif
1748
0
        else
1749
0
            ctx->Yi.d[3] = ctr;
1750
2.53M
        while (len--) {
1751
1.93M
# if defined(GHASH)
1752
1.93M
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1753
# else
1754
            u8 c = in[n];
1755
            ctx->Xi.c[mres++] ^= c;
1756
            out[n] = c ^ ctx->EKi.c[n];
1757
# endif
1758
1.93M
            ++n;
1759
1.93M
        }
1760
593k
    }
1761
1762
604k
    ctx->mres = mres;
1763
604k
    return 0;
1764
604k
#endif
1765
604k
}
1766
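Note: both ctr32 routines enforce the same message-length ceiling before touching any data. NIST SP 800-38D caps a single GCM plaintext at 2^39 - 256 bits, i.e. (2^39 - 256)/8 = 2^36 - 32 bytes, and the second half of the test catches wraparound of the running 64-bit total. A hedged sketch of just that guard; gcm_len_ok is a hypothetical name.

    #include <stddef.h>
    #include <stdint.h>

    /* Returns 1 if adding `add` bytes keeps the running total within
     * the GCM limit of 2^36 - 32 bytes, 0 otherwise (the routines
     * above return -1 in that case). */
    static int gcm_len_ok(uint64_t total, size_t add)
    {
        uint64_t mlen = total + add;

        if (mlen > (((uint64_t)1 << 36) - 32) ||
            (sizeof(add) == 8 && mlen < add))
            return 0;
        return 1;
    }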
1767
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1768
                         size_t len)
1769
1.50M
{
1770
1.50M
    DECLARE_IS_ENDIAN;
1771
1.50M
    u64 alen = ctx->len.u[0] << 3;
1772
1.50M
    u64 clen = ctx->len.u[1] << 3;
1773
1.50M
#ifdef GCM_FUNCREF_4BIT
1774
1.50M
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1775
1.50M
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1776
1.50M
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1777
1.50M
                         const u8 *inp, size_t len) = ctx->ghash;
1778
1.50M
# endif
1779
1.50M
#endif
1780
1781
1.50M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1782
1.50M
    u128 bitlen;
1783
1.50M
    unsigned int mres = ctx->mres;
1784
1785
1.50M
    if (mres) {
1786
1.37M
        unsigned blocks = (mres + 15) & -16;
1787
1788
1.37M
        memset(ctx->Xn + mres, 0, blocks - mres);
1789
1.37M
        mres = blocks;
1790
1.37M
        if (mres == sizeof(ctx->Xn)) {
1791
0
            GHASH(ctx, ctx->Xn, mres);
1792
0
            mres = 0;
1793
0
        }
1794
1.37M
    } else if (ctx->ares) {
1795
93.6k
        GCM_MUL(ctx);
1796
93.6k
    }
1797
#else
1798
    if (ctx->mres || ctx->ares)
1799
        GCM_MUL(ctx);
1800
#endif
1801
1802
1.50M
    if (IS_LITTLE_ENDIAN) {
1803
#ifdef BSWAP8
1804
        alen = BSWAP8(alen);
1805
        clen = BSWAP8(clen);
1806
#else
1807
1.50M
        u8 *p = ctx->len.c;
1808
1809
1.50M
        ctx->len.u[0] = alen;
1810
1.50M
        ctx->len.u[1] = clen;
1811
1812
1.50M
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1813
1.50M
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1814
1.50M
#endif
1815
1.50M
    }
1816
1817
1.50M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1818
1.50M
    bitlen.hi = alen;
1819
1.50M
    bitlen.lo = clen;
1820
1.50M
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1821
1.50M
    mres += sizeof(bitlen);
1822
1.50M
    GHASH(ctx, ctx->Xn, mres);
1823
#else
1824
    ctx->Xi.u[0] ^= alen;
1825
    ctx->Xi.u[1] ^= clen;
1826
    GCM_MUL(ctx);
1827
#endif
1828
1829
1.50M
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1830
1.50M
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1831
1832
1.50M
    if (tag && len <= sizeof(ctx->Xi))
1833
613k
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1834
887k
    else
1835
887k
        return -1;
1836
1.50M
}
1837
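Note: CRYPTO_gcm128_finish compares the computed tag with CRYPTO_memcmp rather than memcmp, so the comparison time does not reveal the position of the first mismatching byte to an attacker submitting forged tags. A minimal constant-time equivalent, for illustration only; ct_memeq is hypothetical.

    #include <stddef.h>

    /* OR together all byte differences; the loop always runs to
     * completion, so timing is independent of where the tags differ. */
    static int ct_memeq(const unsigned char *a, const unsigned char *b,
                        size_t n)
    {
        unsigned char d = 0;
        size_t i;

        for (i = 0; i < n; i++)
            d |= a[i] ^ b[i];
        return d == 0;              /* 1 when the tags match */
    }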
1838
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1839
887k
{
1840
887k
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1841
887k
    memcpy(tag, ctx->Xi.c,
1842
887k
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1843
887k
}
1844
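Note: CRYPTO_gcm128_tag runs finish with no expected tag and copies out up to 16 bytes of Xi. For orientation, a hedged sketch of the intended call sequence for this low-level API, assuming an AES key schedule ks and visibility of the internal crypto/modes.h header (which defines GCM128_CONTEXT); error handling elided.

    #include <openssl/aes.h>
    #include "crypto/modes.h"

    void gcm_seal_sketch(const AES_KEY *ks,
                         const unsigned char *iv, size_t ivlen,
                         const unsigned char *aad, size_t aadlen,
                         const unsigned char *pt, unsigned char *ct,
                         size_t len, unsigned char tag[16])
    {
        GCM128_CONTEXT gcm;

        /* The cast of AES_encrypt to block128_f is the usual pattern. */
        CRYPTO_gcm128_init(&gcm, (void *)ks, (block128_f)AES_encrypt);
        CRYPTO_gcm128_setiv(&gcm, iv, ivlen);   /* 12-byte IV is the fast path */
        CRYPTO_gcm128_aad(&gcm, aad, aadlen);   /* AAD must precede any data */
        CRYPTO_gcm128_encrypt(&gcm, pt, ct, len);
        CRYPTO_gcm128_tag(&gcm, tag, 16);
    }

The receiving side mirrors this with CRYPTO_gcm128_decrypt and then treats the message as authentic only if CRYPTO_gcm128_finish(&gcm, tag, 16) returns 0.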
1845
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1846
0
{
1847
0
    GCM128_CONTEXT *ret;
1848
1849
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1850
0
        CRYPTO_gcm128_init(ret, key, block);
1851
1852
0
    return ret;
1853
0
}
1854
1855
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1856
0
{
1857
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1858
0
}
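
Note: the heap-allocating pair above exists for callers that cannot see the GCM128_CONTEXT definition. CRYPTO_gcm128_release uses OPENSSL_clear_free rather than a plain free because the context holds key-derived material (H, the Htable, EK0). A sketch of the pairing, under the same assumptions as the sealing sketch above:

    GCM128_CONTEXT *g = CRYPTO_gcm128_new((void *)ks, (block128_f)AES_encrypt);

    if (g != NULL) {
        /* ... setiv / aad / encrypt / finish, as sketched earlier ... */
        CRYPTO_gcm128_release(g);   /* zeroizes the context before freeing */
    }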