Coverage Report

Created: 2025-12-31 06:58

/src/openssl30/crypto/modes/gcm128.c
Line | Count | Source
1
/*
2
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <string.h>
11
#include <openssl/crypto.h>
12
#include "internal/cryptlib.h"
13
#include "internal/endian.h"
14
#include "crypto/modes.h"
15
16
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17
typedef size_t size_t_aX __attribute((__aligned__(1)));
18
#else
19
typedef size_t size_t_aX;
20
#endif
21
22
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23
/* redefine, because alignment is ensured */
24
#undef GETU32
25
#define GETU32(p) BSWAP4(*(const u32 *)(p))
26
#undef PUTU32
27
#define PUTU32(p, v) *(u32 *)(p) = BSWAP4(v)
28
#endif
29
30
#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
31
#define REDUCE1BIT(V)                                           \
32
0
    do {                                                        \
33
0
        if (sizeof(size_t) == 8) {                              \
34
0
            u64 T = U64(0xe100000000000000) & (0 - (V.lo & 1)); \
35
0
            V.lo = (V.hi << 63) | (V.lo >> 1);                  \
36
0
            V.hi = (V.hi >> 1) ^ T;                             \
37
0
        } else {                                                \
38
0
            u32 T = 0xe1000000U & (0 - (u32)(V.lo & 1));        \
39
0
            V.lo = (V.hi << 63) | (V.lo >> 1);                  \
40
0
            V.hi = (V.hi >> 1) ^ ((u64)T << 32);                \
41
0
        }                                                       \
42
0
    } while (0)
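
Note: REDUCE1BIT is the branch-free form of the shift-and-reduce step in the GCM multiplication algorithm of NIST SP 800-38D; with $R = \mathtt{E1} \,\|\, 0^{120}$ it computes

$V \leftarrow (V \gg 1) \oplus (\mathrm{lsb}(V) \cdot R)$

i.e. multiplication by $x$ modulo $x^{128} + x^7 + x^2 + x + 1$ in GCM's bit-reflected representation of $\mathrm{GF}(2^{128})$. The expression (0 - (V.lo & 1)) expands the shifted-out bit into an all-ones or all-zero mask, so the conditional XOR costs no branch.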
43
44
/*-
45
 * Although the permitted values for TABLE_BITS are 8, 4 and 1, it should
46
 * never be set to 8; 8 is effectively reserved for testing purposes.
47
 * TABLE_BITS>1 selects the lookup-table-driven implementations referred
48
 * to as "Shoup's" in the GCM specification; in other words, OpenSSL does
49
 * not cover the whole spectrum of possible table-driven implementations.
50
 * Why? In the non-"Shoup's" case the memory access pattern is segmented
51
 * in such a manner that cache-timing information can trivially reveal a
52
 * fair portion of the intermediate hash value. Given that the ciphertext
53
 * is always available to an attacker, the attacker can then attempt to
54
 * deduce the secret parameter H and, if successful, tamper with messages
55
 * [which is trivial in CTR mode]. In the "Shoup's" case this is not as
56
 * easy, but there is no reason to believe it is resistant to
57
 * cache-timing attacks either. The catch with the "8-bit" implementation
58
 * is that it consumes 16 (sixteen) times more memory, 4KB per individual
59
 * key + 1KB shared. On the plus side, it should be about twice as fast
60
 * as the "4-bit" version; for gcc-generated x86[_64] code the "8-bit"
61
 * version was observed to run ~75% faster, closer to 100% for
62
 * commercial compilers... Yet the "4-bit" procedure is preferred: it is
63
 * believed to provide a better security-performance balance and
64
 * adequate all-round performance. "All-round" refers to things like:
65
 *
66
 * - shorter setup time, which effectively improves overall timing for
67
 *   handling short messages;
68
 * - larger table allocations can become unbearable because of VM
69
 *   subsystem penalties (for example, on Windows a large enough free
70
 *   results in VM working-set trimming, meaning that a subsequent
71
 *   malloc would immediately incur working-set expansion);
72
 * - a larger table has a larger cache footprint, which can affect the
73
 *   performance of other code paths (not necessarily even in the same
74
 *   thread in a Hyper-Threading world);
75
 *
76
 * A value of 1 is not appropriate, for performance reasons.
77
 */
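
The memory figures in the comment above follow directly from the table types used below (each Htable entry is a u128, i.e. 16 bytes); a quick check of the arithmetic:

/* TABLE_BITS == 4: u128 Htable[16]  ->  16 * 16 =  256 bytes per key
 * TABLE_BITS == 8: u128 Htable[256] -> 256 * 16 = 4096 bytes per key (16x)
 * shared rem_4bit[16]:   16 * sizeof(size_t) ->  64 or 128 bytes
 * shared rem_8bit[256]: 256 * sizeof(size_t) -> 1KB (32-bit) or 2KB (64-bit),
 * so the "4KB per key + 1KB shared" figure corresponds to a 32-bit build. */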
78
#if TABLE_BITS == 8
79
80
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
81
{
82
    int i, j;
83
    u128 V;
84
85
    Htable[0].hi = 0;
86
    Htable[0].lo = 0;
87
    V.hi = H[0];
88
    V.lo = H[1];
89
90
    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
91
        REDUCE1BIT(V);
92
        Htable[i] = V;
93
    }
94
95
    for (i = 2; i < 256; i <<= 1) {
96
        u128 *Hi = Htable + i, H0 = *Hi;
97
        for (j = 1; j < i; ++j) {
98
            Hi[j].hi = H0.hi ^ Htable[j].hi;
99
            Hi[j].lo = H0.lo ^ Htable[j].lo;
100
        }
101
    }
102
}
103
104
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
105
{
106
    u128 Z = { 0, 0 };
107
    const u8 *xi = (const u8 *)Xi + 15;
108
    size_t rem, n = *xi;
109
    DECLARE_IS_ENDIAN;
110
    static const size_t rem_8bit[256] = {
111
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
112
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
113
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
114
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
115
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
116
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
117
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
118
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
119
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
120
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
121
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
122
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
123
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
124
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
125
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
126
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
127
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
128
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
129
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
130
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
131
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
132
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
133
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
134
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
135
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
136
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
137
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
138
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
139
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
140
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
141
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
142
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
143
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
144
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
145
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
146
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
147
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
148
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
149
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
150
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
151
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
152
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
153
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
154
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
155
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
156
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
157
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
158
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
159
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
160
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
161
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
162
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
163
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
164
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
165
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
166
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
167
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
168
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
169
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
170
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
171
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
172
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
173
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
174
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
175
    };
176
177
    while (1) {
178
        Z.hi ^= Htable[n].hi;
179
        Z.lo ^= Htable[n].lo;
180
181
        if ((u8 *)Xi == xi)
182
            break;
183
184
        n = *(--xi);
185
186
        rem = (size_t)Z.lo & 0xff;
187
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
188
        Z.hi = (Z.hi >> 8);
189
        if (sizeof(size_t) == 8)
190
            Z.hi ^= rem_8bit[rem];
191
        else
192
            Z.hi ^= (u64)rem_8bit[rem] << 32;
193
    }
194
195
    if (IS_LITTLE_ENDIAN) {
196
#ifdef BSWAP8
197
        Xi[0] = BSWAP8(Z.hi);
198
        Xi[1] = BSWAP8(Z.lo);
199
#else
200
        u8 *p = (u8 *)Xi;
201
        u32 v;
202
        v = (u32)(Z.hi >> 32);
203
        PUTU32(p, v);
204
        v = (u32)(Z.hi);
205
        PUTU32(p + 4, v);
206
        v = (u32)(Z.lo >> 32);
207
        PUTU32(p + 8, v);
208
        v = (u32)(Z.lo);
209
        PUTU32(p + 12, v);
210
#endif
211
    } else {
212
        Xi[0] = Z.hi;
213
        Xi[1] = Z.lo;
214
    }
215
}
216
217
#define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u, ctx->Htable)
218
219
#elif TABLE_BITS == 4
220
221
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
222
0
{
223
0
    u128 V;
224
#if defined(OPENSSL_SMALL_FOOTPRINT)
225
    int i;
226
#endif
227
228
0
    Htable[0].hi = 0;
229
0
    Htable[0].lo = 0;
230
0
    V.hi = H[0];
231
0
    V.lo = H[1];
232
233
#if defined(OPENSSL_SMALL_FOOTPRINT)
234
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
235
        REDUCE1BIT(V);
236
        Htable[i] = V;
237
    }
238
239
    for (i = 2; i < 16; i <<= 1) {
240
        u128 *Hi = Htable + i;
241
        int j;
242
        for (V = *Hi, j = 1; j < i; ++j) {
243
            Hi[j].hi = V.hi ^ Htable[j].hi;
244
            Hi[j].lo = V.lo ^ Htable[j].lo;
245
        }
246
    }
247
#else
248
0
    Htable[8] = V;
249
0
    REDUCE1BIT(V);
250
0
    Htable[4] = V;
251
0
    REDUCE1BIT(V);
252
0
    Htable[2] = V;
253
0
    REDUCE1BIT(V);
254
0
    Htable[1] = V;
255
0
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
256
0
    V = Htable[4];
257
0
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
258
0
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
259
0
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
260
0
    V = Htable[8];
261
0
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
262
0
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
263
0
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
264
0
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
265
0
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
266
0
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
267
0
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
268
0
#endif
269
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
270
    /*
271
     * ARM assembler expects specific dword order in Htable.
272
     */
273
    {
274
        int j;
275
        DECLARE_IS_ENDIAN;
276
277
        if (IS_LITTLE_ENDIAN)
278
            for (j = 0; j < 16; ++j) {
279
                V = Htable[j];
280
                Htable[j].hi = V.lo;
281
                Htable[j].lo = V.hi;
282
            }
283
        else
284
            for (j = 0; j < 16; ++j) {
285
                V = Htable[j];
286
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
287
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
288
            }
289
    }
290
#endif
291
0
}
292
293
#ifndef GHASH_ASM
294
static const size_t rem_4bit[16] = {
295
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
296
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
297
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
298
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
299
};
300
301
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
302
{
303
    u128 Z;
304
    int cnt = 15;
305
    size_t rem, nlo, nhi;
306
    DECLARE_IS_ENDIAN;
307
308
    nlo = ((const u8 *)Xi)[15];
309
    nhi = nlo >> 4;
310
    nlo &= 0xf;
311
312
    Z.hi = Htable[nlo].hi;
313
    Z.lo = Htable[nlo].lo;
314
315
    while (1) {
316
        rem = (size_t)Z.lo & 0xf;
317
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
318
        Z.hi = (Z.hi >> 4);
319
        if (sizeof(size_t) == 8)
320
            Z.hi ^= rem_4bit[rem];
321
        else
322
            Z.hi ^= (u64)rem_4bit[rem] << 32;
323
324
        Z.hi ^= Htable[nhi].hi;
325
        Z.lo ^= Htable[nhi].lo;
326
327
        if (--cnt < 0)
328
            break;
329
330
        nlo = ((const u8 *)Xi)[cnt];
331
        nhi = nlo >> 4;
332
        nlo &= 0xf;
333
334
        rem = (size_t)Z.lo & 0xf;
335
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
336
        Z.hi = (Z.hi >> 4);
337
        if (sizeof(size_t) == 8)
338
            Z.hi ^= rem_4bit[rem];
339
        else
340
            Z.hi ^= (u64)rem_4bit[rem] << 32;
341
342
        Z.hi ^= Htable[nlo].hi;
343
        Z.lo ^= Htable[nlo].lo;
344
    }
345
346
    if (IS_LITTLE_ENDIAN) {
347
#ifdef BSWAP8
348
        Xi[0] = BSWAP8(Z.hi);
349
        Xi[1] = BSWAP8(Z.lo);
350
#else
351
        u8 *p = (u8 *)Xi;
352
        u32 v;
353
        v = (u32)(Z.hi >> 32);
354
        PUTU32(p, v);
355
        v = (u32)(Z.hi);
356
        PUTU32(p + 4, v);
357
        v = (u32)(Z.lo >> 32);
358
        PUTU32(p + 8, v);
359
        v = (u32)(Z.lo);
360
        PUTU32(p + 12, v);
361
#endif
362
    } else {
363
        Xi[0] = Z.hi;
364
        Xi[1] = Z.lo;
365
    }
366
}
367
368
#if !defined(OPENSSL_SMALL_FOOTPRINT)
369
/*
370
 * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
371
 * details... The compiler-generated code doesn't seem to give any
372
 * performance improvement, at least not on x86[_64]. It's here
373
 * mostly as a reference and as a placeholder for possible future
374
 * non-trivial optimization[s]...
375
 */
376
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
377
    const u8 *inp, size_t len)
378
{
379
    u128 Z;
380
    int cnt;
381
    size_t rem, nlo, nhi;
382
    DECLARE_IS_ENDIAN;
383
384
#if 1
385
    do {
386
        cnt = 15;
387
        nlo = ((const u8 *)Xi)[15];
388
        nlo ^= inp[15];
389
        nhi = nlo >> 4;
390
        nlo &= 0xf;
391
392
        Z.hi = Htable[nlo].hi;
393
        Z.lo = Htable[nlo].lo;
394
395
        while (1) {
396
            rem = (size_t)Z.lo & 0xf;
397
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
398
            Z.hi = (Z.hi >> 4);
399
            if (sizeof(size_t) == 8)
400
                Z.hi ^= rem_4bit[rem];
401
            else
402
                Z.hi ^= (u64)rem_4bit[rem] << 32;
403
404
            Z.hi ^= Htable[nhi].hi;
405
            Z.lo ^= Htable[nhi].lo;
406
407
            if (--cnt < 0)
408
                break;
409
410
            nlo = ((const u8 *)Xi)[cnt];
411
            nlo ^= inp[cnt];
412
            nhi = nlo >> 4;
413
            nlo &= 0xf;
414
415
            rem = (size_t)Z.lo & 0xf;
416
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
417
            Z.hi = (Z.hi >> 4);
418
            if (sizeof(size_t) == 8)
419
                Z.hi ^= rem_4bit[rem];
420
            else
421
                Z.hi ^= (u64)rem_4bit[rem] << 32;
422
423
            Z.hi ^= Htable[nlo].hi;
424
            Z.lo ^= Htable[nlo].lo;
425
        }
426
#else
427
    /*
428
 * An extra 256+16 bytes per key plus 512 bytes of shared tables
429
 * [should] give a ~50% improvement... One could have PACK()-ed
430
 * rem_8bit even here, but the priority is to minimize the
431
 * cache footprint...
432
     */
433
    u128 Hshr4[16]; /* Htable shifted right by 4 bits */
434
    u8 Hshl4[16]; /* Htable shifted left by 4 bits */
435
    static const unsigned short rem_8bit[256] = {
436
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
437
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
438
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
439
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
440
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
441
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
442
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
443
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
444
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
445
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
446
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
447
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
448
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
449
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
450
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
451
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
452
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
453
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
454
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
455
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
456
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
457
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
458
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
459
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
460
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
461
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
462
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
463
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
464
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
465
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
466
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
467
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
468
    };
469
    /*
470
     * This pre-processing phase slows the procedure down by approximately
471
     * as much time as it saves by making each loop iteration faster. In
472
     * other words, single-block performance is about the same as in the
473
     * straightforward "4-bit" implementation, and from there it only gets faster...
474
     */
475
    for (cnt = 0; cnt < 16; ++cnt) {
476
        Z.hi = Htable[cnt].hi;
477
        Z.lo = Htable[cnt].lo;
478
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
479
        Hshr4[cnt].hi = (Z.hi >> 4);
480
        Hshl4[cnt] = (u8)(Z.lo << 4);
481
    }
482
483
    do {
484
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
485
            nlo = ((const u8 *)Xi)[cnt];
486
            nlo ^= inp[cnt];
487
            nhi = nlo >> 4;
488
            nlo &= 0xf;
489
490
            Z.hi ^= Htable[nlo].hi;
491
            Z.lo ^= Htable[nlo].lo;
492
493
            rem = (size_t)Z.lo & 0xff;
494
495
            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
496
            Z.hi = (Z.hi >> 8);
497
498
            Z.hi ^= Hshr4[nhi].hi;
499
            Z.lo ^= Hshr4[nhi].lo;
500
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
501
        }
502
503
        nlo = ((const u8 *)Xi)[0];
504
        nlo ^= inp[0];
505
        nhi = nlo >> 4;
506
        nlo &= 0xf;
507
508
        Z.hi ^= Htable[nlo].hi;
509
        Z.lo ^= Htable[nlo].lo;
510
511
        rem = (size_t)Z.lo & 0xf;
512
513
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
514
        Z.hi = (Z.hi >> 4);
515
516
        Z.hi ^= Htable[nhi].hi;
517
        Z.lo ^= Htable[nhi].lo;
518
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
519
#endif
520
521
        if (IS_LITTLE_ENDIAN) {
522
#ifdef BSWAP8
523
            Xi[0] = BSWAP8(Z.hi);
524
            Xi[1] = BSWAP8(Z.lo);
525
#else
526
            u8 *p = (u8 *)Xi;
527
            u32 v;
528
            v = (u32)(Z.hi >> 32);
529
            PUTU32(p, v);
530
            v = (u32)(Z.hi);
531
            PUTU32(p + 4, v);
532
            v = (u32)(Z.lo >> 32);
533
            PUTU32(p + 8, v);
534
            v = (u32)(Z.lo);
535
            PUTU32(p + 12, v);
536
#endif
537
        } else {
538
            Xi[0] = Z.hi;
539
            Xi[1] = Z.lo;
540
        }
541
    } while (inp += 16, len -= 16);
542
}
543
#endif
544
#else
545
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
546
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
547
    size_t len);
548
#endif
549
550
#define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u, ctx->Htable)
551
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
552
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
553
/*
554
 * GHASH_CHUNK is a "stride parameter" meant to mitigate the cache-thrashing
555
 * effect: the idea is to hash data while it is still in the L1 cache
556
 * after the encryption pass...
557
 */
558
5.39M
#define GHASH_CHUNK (3 * 1024)
559
#endif
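
In the bulk paths below, GHASH_CHUNK bounds how much data is CTR-processed before it is hashed, so GHASH reads bytes that were just produced and are likely still resident in L1. A simplified sketch of the pattern in CRYPTO_gcm128_encrypt (not the literal code, which also handles alignment, function-pointer dispatch and tails; ctr_encrypt_chunk is a hypothetical helper standing in for the inner counter-mode loop):

while (len >= GHASH_CHUNK) {
    /* 1: CTR-encrypt exactly GHASH_CHUNK (3KB) of input into out[] */
    ctr_encrypt_chunk(ctx, in, out, GHASH_CHUNK);
    /* 2: hash the freshly written ciphertext while it is cache-hot */
    GHASH(ctx, out, GHASH_CHUNK);
    in += GHASH_CHUNK; out += GHASH_CHUNK; len -= GHASH_CHUNK;
}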
560
561
#else /* TABLE_BITS */
562
563
static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
564
{
565
    u128 V, Z = { 0, 0 };
566
    long X;
567
    int i, j;
568
    const long *xi = (const long *)Xi;
569
    DECLARE_IS_ENDIAN;
570
571
    V.hi = H[0]; /* H is in host byte order, no byte swapping */
572
    V.lo = H[1];
573
574
    for (j = 0; j < 16 / sizeof(long); ++j) {
575
        if (IS_LITTLE_ENDIAN) {
576
            if (sizeof(long) == 8) {
577
#ifdef BSWAP8
578
                X = (long)(BSWAP8(xi[j]));
579
#else
580
                const u8 *p = (const u8 *)(xi + j);
581
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
582
#endif
583
            } else {
584
                const u8 *p = (const u8 *)(xi + j);
585
                X = (long)GETU32(p);
586
            }
587
        } else
588
            X = xi[j];
589
590
        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
591
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
592
            Z.hi ^= V.hi & M;
593
            Z.lo ^= V.lo & M;
594
595
            REDUCE1BIT(V);
596
        }
597
    }
598
599
    if (IS_LITTLE_ENDIAN) {
600
#ifdef BSWAP8
601
        Xi[0] = BSWAP8(Z.hi);
602
        Xi[1] = BSWAP8(Z.lo);
603
#else
604
        u8 *p = (u8 *)Xi;
605
        u32 v;
606
        v = (u32)(Z.hi >> 32);
607
        PUTU32(p, v);
608
        v = (u32)(Z.hi);
609
        PUTU32(p + 4, v);
610
        v = (u32)(Z.lo >> 32);
611
        PUTU32(p + 8, v);
612
        v = (u32)(Z.lo);
613
        PUTU32(p + 12, v);
614
#endif
615
    } else {
616
        Xi[0] = Z.hi;
617
        Xi[1] = Z.lo;
618
    }
619
}
620
621
#define GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u, ctx->H.u)
622
623
#endif
624
625
#if TABLE_BITS == 4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
626
#if !defined(I386_ONLY) && (defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
627
#define GHASH_ASM_X86_OR_64
628
#define GCM_FUNCREF_4BIT
629
630
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
631
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
632
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
633
    size_t len);
634
635
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
636
#define gcm_init_avx gcm_init_clmul
637
#define gcm_gmult_avx gcm_gmult_clmul
638
#define gcm_ghash_avx gcm_ghash_clmul
639
#else
640
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
641
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
642
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
643
    size_t len);
644
#endif
645
646
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
647
#define GHASH_ASM_X86
648
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
649
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
650
    size_t len);
651
652
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
653
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
654
    size_t len);
655
#endif
656
#elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
657
#include "arm_arch.h"
658
#if __ARM_MAX_ARCH__ >= 7
659
#define GHASH_ASM_ARM
660
#define GCM_FUNCREF_4BIT
661
#define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
662
#if defined(__arm__) || defined(__arm)
663
#define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
664
#endif
665
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
666
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
667
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
668
    size_t len);
669
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
670
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
671
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
672
    size_t len);
673
#endif
674
#elif defined(__sparc__) || defined(__sparc)
675
#include "crypto/sparc_arch.h"
676
#define GHASH_ASM_SPARC
677
#define GCM_FUNCREF_4BIT
678
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
679
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
680
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
681
    size_t len);
682
#elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
683
#include "crypto/ppc_arch.h"
684
#define GHASH_ASM_PPC
685
#define GCM_FUNCREF_4BIT
686
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
687
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
688
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
689
    size_t len);
690
#endif
691
#endif
692
693
#ifdef GCM_FUNCREF_4BIT
694
#undef GCM_MUL
695
574k
#define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u, ctx->Htable)
696
#ifdef GHASH
697
#undef GHASH
698
7.77M
#define GHASH(ctx, in, len) (*gcm_ghash_p)(ctx->Xi.u, ctx->Htable, in, len)
699
#endif
700
#endif
701
702
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
703
1.20k
{
704
1.20k
    DECLARE_IS_ENDIAN;
705
706
1.20k
    memset(ctx, 0, sizeof(*ctx));
707
1.20k
    ctx->block = block;
708
1.20k
    ctx->key = key;
709
710
1.20k
    (*block)(ctx->H.c, ctx->H.c, key);
711
712
1.20k
    if (IS_LITTLE_ENDIAN) {
713
        /* H is stored in host byte order */
714
#ifdef BSWAP8
715
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
716
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
717
#else
718
1.20k
        u8 *p = ctx->H.c;
719
1.20k
        u64 hi, lo;
720
1.20k
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
721
1.20k
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
722
1.20k
        ctx->H.u[0] = hi;
723
1.20k
        ctx->H.u[1] = lo;
724
1.20k
#endif
725
1.20k
    }
726
#if TABLE_BITS == 8
727
    gcm_init_8bit(ctx->Htable, ctx->H.u);
728
#elif TABLE_BITS == 4
729
#if defined(GHASH)
730
1.20k
#define CTX__GHASH(f) (ctx->ghash = (f))
731
#else
732
#define CTX__GHASH(f) (ctx->ghash = NULL)
733
#endif
734
1.20k
#if defined(GHASH_ASM_X86_OR_64)
735
1.20k
#if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
736
1.20k
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
737
1.20k
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
738
1.20k
            gcm_init_avx(ctx->Htable, ctx->H.u);
739
1.20k
            ctx->gmult = gcm_gmult_avx;
740
1.20k
            CTX__GHASH(gcm_ghash_avx);
741
1.20k
        } else {
742
0
            gcm_init_clmul(ctx->Htable, ctx->H.u);
743
0
            ctx->gmult = gcm_gmult_clmul;
744
0
            CTX__GHASH(gcm_ghash_clmul);
745
0
        }
746
1.20k
        return;
747
1.20k
    }
748
0
#endif
749
0
    gcm_init_4bit(ctx->Htable, ctx->H.u);
750
#if defined(GHASH_ASM_X86) /* x86 only */
751
#if defined(OPENSSL_IA32_SSE2)
752
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
753
#else
754
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
755
#endif
756
        ctx->gmult = gcm_gmult_4bit_mmx;
757
        CTX__GHASH(gcm_ghash_4bit_mmx);
758
    } else {
759
        ctx->gmult = gcm_gmult_4bit_x86;
760
        CTX__GHASH(gcm_ghash_4bit_x86);
761
    }
762
#else
763
0
    ctx->gmult = gcm_gmult_4bit;
764
0
    CTX__GHASH(gcm_ghash_4bit);
765
0
#endif
766
#elif defined(GHASH_ASM_ARM)
767
#ifdef PMULL_CAPABLE
768
    if (PMULL_CAPABLE) {
769
        gcm_init_v8(ctx->Htable, ctx->H.u);
770
        ctx->gmult = gcm_gmult_v8;
771
        CTX__GHASH(gcm_ghash_v8);
772
    } else
773
#endif
774
#ifdef NEON_CAPABLE
775
        if (NEON_CAPABLE) {
776
        gcm_init_neon(ctx->Htable, ctx->H.u);
777
        ctx->gmult = gcm_gmult_neon;
778
        CTX__GHASH(gcm_ghash_neon);
779
    } else
780
#endif
781
    {
782
        gcm_init_4bit(ctx->Htable, ctx->H.u);
783
        ctx->gmult = gcm_gmult_4bit;
784
        CTX__GHASH(gcm_ghash_4bit);
785
    }
786
#elif defined(GHASH_ASM_SPARC)
787
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
788
        gcm_init_vis3(ctx->Htable, ctx->H.u);
789
        ctx->gmult = gcm_gmult_vis3;
790
        CTX__GHASH(gcm_ghash_vis3);
791
    } else {
792
        gcm_init_4bit(ctx->Htable, ctx->H.u);
793
        ctx->gmult = gcm_gmult_4bit;
794
        CTX__GHASH(gcm_ghash_4bit);
795
    }
796
#elif defined(GHASH_ASM_PPC)
797
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
798
        gcm_init_p8(ctx->Htable, ctx->H.u);
799
        ctx->gmult = gcm_gmult_p8;
800
        CTX__GHASH(gcm_ghash_p8);
801
    } else {
802
        gcm_init_4bit(ctx->Htable, ctx->H.u);
803
        ctx->gmult = gcm_gmult_4bit;
804
        CTX__GHASH(gcm_ghash_4bit);
805
    }
806
#else
807
    gcm_init_4bit(ctx->Htable, ctx->H.u);
808
#endif
809
0
#undef CTX__GHASH
810
0
#endif
811
0
}
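
For orientation, a minimal sketch of how these entry points compose into one authenticated encryption (illustrative only: it assumes the legacy AES block API for the block128_f, and in OpenSSL 3.0 "crypto/modes.h" is an internal header, so this composition is available to internal code and providers rather than applications; CRYPTO_gcm128_tag() belongs to the same API but lies outside this report):

#include <openssl/aes.h>
#include "crypto/modes.h"

static int gcm_seal_sketch(const unsigned char key[16],
                           const unsigned char iv[12],
                           const unsigned char *aad, size_t aadlen,
                           const unsigned char *pt, unsigned char *ct,
                           size_t len, unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT gcm;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return 0;
    CRYPTO_gcm128_init(&gcm, &aes, (block128_f)AES_encrypt);
    CRYPTO_gcm128_setiv(&gcm, iv, 12);       /* 96-bit IV takes the fast path */
    if (CRYPTO_gcm128_aad(&gcm, aad, aadlen) != 0)
        return 0;
    if (CRYPTO_gcm128_encrypt(&gcm, pt, ct, len) != 0)
        return 0;
    CRYPTO_gcm128_tag(&gcm, tag, 16);        /* declared alongside the above */
    return 1;
}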
812
813
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
814
    size_t len)
815
3.63M
{
816
3.63M
    DECLARE_IS_ENDIAN;
817
3.63M
    unsigned int ctr;
818
3.63M
#ifdef GCM_FUNCREF_4BIT
819
3.63M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
820
3.63M
#endif
821
822
3.63M
    ctx->len.u[0] = 0; /* AAD length */
823
3.63M
    ctx->len.u[1] = 0; /* message length */
824
3.63M
    ctx->ares = 0;
825
3.63M
    ctx->mres = 0;
826
827
3.63M
    if (len == 12) {
828
3.63M
        memcpy(ctx->Yi.c, iv, 12);
829
3.63M
        ctx->Yi.c[12] = 0;
830
3.63M
        ctx->Yi.c[13] = 0;
831
3.63M
        ctx->Yi.c[14] = 0;
832
3.63M
        ctx->Yi.c[15] = 1;
833
3.63M
        ctr = 1;
834
3.63M
    } else {
835
0
        size_t i;
836
0
        u64 len0 = len;
837
838
        /* Borrow ctx->Xi to calculate initial Yi */
839
0
        ctx->Xi.u[0] = 0;
840
0
        ctx->Xi.u[1] = 0;
841
842
0
        while (len >= 16) {
843
0
            for (i = 0; i < 16; ++i)
844
0
                ctx->Xi.c[i] ^= iv[i];
845
0
            GCM_MUL(ctx);
846
0
            iv += 16;
847
0
            len -= 16;
848
0
        }
849
0
        if (len) {
850
0
            for (i = 0; i < len; ++i)
851
0
                ctx->Xi.c[i] ^= iv[i];
852
0
            GCM_MUL(ctx);
853
0
        }
854
0
        len0 <<= 3;
855
0
        if (IS_LITTLE_ENDIAN) {
856
#ifdef BSWAP8
857
            ctx->Xi.u[1] ^= BSWAP8(len0);
858
#else
859
0
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
860
0
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
861
0
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
862
0
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
863
0
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
864
0
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
865
0
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
866
0
            ctx->Xi.c[15] ^= (u8)(len0);
867
0
#endif
868
0
        } else {
869
0
            ctx->Xi.u[1] ^= len0;
870
0
        }
871
872
0
        GCM_MUL(ctx);
873
874
0
        if (IS_LITTLE_ENDIAN)
875
#ifdef BSWAP4
876
            ctr = BSWAP4(ctx->Xi.d[3]);
877
#else
878
0
            ctr = GETU32(ctx->Xi.c + 12);
879
0
#endif
880
0
        else
881
0
            ctr = ctx->Xi.d[3];
882
883
        /* Copy borrowed Xi to Yi */
884
0
        ctx->Yi.u[0] = ctx->Xi.u[0];
885
0
        ctx->Yi.u[1] = ctx->Xi.u[1];
886
0
    }
887
888
3.63M
    ctx->Xi.u[0] = 0;
889
3.63M
    ctx->Xi.u[1] = 0;
890
891
3.63M
    (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
892
3.63M
    ++ctr;
893
3.63M
    if (IS_LITTLE_ENDIAN)
894
#ifdef BSWAP4
895
        ctx->Yi.d[3] = BSWAP4(ctr);
896
#else
897
3.63M
        PUTU32(ctx->Yi.c + 12, ctr);
898
0
#endif
899
0
    else
900
0
        ctx->Yi.d[3] = ctr;
901
3.63M
}
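
Both branches above derive the pre-counter block J0 of SP 800-38D (Yi plays the role of J0, and ctx->Xi is borrowed as the GHASH accumulator, then cleared): $J_0 = \mathrm{IV} \,\|\, 0^{31}1$ when the IV is 96 bits, and $J_0 = \mathrm{GHASH}_H(\mathrm{IV} \,\|\, 0^{s+64} \,\|\, [\mathrm{len}(\mathrm{IV})]_{64})$ otherwise, where $0^s$ pads the IV to a whole number of 128-bit blocks. EK0 = E_K(J0) is retained for the final tag, and the trailing ++ctr turns Yi into the first keystream counter.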
902
903
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
904
    size_t len)
905
3.96M
{
906
3.96M
    size_t i;
907
3.96M
    unsigned int n;
908
3.96M
    u64 alen = ctx->len.u[0];
909
3.96M
#ifdef GCM_FUNCREF_4BIT
910
3.96M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
911
3.96M
#ifdef GHASH
912
3.96M
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
913
3.96M
        const u8 *inp, size_t len)
914
3.96M
        = ctx->ghash;
915
3.96M
#endif
916
3.96M
#endif
917
918
3.96M
    if (ctx->len.u[1])
919
0
        return -2;
920
921
3.96M
    alen += len;
922
3.96M
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
923
0
        return -1;
924
3.96M
    ctx->len.u[0] = alen;
925
926
3.96M
    n = ctx->ares;
927
3.96M
    if (n) {
928
1.27M
        while (n && len) {
929
987k
            ctx->Xi.c[n] ^= *(aad++);
930
987k
            --len;
931
987k
            n = (n + 1) % 16;
932
987k
        }
933
287k
        if (n == 0)
934
22.1k
            GCM_MUL(ctx);
935
265k
        else {
936
265k
            ctx->ares = n;
937
265k
            return 0;
938
265k
        }
939
287k
    }
940
3.69M
#ifdef GHASH
941
3.69M
    if ((i = (len & (size_t)-16))) {
942
1.28M
        GHASH(ctx, aad, i);
943
1.28M
        aad += i;
944
1.28M
        len -= i;
945
1.28M
    }
946
#else
947
    while (len >= 16) {
948
        for (i = 0; i < 16; ++i)
949
            ctx->Xi.c[i] ^= aad[i];
950
        GCM_MUL(ctx);
951
        aad += 16;
952
        len -= 16;
953
    }
954
#endif
955
3.69M
    if (len) {
956
3.25M
        n = (unsigned int)len;
957
38.8M
        for (i = 0; i < len; ++i)
958
35.5M
            ctx->Xi.c[i] ^= aad[i];
959
3.25M
    }
960
961
3.69M
    ctx->ares = n;
962
3.69M
    return 0;
963
3.96M
}
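
Across arbitrarily fragmented calls, this routine maintains the running GHASH over the AAD one 128-bit block at a time,

$X_i = (X_{i-1} \oplus A_i) \cdot H$

with sub-block leftovers XOR-ed into ctx->Xi and counted in ctx->ares until a full 16 bytes accumulate (or until the first encrypt/decrypt call finalizes the AAD hash).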
964
965
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
966
    const unsigned char *in, unsigned char *out,
967
    size_t len)
968
1.55M
{
969
1.55M
    DECLARE_IS_ENDIAN;
970
1.55M
    unsigned int n, ctr, mres;
971
1.55M
    size_t i;
972
1.55M
    u64 mlen = ctx->len.u[1];
973
1.55M
    block128_f block = ctx->block;
974
1.55M
    void *key = ctx->key;
975
1.55M
#ifdef GCM_FUNCREF_4BIT
976
1.55M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
977
1.55M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
978
1.55M
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
979
1.55M
        const u8 *inp, size_t len)
980
1.55M
        = ctx->ghash;
981
1.55M
#endif
982
1.55M
#endif
983
984
1.55M
    mlen += len;
985
1.55M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
986
0
        return -1;
987
1.55M
    ctx->len.u[1] = mlen;
988
989
1.55M
    mres = ctx->mres;
990
991
1.55M
    if (ctx->ares) {
992
        /* First call to encrypt finalizes GHASH(AAD) */
993
11.4k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
994
11.4k
        if (len == 0) {
995
10.7k
            GCM_MUL(ctx);
996
10.7k
            ctx->ares = 0;
997
10.7k
            return 0;
998
10.7k
        }
999
688
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1000
688
        ctx->Xi.u[0] = 0;
1001
688
        ctx->Xi.u[1] = 0;
1002
688
        mres = sizeof(ctx->Xi);
1003
#else
1004
        GCM_MUL(ctx);
1005
#endif
1006
688
        ctx->ares = 0;
1007
688
    }
1008
1009
1.54M
    if (IS_LITTLE_ENDIAN)
1010
#ifdef BSWAP4
1011
        ctr = BSWAP4(ctx->Yi.d[3]);
1012
#else
1013
1.54M
        ctr = GETU32(ctx->Yi.c + 12);
1014
0
#endif
1015
0
    else
1016
0
        ctr = ctx->Yi.d[3];
1017
1018
1.54M
    n = mres % 16;
1019
1.54M
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1020
1.54M
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1021
1.54M
        do {
1022
1.54M
            if (n) {
1023
1.51M
#if defined(GHASH)
1024
22.6M
                while (n && len) {
1025
21.1M
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1026
21.1M
                    --len;
1027
21.1M
                    n = (n + 1) % 16;
1028
21.1M
                }
1029
1.51M
                if (n == 0) {
1030
1.51M
                    GHASH(ctx, ctx->Xn, mres);
1031
1.51M
                    mres = 0;
1032
1.51M
                } else {
1033
0
                    ctx->mres = mres;
1034
0
                    return 0;
1035
0
                }
1036
#else
1037
                while (n && len) {
1038
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1039
                    --len;
1040
                    n = (n + 1) % 16;
1041
                }
1042
                if (n == 0) {
1043
                    GCM_MUL(ctx);
1044
                    mres = 0;
1045
                } else {
1046
                    ctx->mres = n;
1047
                    return 0;
1048
                }
1049
#endif
1050
1.51M
            }
1051
1.54M
#if defined(STRICT_ALIGNMENT)
1052
1.54M
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1053
1.53M
                break;
1054
13.6k
#endif
1055
13.6k
#if defined(GHASH)
1056
13.6k
            if (len >= 16 && mres) {
1057
420
                GHASH(ctx, ctx->Xn, mres);
1058
420
                mres = 0;
1059
420
            }
1060
13.6k
#if defined(GHASH_CHUNK)
1061
13.6k
            while (len >= GHASH_CHUNK) {
1062
0
                size_t j = GHASH_CHUNK;
1063
1064
0
                while (j) {
1065
0
                    size_t_aX *out_t = (size_t_aX *)out;
1066
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
1067
1068
0
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
1069
0
                    ++ctr;
1070
0
                    if (IS_LITTLE_ENDIAN)
1071
#ifdef BSWAP4
1072
                        ctx->Yi.d[3] = BSWAP4(ctr);
1073
#else
1074
0
                        PUTU32(ctx->Yi.c + 12, ctr);
1075
0
#endif
1076
0
                    else
1077
0
                        ctx->Yi.d[3] = ctr;
1078
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1079
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1080
0
                    out += 16;
1081
0
                    in += 16;
1082
0
                    j -= 16;
1083
0
                }
1084
0
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1085
0
                len -= GHASH_CHUNK;
1086
0
            }
1087
13.6k
#endif
1088
13.6k
            if ((i = (len & (size_t)-16))) {
1089
420
                size_t j = i;
1090
1091
840
                while (len >= 16) {
1092
420
                    size_t_aX *out_t = (size_t_aX *)out;
1093
420
                    const size_t_aX *in_t = (const size_t_aX *)in;
1094
1095
420
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
1096
420
                    ++ctr;
1097
420
                    if (IS_LITTLE_ENDIAN)
1098
#ifdef BSWAP4
1099
                        ctx->Yi.d[3] = BSWAP4(ctr);
1100
#else
1101
420
                        PUTU32(ctx->Yi.c + 12, ctr);
1102
0
#endif
1103
0
                    else
1104
0
                        ctx->Yi.d[3] = ctr;
1105
1.26k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1106
840
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1107
420
                    out += 16;
1108
420
                    in += 16;
1109
420
                    len -= 16;
1110
420
                }
1111
420
                GHASH(ctx, out - j, j);
1112
420
            }
1113
#else
1114
            while (len >= 16) {
1115
                size_t *out_t = (size_t *)out;
1116
                const size_t *in_t = (const size_t *)in;
1117
1118
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
1119
                ++ctr;
1120
                if (IS_LITTLE_ENDIAN)
1121
#ifdef BSWAP4
1122
                    ctx->Yi.d[3] = BSWAP4(ctr);
1123
#else
1124
                    PUTU32(ctx->Yi.c + 12, ctr);
1125
#endif
1126
                else
1127
                    ctx->Yi.d[3] = ctr;
1128
                for (i = 0; i < 16 / sizeof(size_t); ++i)
1129
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1130
                GCM_MUL(ctx);
1131
                out += 16;
1132
                in += 16;
1133
                len -= 16;
1134
            }
1135
#endif
1136
13.6k
            if (len) {
1137
432
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
1138
432
                ++ctr;
1139
432
                if (IS_LITTLE_ENDIAN)
1140
#ifdef BSWAP4
1141
                    ctx->Yi.d[3] = BSWAP4(ctr);
1142
#else
1143
432
                    PUTU32(ctx->Yi.c + 12, ctr);
1144
0
#endif
1145
0
                else
1146
0
                    ctx->Yi.d[3] = ctr;
1147
432
#if defined(GHASH)
1148
2.06k
                while (len--) {
1149
1.63k
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1150
1.63k
                    ++n;
1151
1.63k
                }
1152
#else
1153
                while (len--) {
1154
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1155
                    ++n;
1156
                }
1157
                mres = n;
1158
#endif
1159
432
            }
1160
1161
13.6k
            ctx->mres = mres;
1162
13.6k
            return 0;
1163
1.54M
        } while (0);
1164
1.54M
    }
1165
1.53M
#endif
1166
1.53M
    for (i = 0; i < len; ++i) {
1167
0
        if (n == 0) {
1168
0
            (*block)(ctx->Yi.c, ctx->EKi.c, key);
1169
0
            ++ctr;
1170
0
            if (IS_LITTLE_ENDIAN)
1171
#ifdef BSWAP4
1172
                ctx->Yi.d[3] = BSWAP4(ctr);
1173
#else
1174
0
                PUTU32(ctx->Yi.c + 12, ctr);
1175
0
#endif
1176
0
            else
1177
0
                ctx->Yi.d[3] = ctr;
1178
0
        }
1179
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1180
0
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
1181
0
        n = (n + 1) % 16;
1182
0
        if (mres == sizeof(ctx->Xn)) {
1183
0
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
1184
0
            mres = 0;
1185
0
        }
1186
#else
1187
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1188
        mres = n = (n + 1) % 16;
1189
        if (n == 0)
1190
            GCM_MUL(ctx);
1191
#endif
1192
0
    }
1193
1194
1.53M
    ctx->mres = mres;
1195
1.53M
    return 0;
1196
1.54M
}
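
The keystream logic above is GCM's 32-bit counter mode: each block is XOR-ed with the encrypted counter block, and only the low 32 bits of the counter are incremented (ctr is an unsigned int written back into Yi.d[3]),

$C_i = P_i \oplus E_K(Y_i), \qquad Y_{i+1} = \mathrm{inc}_{32}(Y_i).$

In the GHASH-enabled build, the hashing of the produced ciphertext is batched through ctx->Xn rather than folded in per block, which is why mres may grow past 16 here.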
1197
1198
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1199
    const unsigned char *in, unsigned char *out,
1200
    size_t len)
1201
606k
{
1202
606k
    DECLARE_IS_ENDIAN;
1203
606k
    unsigned int n, ctr, mres;
1204
606k
    size_t i;
1205
606k
    u64 mlen = ctx->len.u[1];
1206
606k
    block128_f block = ctx->block;
1207
606k
    void *key = ctx->key;
1208
606k
#ifdef GCM_FUNCREF_4BIT
1209
606k
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1210
606k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1211
606k
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1212
606k
        const u8 *inp, size_t len)
1213
606k
        = ctx->ghash;
1214
606k
#endif
1215
606k
#endif
1216
1217
606k
    mlen += len;
1218
606k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1219
0
        return -1;
1220
606k
    ctx->len.u[1] = mlen;
1221
1222
606k
    mres = ctx->mres;
1223
1224
606k
    if (ctx->ares) {
1225
        /* First call to decrypt finalizes GHASH(AAD) */
1226
245k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1227
245k
        if (len == 0) {
1228
215k
            GCM_MUL(ctx);
1229
215k
            ctx->ares = 0;
1230
215k
            return 0;
1231
215k
        }
1232
29.9k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1233
29.9k
        ctx->Xi.u[0] = 0;
1234
29.9k
        ctx->Xi.u[1] = 0;
1235
29.9k
        mres = sizeof(ctx->Xi);
1236
#else
1237
        GCM_MUL(ctx);
1238
#endif
1239
29.9k
        ctx->ares = 0;
1240
29.9k
    }
1241
1242
391k
    if (IS_LITTLE_ENDIAN)
1243
#ifdef BSWAP4
1244
        ctr = BSWAP4(ctx->Yi.d[3]);
1245
#else
1246
391k
        ctr = GETU32(ctx->Yi.c + 12);
1247
0
#endif
1248
0
    else
1249
0
        ctr = ctx->Yi.d[3];
1250
1251
391k
    n = mres % 16;
1252
391k
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1253
391k
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1254
391k
        do {
1255
391k
            if (n) {
1256
0
#if defined(GHASH)
1257
0
                while (n && len) {
1258
0
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1259
0
                    --len;
1260
0
                    n = (n + 1) % 16;
1261
0
                }
1262
0
                if (n == 0) {
1263
0
                    GHASH(ctx, ctx->Xn, mres);
1264
0
                    mres = 0;
1265
0
                } else {
1266
0
                    ctx->mres = mres;
1267
0
                    return 0;
1268
0
                }
1269
#else
1270
                while (n && len) {
1271
                    u8 c = *(in++);
1272
                    *(out++) = c ^ ctx->EKi.c[n];
1273
                    ctx->Xi.c[n] ^= c;
1274
                    --len;
1275
                    n = (n + 1) % 16;
1276
                }
1277
                if (n == 0) {
1278
                    GCM_MUL(ctx);
1279
                    mres = 0;
1280
                } else {
1281
                    ctx->mres = n;
1282
                    return 0;
1283
                }
1284
#endif
1285
0
            }
1286
391k
#if defined(STRICT_ALIGNMENT)
1287
391k
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1288
1.12k
                break;
1289
390k
#endif
1290
390k
#if defined(GHASH)
1291
390k
            if (len >= 16 && mres) {
1292
9.13k
                GHASH(ctx, ctx->Xn, mres);
1293
9.13k
                mres = 0;
1294
9.13k
            }
1295
390k
#if defined(GHASH_CHUNK)
1296
394k
            while (len >= GHASH_CHUNK) {
1297
4.27k
                size_t j = GHASH_CHUNK;
1298
1299
4.27k
                GHASH(ctx, in, GHASH_CHUNK);
1300
824k
                while (j) {
1301
820k
                    size_t_aX *out_t = (size_t_aX *)out;
1302
820k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1303
1304
820k
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
1305
820k
                    ++ctr;
1306
820k
                    if (IS_LITTLE_ENDIAN)
1307
#ifdef BSWAP4
1308
                        ctx->Yi.d[3] = BSWAP4(ctr);
1309
#else
1310
820k
                        PUTU32(ctx->Yi.c + 12, ctr);
1311
0
#endif
1312
0
                    else
1313
0
                        ctx->Yi.d[3] = ctr;
1314
2.46M
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1315
1.64M
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1316
820k
                    out += 16;
1317
820k
                    in += 16;
1318
820k
                    j -= 16;
1319
820k
                }
1320
4.27k
                len -= GHASH_CHUNK;
1321
4.27k
            }
1322
390k
#endif
1323
390k
            if ((i = (len & (size_t)-16))) {
1324
9.08k
                GHASH(ctx, in, i);
1325
166k
                while (len >= 16) {
1326
156k
                    size_t_aX *out_t = (size_t_aX *)out;
1327
156k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1328
1329
156k
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
1330
156k
                    ++ctr;
1331
156k
                    if (IS_LITTLE_ENDIAN)
1332
#ifdef BSWAP4
1333
                        ctx->Yi.d[3] = BSWAP4(ctr);
1334
#else
1335
156k
                        PUTU32(ctx->Yi.c + 12, ctr);
1336
0
#endif
1337
0
                    else
1338
0
                        ctx->Yi.d[3] = ctr;
1339
470k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1340
313k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1341
156k
                    out += 16;
1342
156k
                    in += 16;
1343
156k
                    len -= 16;
1344
156k
                }
1345
9.08k
            }
1346
#else
1347
            while (len >= 16) {
1348
                size_t *out_t = (size_t *)out;
1349
                const size_t *in_t = (const size_t *)in;
1350
1351
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
1352
                ++ctr;
1353
                if (IS_LITTLE_ENDIAN)
1354
#ifdef BSWAP4
1355
                    ctx->Yi.d[3] = BSWAP4(ctr);
1356
#else
1357
                    PUTU32(ctx->Yi.c + 12, ctr);
1358
#endif
1359
                else
1360
                    ctx->Yi.d[3] = ctr;
1361
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1362
                    size_t c = in_t[i];
1363
                    out_t[i] = c ^ ctx->EKi.t[i];
1364
                    ctx->Xi.t[i] ^= c;
1365
                }
1366
                GCM_MUL(ctx);
1367
                out += 16;
1368
                in += 16;
1369
                len -= 16;
1370
            }
1371
#endif
1372
390k
            if (len) {
1373
27.3k
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
1374
27.3k
                ++ctr;
1375
27.3k
                if (IS_LITTLE_ENDIAN)
1376
#ifdef BSWAP4
1377
                    ctx->Yi.d[3] = BSWAP4(ctr);
1378
#else
1379
27.3k
                    PUTU32(ctx->Yi.c + 12, ctr);
1380
0
#endif
1381
0
                else
1382
0
                    ctx->Yi.d[3] = ctr;
1383
27.3k
#if defined(GHASH)
1384
232k
                while (len--) {
1385
204k
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1386
204k
                    ++n;
1387
204k
                }
1388
#else
1389
                while (len--) {
1390
                    u8 c = in[n];
1391
                    ctx->Xi.c[n] ^= c;
1392
                    out[n] = c ^ ctx->EKi.c[n];
1393
                    ++n;
1394
                }
1395
                mres = n;
1396
#endif
1397
27.3k
            }
1398
1399
390k
            ctx->mres = mres;
1400
390k
            return 0;
1401
391k
        } while (0);
1402
391k
    }
1403
1.12k
#endif
1404
1.12k
    for (i = 0; i < len; ++i) {
1405
0
        u8 c;
1406
0
        if (n == 0) {
1407
0
            (*block)(ctx->Yi.c, ctx->EKi.c, key);
1408
0
            ++ctr;
1409
0
            if (IS_LITTLE_ENDIAN)
1410
#ifdef BSWAP4
1411
                ctx->Yi.d[3] = BSWAP4(ctr);
1412
#else
1413
0
                PUTU32(ctx->Yi.c + 12, ctr);
1414
0
#endif
1415
0
            else
1416
0
                ctx->Yi.d[3] = ctr;
1417
0
        }
1418
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1419
0
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1420
0
        n = (n + 1) % 16;
1421
0
        if (mres == sizeof(ctx->Xn)) {
1422
0
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
1423
0
            mres = 0;
1424
0
        }
1425
#else
1426
        c = in[i];
1427
        out[i] = c ^ ctx->EKi.c[n];
1428
        ctx->Xi.c[n] ^= c;
1429
        mres = n = (n + 1) % 16;
1430
        if (n == 0)
1431
            GCM_MUL(ctx);
1432
#endif
1433
0
    }
1434
1435
1.12k
    ctx->mres = mres;
1436
1.12k
    return 0;
1437
391k
}
1438
1439
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1440
    const unsigned char *in, unsigned char *out,
1441
    size_t len, ctr128_f stream)
1442
3.91M
{
1443
#if defined(OPENSSL_SMALL_FOOTPRINT)
1444
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1445
#else
1446
3.91M
    DECLARE_IS_ENDIAN;
1447
3.91M
    unsigned int n, ctr, mres;
1448
3.91M
    size_t i;
1449
3.91M
    u64 mlen = ctx->len.u[1];
1450
3.91M
    void *key = ctx->key;
1451
3.91M
#ifdef GCM_FUNCREF_4BIT
1452
3.91M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1453
3.91M
#ifdef GHASH
1454
3.91M
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1455
3.91M
        const u8 *inp, size_t len)
1456
3.91M
        = ctx->ghash;
1457
3.91M
#endif
1458
3.91M
#endif
1459
1460
3.91M
    mlen += len;
1461
3.91M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1462
0
        return -1;
1463
3.91M
    ctx->len.u[1] = mlen;
1464
1465
3.91M
    mres = ctx->mres;
1466
1467
3.91M
    if (ctx->ares) {
1468
        /* First call to encrypt finalizes GHASH(AAD) */
1469
1.96M
#if defined(GHASH)
1470
1.96M
        if (len == 0) {
1471
0
            GCM_MUL(ctx);
1472
0
            ctx->ares = 0;
1473
0
            return 0;
1474
0
        }
1475
1.96M
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1476
1.96M
        ctx->Xi.u[0] = 0;
1477
1.96M
        ctx->Xi.u[1] = 0;
1478
1.96M
        mres = sizeof(ctx->Xi);
1479
#else
1480
        GCM_MUL(ctx);
1481
#endif
1482
1.96M
        ctx->ares = 0;
1483
1.96M
    }
1484
1485
3.91M
    if (IS_LITTLE_ENDIAN)
1486
#ifdef BSWAP4
1487
        ctr = BSWAP4(ctx->Yi.d[3]);
1488
#else
1489
3.91M
        ctr = GETU32(ctx->Yi.c + 12);
1490
0
#endif
1491
0
    else
1492
0
        ctr = ctx->Yi.d[3];
1493
1494
3.91M
    n = mres % 16;
1495
3.91M
    if (n) {
1496
362k
#if defined(GHASH)
1497
2.15M
        while (n && len) {
1498
1.78M
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1499
1.78M
            --len;
1500
1.78M
            n = (n + 1) % 16;
1501
1.78M
        }
1502
362k
        if (n == 0) {
1503
168k
            GHASH(ctx, ctx->Xn, mres);
1504
168k
            mres = 0;
1505
194k
        } else {
1506
194k
            ctx->mres = mres;
1507
194k
            return 0;
1508
194k
        }
1509
#else
1510
        while (n && len) {
1511
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1512
            --len;
1513
            n = (n + 1) % 16;
1514
        }
1515
        if (n == 0) {
1516
            GCM_MUL(ctx);
1517
            mres = 0;
1518
        } else {
1519
            ctx->mres = n;
1520
            return 0;
1521
        }
1522
#endif
1523
362k
    }
1524
3.72M
#if defined(GHASH)
1525
3.72M
    if (len >= 16 && mres) {
1526
117k
        GHASH(ctx, ctx->Xn, mres);
1527
117k
        mres = 0;
1528
117k
    }
1529
3.72M
#if defined(GHASH_CHUNK)
1530
3.72M
    while (len >= GHASH_CHUNK) {
1531
0
        (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1532
0
        ctr += GHASH_CHUNK / 16;
1533
0
        if (IS_LITTLE_ENDIAN)
1534
#ifdef BSWAP4
1535
            ctx->Yi.d[3] = BSWAP4(ctr);
1536
#else
1537
0
            PUTU32(ctx->Yi.c + 12, ctr);
1538
0
#endif
1539
0
        else
1540
0
            ctx->Yi.d[3] = ctr;
1541
0
        GHASH(ctx, out, GHASH_CHUNK);
1542
0
        out += GHASH_CHUNK;
1543
0
        in += GHASH_CHUNK;
1544
0
        len -= GHASH_CHUNK;
1545
0
    }
1546
3.72M
#endif
1547
3.72M
#endif
1548
3.72M
    if ((i = (len & (size_t)-16))) {
1549
516k
        size_t j = i / 16;
1550
1551
516k
        (*stream)(in, out, j, key, ctx->Yi.c);
1552
516k
        ctr += (unsigned int)j;
1553
516k
        if (IS_LITTLE_ENDIAN)
1554
#ifdef BSWAP4
1555
            ctx->Yi.d[3] = BSWAP4(ctr);
1556
#else
1557
516k
            PUTU32(ctx->Yi.c + 12, ctr);
1558
0
#endif
1559
0
        else
1560
0
            ctx->Yi.d[3] = ctr;
1561
516k
        in += i;
1562
516k
        len -= i;
1563
516k
#if defined(GHASH)
1564
516k
        GHASH(ctx, out, i);
1565
516k
        out += i;
1566
#else
1567
        while (j--) {
1568
            for (i = 0; i < 16; ++i)
1569
                ctx->Xi.c[i] ^= out[i];
1570
            GCM_MUL(ctx);
1571
            out += 16;
1572
        }
1573
#endif
1574
516k
    }
1575
3.72M
    if (len) {
1576
3.66M
        (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
1577
3.66M
        ++ctr;
1578
3.66M
        if (IS_LITTLE_ENDIAN)
1579
#ifdef BSWAP4
1580
            ctx->Yi.d[3] = BSWAP4(ctr);
1581
#else
1582
3.66M
            PUTU32(ctx->Yi.c + 12, ctr);
1583
0
#endif
1584
0
        else
1585
0
            ctx->Yi.d[3] = ctr;
1586
13.9M
        while (len--) {
1587
10.3M
#if defined(GHASH)
1588
10.3M
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1589
#else
1590
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1591
#endif
1592
10.3M
            ++n;
1593
10.3M
        }
1594
3.66M
    }
1595
1596
3.72M
    ctx->mres = mres;
1597
3.72M
    return 0;
1598
3.91M
#endif
1599
3.91M
}
1600
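The bulk path above hands whole 16-byte blocks to the ctr128_f `stream` callback and folds the resulting ciphertext into GHASH afterwards, buffering any residual bytes in ctx->Xn so the hash can be batched. Note that the caller, not the callback, writes the advanced counter back into ctx->Yi after each call. A minimal sketch of the contract `stream` is expected to satisfy, with a hypothetical `toy_block` standing in for a real AES block function (it is not part of OpenSSL and is deliberately insecure):

#include <stddef.h>
#include <string.h>

/* Toy single-block "cipher" standing in for AES; purely illustrative. */
static void toy_block(const unsigned char in[16], unsigned char out[16],
                      const void *key)
{
    const unsigned char *k = (const unsigned char *)key;
    size_t i;

    for (i = 0; i < 16; ++i)
        out[i] = in[i] ^ k[i];
}

/* Matches the ctr128_f shape used by (*stream)(...) above. */
static void ctr32_sketch(const unsigned char *in, unsigned char *out,
                         size_t blocks, const void *key,
                         const unsigned char ivec[16])
{
    unsigned char ctrblk[16], ks[16];
    unsigned int ctr;
    size_t i;

    memcpy(ctrblk, ivec, 16);
    /* The last four bytes of the counter block are a 32-bit BE counter. */
    ctr = ((unsigned int)ctrblk[12] << 24) | ((unsigned int)ctrblk[13] << 16)
        | ((unsigned int)ctrblk[14] << 8) | (unsigned int)ctrblk[15];

    while (blocks--) {
        toy_block(ctrblk, ks, key);              /* keystream = E_K(Yi) */
        for (i = 0; i < 16; ++i)
            out[i] = in[i] ^ ks[i];
        ++ctr;                                   /* advance local copy only; */
        ctrblk[12] = (unsigned char)(ctr >> 24); /* the GCM caller updates   */
        ctrblk[13] = (unsigned char)(ctr >> 16); /* ctx->Yi itself after the */
        ctrblk[14] = (unsigned char)(ctr >> 8);  /* call returns             */
        ctrblk[15] = (unsigned char)ctr;
        in += 16;
        out += 16;
    }
}
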
1601
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1602
    const unsigned char *in, unsigned char *out,
1603
    size_t len, ctr128_f stream)
1604
1.27M
{
1605
#if defined(OPENSSL_SMALL_FOOTPRINT)
1606
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1607
#else
1608
1.27M
    DECLARE_IS_ENDIAN;
1609
1.27M
    unsigned int n, ctr, mres;
1610
1.27M
    size_t i;
1611
1.27M
    u64 mlen = ctx->len.u[1];
1612
1.27M
    void *key = ctx->key;
1613
1.27M
#ifdef GCM_FUNCREF_4BIT
1614
1.27M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1615
1.27M
#ifdef GHASH
1616
1.27M
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1617
1.27M
        const u8 *inp, size_t len)
1618
1.27M
        = ctx->ghash;
1619
1.27M
#endif
1620
1.27M
#endif
1621
1622
1.27M
    mlen += len;
1623
1.27M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1624
0
        return -1;
1625
1.27M
    ctx->len.u[1] = mlen;
1626
1627
1.27M
    mres = ctx->mres;
1628
1629
1.27M
    if (ctx->ares) {
1630
        /* First call to decrypt finalizes GHASH(AAD) */
1631
696k
#if defined(GHASH)
1632
696k
        if (len == 0) {
1633
15.1k
            GCM_MUL(ctx);
1634
15.1k
            ctx->ares = 0;
1635
15.1k
            return 0;
1636
15.1k
        }
1637
681k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1638
681k
        ctx->Xi.u[0] = 0;
1639
681k
        ctx->Xi.u[1] = 0;
1640
681k
        mres = sizeof(ctx->Xi);
1641
#else
1642
        GCM_MUL(ctx);
1643
#endif
1644
681k
        ctx->ares = 0;
1645
681k
    }
1646
1647
1.25M
    if (IS_LITTLE_ENDIAN)
1648
#ifdef BSWAP4
1649
        ctr = BSWAP4(ctx->Yi.d[3]);
1650
#else
1651
1.25M
        ctr = GETU32(ctx->Yi.c + 12);
1652
0
#endif
1653
0
    else
1654
0
        ctr = ctx->Yi.d[3];
1655
1656
1.25M
    n = mres % 16;
1657
1.25M
    if (n) {
1658
0
#if defined(GHASH)
1659
0
        while (n && len) {
1660
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1661
0
            --len;
1662
0
            n = (n + 1) % 16;
1663
0
        }
1664
0
        if (n == 0) {
1665
0
            GHASH(ctx, ctx->Xn, mres);
1666
0
            mres = 0;
1667
0
        } else {
1668
0
            ctx->mres = mres;
1669
0
            return 0;
1670
0
        }
1671
#else
1672
        while (n && len) {
1673
            u8 c = *(in++);
1674
            *(out++) = c ^ ctx->EKi.c[n];
1675
            ctx->Xi.c[n] ^= c;
1676
            --len;
1677
            n = (n + 1) % 16;
1678
        }
1679
        if (n == 0) {
1680
            GCM_MUL(ctx);
1681
            mres = 0;
1682
        } else {
1683
            ctx->mres = n;
1684
            return 0;
1685
        }
1686
#endif
1687
0
    }
1688
1.25M
#if defined(GHASH)
1689
1.25M
    if (len >= 16 && mres) {
1690
0
        GHASH(ctx, ctx->Xn, mres);
1691
0
        mres = 0;
1692
0
    }
1693
1.25M
#if defined(GHASH_CHUNK)
1694
1.25M
    while (len >= GHASH_CHUNK) {
1695
0
        GHASH(ctx, in, GHASH_CHUNK);
1696
0
        (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1697
0
        ctr += GHASH_CHUNK / 16;
1698
0
        if (IS_LITTLE_ENDIAN)
1699
#ifdef BSWAP4
1700
            ctx->Yi.d[3] = BSWAP4(ctr);
1701
#else
1702
0
            PUTU32(ctx->Yi.c + 12, ctr);
1703
0
#endif
1704
0
        else
1705
0
            ctx->Yi.d[3] = ctr;
1706
0
        out += GHASH_CHUNK;
1707
0
        in += GHASH_CHUNK;
1708
0
        len -= GHASH_CHUNK;
1709
0
    }
1710
1.25M
#endif
1711
1.25M
#endif
1712
1.25M
    if ((i = (len & (size_t)-16))) {
1713
516k
        size_t j = i / 16;
1714
1715
516k
#if defined(GHASH)
1716
516k
        GHASH(ctx, in, i);
1717
#else
1718
        while (j--) {
1719
            size_t k;
1720
            for (k = 0; k < 16; ++k)
1721
                ctx->Xi.c[k] ^= in[k];
1722
            GCM_MUL(ctx);
1723
            in += 16;
1724
        }
1725
        j = i / 16;
1726
        in -= i;
1727
#endif
1728
516k
        (*stream)(in, out, j, key, ctx->Yi.c);
1729
516k
        ctr += (unsigned int)j;
1730
516k
        if (IS_LITTLE_ENDIAN)
1731
#ifdef BSWAP4
1732
            ctx->Yi.d[3] = BSWAP4(ctr);
1733
#else
1734
516k
            PUTU32(ctx->Yi.c + 12, ctr);
1735
0
#endif
1736
0
        else
1737
0
            ctx->Yi.d[3] = ctr;
1738
516k
        out += i;
1739
516k
        in += i;
1740
516k
        len -= i;
1741
516k
    }
1742
1.25M
    if (len) {
1743
1.23M
        (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
1744
1.23M
        ++ctr;
1745
1.23M
        if (IS_LITTLE_ENDIAN)
1746
#ifdef BSWAP4
1747
            ctx->Yi.d[3] = BSWAP4(ctr);
1748
#else
1749
1.23M
            PUTU32(ctx->Yi.c + 12, ctr);
1750
0
#endif
1751
0
        else
1752
0
            ctx->Yi.d[3] = ctr;
1753
5.79M
        while (len--) {
1754
4.56M
#if defined(GHASH)
1755
4.56M
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1756
#else
1757
            u8 c = in[n];
1758
            ctx->Xi.c[mres++] ^= c;
1759
            out[n] = c ^ ctx->EKi.c[n];
1760
#endif
1761
4.56M
            ++n;
1762
4.56M
        }
1763
1.23M
    }
1764
1765
1.25M
    ctx->mres = mres;
1766
1.25M
    return 0;
1767
1.25M
#endif
1768
1.25M
}
1769
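Note the ordering difference from the encrypt path: the decrypt path applies GHASH to the ciphertext input before the keystream is applied, since GCM authenticates ciphertext, not plaintext. Both paths also enforce the same length cap on entry. A standalone restatement of that check, using hypothetical names `total` and `add` for the running byte count and the new chunk length:

#include <stddef.h>
#include <stdint.h>

/* GCM caps plaintext at 2^39 - 256 bits, i.e. 2^36 - 32 bytes
 * (NIST SP 800-38D); this mirrors the mlen test above. */
static int gcm_len_ok(uint64_t total, size_t add)
{
    uint64_t mlen = total + add;

    /* The second test catches 64-bit wraparound of the running count. */
    return mlen <= ((UINT64_C(1) << 36) - 32) && mlen >= total;
}
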
1770
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1771
    size_t len)
1772
3.63M
{
1773
3.63M
    DECLARE_IS_ENDIAN;
1774
3.63M
    u64 alen = ctx->len.u[0] << 3;
1775
3.63M
    u64 clen = ctx->len.u[1] << 3;
1776
3.63M
#ifdef GCM_FUNCREF_4BIT
1777
3.63M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1778
3.63M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1779
3.63M
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1780
3.63M
        const u8 *inp, size_t len)
1781
3.63M
        = ctx->ghash;
1782
3.63M
#endif
1783
3.63M
#endif
1784
1785
3.63M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1786
3.63M
    u128 bitlen;
1787
3.63M
    unsigned int mres = ctx->mres;
1788
1789
3.63M
    if (mres) {
1790
3.24M
        unsigned blocks = (mres + 15) & -16;
1791
1792
3.24M
        memset(ctx->Xn + mres, 0, blocks - mres);
1793
3.24M
        mres = blocks;
1794
3.24M
        if (mres == sizeof(ctx->Xn)) {
1795
0
            GHASH(ctx, ctx->Xn, mres);
1796
0
            mres = 0;
1797
0
        }
1798
3.24M
    } else if (ctx->ares) {
1799
311k
        GCM_MUL(ctx);
1800
311k
    }
1801
#else
1802
    if (ctx->mres || ctx->ares)
1803
        GCM_MUL(ctx);
1804
#endif
1805
1806
3.63M
    if (IS_LITTLE_ENDIAN) {
1807
#ifdef BSWAP8
1808
        alen = BSWAP8(alen);
1809
        clen = BSWAP8(clen);
1810
#else
1811
3.63M
        u8 *p = ctx->len.c;
1812
1813
3.63M
        ctx->len.u[0] = alen;
1814
3.63M
        ctx->len.u[1] = clen;
1815
1816
3.63M
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1817
3.63M
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1818
3.63M
#endif
1819
3.63M
    }
1820
1821
3.63M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1822
3.63M
    bitlen.hi = alen;
1823
3.63M
    bitlen.lo = clen;
1824
3.63M
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1825
3.63M
    mres += sizeof(bitlen);
1826
3.63M
    GHASH(ctx, ctx->Xn, mres);
1827
#else
1828
    ctx->Xi.u[0] ^= alen;
1829
    ctx->Xi.u[1] ^= clen;
1830
    GCM_MUL(ctx);
1831
#endif
1832
1833
3.63M
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1834
3.63M
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1835
1836
3.63M
    if (tag && len <= sizeof(ctx->Xi))
1837
1.30M
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1838
2.32M
    else
1839
2.32M
        return -1;
1840
3.63M
}
1841
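CRYPTO_gcm128_finish pads whatever is still buffered in ctx->Xn, appends the 128-bit {len(A), len(C)} block, folds in E_K(Y0), and compares against the supplied tag with CRYPTO_memcmp so the check runs in constant time. A hedged sketch of the intended decrypt-and-verify sequence, assuming the GCM128_CONTEXT declarations from crypto/modes.h are in scope and `stream` is an AES-CTR ctr128_f such as the sketch shown earlier:

static int aead_open_sketch(GCM128_CONTEXT *ctx, ctr128_f stream,
                            const unsigned char *ct, size_t ctlen,
                            const unsigned char tag[16], unsigned char *pt)
{
    /* Fails only if the total length exceeds the 2^36 - 32 byte cap. */
    if (CRYPTO_gcm128_decrypt_ctr32(ctx, ct, pt, ctlen, stream) != 0)
        return -1;

    /* Constant-time compare; on mismatch the caller must wipe `pt`
     * rather than use the unauthenticated plaintext. */
    return CRYPTO_gcm128_finish(ctx, tag, 16);
}
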
1842
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1843
2.32M
{
1844
2.32M
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1845
2.32M
    memcpy(tag, ctx->Xi.c,
1846
2.32M
        len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1847
2.32M
}
1848
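CRYPTO_gcm128_tag reuses the finish path (with a NULL tag the internal comparison returns -1, which is deliberately ignored) and then copies at most 16 bytes of Xi, so truncated tags fall out naturally from the length clamp. The matching encrypt-side sketch, under the same assumptions as the verify sketch above:

static int aead_seal_sketch(GCM128_CONTEXT *ctx, ctr128_f stream,
                            const unsigned char *pt, size_t ptlen,
                            unsigned char *ct, unsigned char tag[16])
{
    if (CRYPTO_gcm128_encrypt_ctr32(ctx, pt, ct, ptlen, stream) != 0)
        return -1;                     /* length cap exceeded */
    CRYPTO_gcm128_tag(ctx, tag, 16);   /* or e.g. 12 for a 96-bit tag */
    return 0;
}
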
1849
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1850
0
{
1851
0
    GCM128_CONTEXT *ret;
1852
1853
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1854
0
        CRYPTO_gcm128_init(ret, key, block);
1855
1856
0
    return ret;
1857
0
}
1858
1859
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1860
0
{
1861
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1862
0
}
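
The allocation and release helpers frame the whole lifecycle; CRYPTO_gcm128_release uses OPENSSL_clear_free, so the expanded hash table and counter/keystream state are scrubbed before the memory is returned. A hedged end-to-end fragment under the same header assumptions, with an already-scheduled AES_KEY and caller-supplied `iv`, `aad`, `pt`, `ct`, and `tag` buffers (AES_encrypt has the block128_f shape modulo the key-pointer cast, which is how OpenSSL itself wires it up):

#include <openssl/aes.h>

static int lifecycle_sketch(const AES_KEY *aes_key,
                            const unsigned char iv[12],
                            const unsigned char *aad, size_t aadlen,
                            const unsigned char *pt, size_t ptlen,
                            unsigned char *ct, unsigned char tag[16])
{
    int ok;
    GCM128_CONTEXT *ctx = CRYPTO_gcm128_new((void *)aes_key,
                                            (block128_f)AES_encrypt);

    if (ctx == NULL)
        return -1;
    CRYPTO_gcm128_setiv(ctx, iv, 12);        /* 96-bit IV, the fast path  */
    CRYPTO_gcm128_aad(ctx, aad, aadlen);     /* finalized lazily via ares */
    ok = CRYPTO_gcm128_encrypt(ctx, pt, ct, ptlen) == 0;
    if (ok)
        CRYPTO_gcm128_tag(ctx, tag, 16);
    CRYPTO_gcm128_release(ctx);              /* OPENSSL_clear_free()s ctx */
    return ok ? 0 : -1;
}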