Coverage Report

Created: 2026-04-09 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl30/crypto/modes/gcm128.c
Line
Count
Source
1
/*
2
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <string.h>
11
#include <openssl/crypto.h>
12
#include "internal/cryptlib.h"
13
#include "internal/endian.h"
14
#include "crypto/modes.h"
15
16
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17
typedef size_t size_t_aX __attribute((__aligned__(1)));
18
#else
19
typedef size_t size_t_aX;
20
#endif
21
22
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23
/* redefine, because alignment is ensured */
24
#undef GETU32
25
#define GETU32(p) BSWAP4(*(const u32 *)(p))
26
#undef PUTU32
27
#define PUTU32(p, v) *(u32 *)(p) = BSWAP4(v)
28
#endif
29
30
#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
31
#define REDUCE1BIT(V)                                           \
32
0
    do {                                                        \
33
0
        if (sizeof(size_t) == 8) {                              \
34
0
            u64 T = U64(0xe100000000000000) & (0 - (V.lo & 1)); \
35
0
            V.lo = (V.hi << 63) | (V.lo >> 1);                  \
36
0
            V.hi = (V.hi >> 1) ^ T;                             \
37
0
        } else {                                                \
38
0
            u32 T = 0xe1000000U & (0 - (u32)(V.lo & 1));        \
39
0
            V.lo = (V.hi << 63) | (V.lo >> 1);                  \
40
0
            V.hi = (V.hi >> 1) ^ ((u64)T << 32);                \
41
0
        }                                                       \
42
0
    } while (0)
43
44
/*-
45
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
46
 * never be set to 8. 8 is effectively reserved for testing purposes.
47
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
48
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
49
 * whole spectrum of possible table driven implementations. Why? In
50
 * non-"Shoup's" case memory access pattern is segmented in such manner,
51
 * that it's trivial to see that cache timing information can reveal
52
 * fair portion of intermediate hash value. Given that ciphertext is
53
 * always available to attacker, it's possible for him to attempt to
54
 * deduce secret parameter H and if successful, tamper with messages
55
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
56
 * not as trivial, but there is no reason to believe that it's resistant
57
 * to cache-timing attack. And the thing about "8-bit" implementation is
58
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
59
 * key + 1KB shared. Well, on pros side it should be twice as fast as
60
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
61
 * was observed to run ~75% faster, closer to 100% for commercial
62
 * compilers... Yet "4-bit" procedure is preferred, because it's
63
 * believed to provide better security-performance balance and adequate
64
 * all-round performance. "All-round" refers to things like:
65
 *
66
 * - shorter setup time effectively improves overall timing for
67
 *   handling short messages;
68
 * - larger table allocation can become unbearable because of VM
69
 *   subsystem penalties (for example on Windows large enough free
70
 *   results in VM working set trimming, meaning that consequent
71
 *   malloc would immediately incur working set expansion);
72
 * - larger table has larger cache footprint, which can affect
73
 *   performance of other code paths (not necessarily even from same
74
 *   thread in Hyper-Threading world);
75
 *
76
 * Value of 1 is not appropriate for performance reasons.
77
 */
78
#if TABLE_BITS == 8
79
80
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
81
{
82
    int i, j;
83
    u128 V;
84
85
    Htable[0].hi = 0;
86
    Htable[0].lo = 0;
87
    V.hi = H[0];
88
    V.lo = H[1];
89
90
    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
91
        REDUCE1BIT(V);
92
        Htable[i] = V;
93
    }
94
95
    for (i = 2; i < 256; i <<= 1) {
96
        u128 *Hi = Htable + i, H0 = *Hi;
97
        for (j = 1; j < i; ++j) {
98
            Hi[j].hi = H0.hi ^ Htable[j].hi;
99
            Hi[j].lo = H0.lo ^ Htable[j].lo;
100
        }
101
    }
102
}
103
104
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
105
{
106
    u128 Z = { 0, 0 };
107
    const u8 *xi = (const u8 *)Xi + 15;
108
    size_t rem, n = *xi;
109
    DECLARE_IS_ENDIAN;
110
    static const size_t rem_8bit[256] = {
111
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
112
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
113
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
114
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
115
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
116
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
117
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
118
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
119
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
120
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
121
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
122
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
123
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
124
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
125
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
126
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
127
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
128
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
129
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
130
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
131
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
132
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
133
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
134
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
135
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
136
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
137
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
138
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
139
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
140
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
141
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
142
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
143
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
144
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
145
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
146
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
147
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
148
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
149
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
150
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
151
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
152
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
153
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
154
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
155
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
156
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
157
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
158
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
159
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
160
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
161
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
162
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
163
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
164
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
165
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
166
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
167
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
168
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
169
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
170
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
171
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
172
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
173
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
174
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
175
    };
176
177
    while (1) {
178
        Z.hi ^= Htable[n].hi;
179
        Z.lo ^= Htable[n].lo;
180
181
        if ((u8 *)Xi == xi)
182
            break;
183
184
        n = *(--xi);
185
186
        rem = (size_t)Z.lo & 0xff;
187
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
188
        Z.hi = (Z.hi >> 8);
189
        if (sizeof(size_t) == 8)
190
            Z.hi ^= rem_8bit[rem];
191
        else
192
            Z.hi ^= (u64)rem_8bit[rem] << 32;
193
    }
194
195
    if (IS_LITTLE_ENDIAN) {
196
#ifdef BSWAP8
197
        Xi[0] = BSWAP8(Z.hi);
198
        Xi[1] = BSWAP8(Z.lo);
199
#else
200
        u8 *p = (u8 *)Xi;
201
        u32 v;
202
        v = (u32)(Z.hi >> 32);
203
        PUTU32(p, v);
204
        v = (u32)(Z.hi);
205
        PUTU32(p + 4, v);
206
        v = (u32)(Z.lo >> 32);
207
        PUTU32(p + 8, v);
208
        v = (u32)(Z.lo);
209
        PUTU32(p + 12, v);
210
#endif
211
    } else {
212
        Xi[0] = Z.hi;
213
        Xi[1] = Z.lo;
214
    }
215
}
216
217
#define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u, ctx->Htable)
218
219
#elif TABLE_BITS == 4
220
221
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
222
0
{
223
0
    u128 V;
224
#if defined(OPENSSL_SMALL_FOOTPRINT)
225
    int i;
226
#endif
227
228
0
    Htable[0].hi = 0;
229
0
    Htable[0].lo = 0;
230
0
    V.hi = H[0];
231
0
    V.lo = H[1];
232
233
#if defined(OPENSSL_SMALL_FOOTPRINT)
234
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
235
        REDUCE1BIT(V);
236
        Htable[i] = V;
237
    }
238
239
    for (i = 2; i < 16; i <<= 1) {
240
        u128 *Hi = Htable + i;
241
        int j;
242
        for (V = *Hi, j = 1; j < i; ++j) {
243
            Hi[j].hi = V.hi ^ Htable[j].hi;
244
            Hi[j].lo = V.lo ^ Htable[j].lo;
245
        }
246
    }
247
#else
248
0
    Htable[8] = V;
249
0
    REDUCE1BIT(V);
250
0
    Htable[4] = V;
251
0
    REDUCE1BIT(V);
252
0
    Htable[2] = V;
253
0
    REDUCE1BIT(V);
254
0
    Htable[1] = V;
255
0
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
256
0
    V = Htable[4];
257
0
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
258
0
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
259
0
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
260
0
    V = Htable[8];
261
0
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
262
0
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
263
0
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
264
0
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
265
0
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
266
0
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
267
0
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
268
0
#endif
269
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
270
    /*
271
     * ARM assembler expects specific dword order in Htable.
272
     */
273
    {
274
        int j;
275
        DECLARE_IS_ENDIAN;
276
277
        if (IS_LITTLE_ENDIAN)
278
            for (j = 0; j < 16; ++j) {
279
                V = Htable[j];
280
                Htable[j].hi = V.lo;
281
                Htable[j].lo = V.hi;
282
            }
283
        else
284
            for (j = 0; j < 16; ++j) {
285
                V = Htable[j];
286
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
287
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
288
            }
289
    }
290
#endif
291
0
}
292
293
#ifndef GHASH_ASM
294
static const size_t rem_4bit[16] = {
295
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
296
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
297
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
298
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
299
};
300
301
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
302
{
303
    u128 Z;
304
    int cnt = 15;
305
    size_t rem, nlo, nhi;
306
    DECLARE_IS_ENDIAN;
307
308
    nlo = ((const u8 *)Xi)[15];
309
    nhi = nlo >> 4;
310
    nlo &= 0xf;
311
312
    Z.hi = Htable[nlo].hi;
313
    Z.lo = Htable[nlo].lo;
314
315
    while (1) {
316
        rem = (size_t)Z.lo & 0xf;
317
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
318
        Z.hi = (Z.hi >> 4);
319
        if (sizeof(size_t) == 8)
320
            Z.hi ^= rem_4bit[rem];
321
        else
322
            Z.hi ^= (u64)rem_4bit[rem] << 32;
323
324
        Z.hi ^= Htable[nhi].hi;
325
        Z.lo ^= Htable[nhi].lo;
326
327
        if (--cnt < 0)
328
            break;
329
330
        nlo = ((const u8 *)Xi)[cnt];
331
        nhi = nlo >> 4;
332
        nlo &= 0xf;
333
334
        rem = (size_t)Z.lo & 0xf;
335
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
336
        Z.hi = (Z.hi >> 4);
337
        if (sizeof(size_t) == 8)
338
            Z.hi ^= rem_4bit[rem];
339
        else
340
            Z.hi ^= (u64)rem_4bit[rem] << 32;
341
342
        Z.hi ^= Htable[nlo].hi;
343
        Z.lo ^= Htable[nlo].lo;
344
    }
345
346
    if (IS_LITTLE_ENDIAN) {
347
#ifdef BSWAP8
348
        Xi[0] = BSWAP8(Z.hi);
349
        Xi[1] = BSWAP8(Z.lo);
350
#else
351
        u8 *p = (u8 *)Xi;
352
        u32 v;
353
        v = (u32)(Z.hi >> 32);
354
        PUTU32(p, v);
355
        v = (u32)(Z.hi);
356
        PUTU32(p + 4, v);
357
        v = (u32)(Z.lo >> 32);
358
        PUTU32(p + 8, v);
359
        v = (u32)(Z.lo);
360
        PUTU32(p + 12, v);
361
#endif
362
    } else {
363
        Xi[0] = Z.hi;
364
        Xi[1] = Z.lo;
365
    }
366
}
367
368
#if !defined(OPENSSL_SMALL_FOOTPRINT)
369
/*
370
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
371
 * details... Compiler-generated code doesn't seem to give any
372
 * performance improvement, at least not on x86[_64]. It's here
373
 * mostly as reference and a placeholder for possible future
374
 * non-trivial optimization[s]...
375
 */
376
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
377
    const u8 *inp, size_t len)
378
{
379
    u128 Z;
380
    int cnt;
381
    size_t rem, nlo, nhi;
382
    DECLARE_IS_ENDIAN;
383
384
#if 1
385
    do {
386
        cnt = 15;
387
        nlo = ((const u8 *)Xi)[15];
388
        nlo ^= inp[15];
389
        nhi = nlo >> 4;
390
        nlo &= 0xf;
391
392
        Z.hi = Htable[nlo].hi;
393
        Z.lo = Htable[nlo].lo;
394
395
        while (1) {
396
            rem = (size_t)Z.lo & 0xf;
397
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
398
            Z.hi = (Z.hi >> 4);
399
            if (sizeof(size_t) == 8)
400
                Z.hi ^= rem_4bit[rem];
401
            else
402
                Z.hi ^= (u64)rem_4bit[rem] << 32;
403
404
            Z.hi ^= Htable[nhi].hi;
405
            Z.lo ^= Htable[nhi].lo;
406
407
            if (--cnt < 0)
408
                break;
409
410
            nlo = ((const u8 *)Xi)[cnt];
411
            nlo ^= inp[cnt];
412
            nhi = nlo >> 4;
413
            nlo &= 0xf;
414
415
            rem = (size_t)Z.lo & 0xf;
416
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
417
            Z.hi = (Z.hi >> 4);
418
            if (sizeof(size_t) == 8)
419
                Z.hi ^= rem_4bit[rem];
420
            else
421
                Z.hi ^= (u64)rem_4bit[rem] << 32;
422
423
            Z.hi ^= Htable[nlo].hi;
424
            Z.lo ^= Htable[nlo].lo;
425
        }
426
#else
427
    /*
428
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
429
     * [should] give ~50% improvement... One could have PACK()-ed
430
     * the rem_8bit even here, but the priority is to minimize
431
     * cache footprint...
432
     */
433
    u128 Hshr4[16]; /* Htable shifted right by 4 bits */
434
    u8 Hshl4[16]; /* Htable shifted left by 4 bits */
435
    static const unsigned short rem_8bit[256] = {
436
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
437
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
438
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
439
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
440
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
441
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
442
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
443
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
444
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
445
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
446
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
447
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
448
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
449
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
450
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
451
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
452
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
453
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
454
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
455
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
456
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
457
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
458
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
459
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
460
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
461
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
462
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
463
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
464
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
465
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
466
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
467
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
468
    };
469
    /*
470
     * This pre-processing phase slows down procedure by approximately
471
     * same time as it makes each loop spin faster. In other words
472
     * single block performance is approximately same as straightforward
473
     * "4-bit" implementation, and then it goes only faster...
474
     */
475
    for (cnt = 0; cnt < 16; ++cnt) {
476
        Z.hi = Htable[cnt].hi;
477
        Z.lo = Htable[cnt].lo;
478
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
479
        Hshr4[cnt].hi = (Z.hi >> 4);
480
        Hshl4[cnt] = (u8)(Z.lo << 4);
481
    }
482
483
    do {
484
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
485
            nlo = ((const u8 *)Xi)[cnt];
486
            nlo ^= inp[cnt];
487
            nhi = nlo >> 4;
488
            nlo &= 0xf;
489
490
            Z.hi ^= Htable[nlo].hi;
491
            Z.lo ^= Htable[nlo].lo;
492
493
            rem = (size_t)Z.lo & 0xff;
494
495
            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
496
            Z.hi = (Z.hi >> 8);
497
498
            Z.hi ^= Hshr4[nhi].hi;
499
            Z.lo ^= Hshr4[nhi].lo;
500
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
501
        }
502
503
        nlo = ((const u8 *)Xi)[0];
504
        nlo ^= inp[0];
505
        nhi = nlo >> 4;
506
        nlo &= 0xf;
507
508
        Z.hi ^= Htable[nlo].hi;
509
        Z.lo ^= Htable[nlo].lo;
510
511
        rem = (size_t)Z.lo & 0xf;
512
513
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
514
        Z.hi = (Z.hi >> 4);
515
516
        Z.hi ^= Htable[nhi].hi;
517
        Z.lo ^= Htable[nhi].lo;
518
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
519
#endif
520
521
        if (IS_LITTLE_ENDIAN) {
522
#ifdef BSWAP8
523
            Xi[0] = BSWAP8(Z.hi);
524
            Xi[1] = BSWAP8(Z.lo);
525
#else
526
            u8 *p = (u8 *)Xi;
527
            u32 v;
528
            v = (u32)(Z.hi >> 32);
529
            PUTU32(p, v);
530
            v = (u32)(Z.hi);
531
            PUTU32(p + 4, v);
532
            v = (u32)(Z.lo >> 32);
533
            PUTU32(p + 8, v);
534
            v = (u32)(Z.lo);
535
            PUTU32(p + 12, v);
536
#endif
537
        } else {
538
            Xi[0] = Z.hi;
539
            Xi[1] = Z.lo;
540
        }
541
    } while (inp += 16, len -= 16);
542
}
543
#endif
544
#else
545
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
546
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
547
    size_t len);
548
#endif
549
550
#define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u, ctx->Htable)
551
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
552
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
553
/*
554
 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
555
 * effect. In other words idea is to hash data while it's still in L1 cache
556
 * after encryption pass...
557
 */
558
5.73M
#define GHASH_CHUNK (3 * 1024)
559
#endif
560
561
#else /* TABLE_BITS */
562
563
static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
564
{
565
    u128 V, Z = { 0, 0 };
566
    long X;
567
    int i, j;
568
    const long *xi = (const long *)Xi;
569
    DECLARE_IS_ENDIAN;
570
571
    V.hi = H[0]; /* H is in host byte order, no byte swapping */
572
    V.lo = H[1];
573
574
    for (j = 0; j < 16 / sizeof(long); ++j) {
575
        if (IS_LITTLE_ENDIAN) {
576
            if (sizeof(long) == 8) {
577
#ifdef BSWAP8
578
                X = (long)(BSWAP8(xi[j]));
579
#else
580
                const u8 *p = (const u8 *)(xi + j);
581
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
582
#endif
583
            } else {
584
                const u8 *p = (const u8 *)(xi + j);
585
                X = (long)GETU32(p);
586
            }
587
        } else
588
            X = xi[j];
589
590
        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
591
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
592
            Z.hi ^= V.hi & M;
593
            Z.lo ^= V.lo & M;
594
595
            REDUCE1BIT(V);
596
        }
597
    }
598
599
    if (IS_LITTLE_ENDIAN) {
600
#ifdef BSWAP8
601
        Xi[0] = BSWAP8(Z.hi);
602
        Xi[1] = BSWAP8(Z.lo);
603
#else
604
        u8 *p = (u8 *)Xi;
605
        u32 v;
606
        v = (u32)(Z.hi >> 32);
607
        PUTU32(p, v);
608
        v = (u32)(Z.hi);
609
        PUTU32(p + 4, v);
610
        v = (u32)(Z.lo >> 32);
611
        PUTU32(p + 8, v);
612
        v = (u32)(Z.lo);
613
        PUTU32(p + 12, v);
614
#endif
615
    } else {
616
        Xi[0] = Z.hi;
617
        Xi[1] = Z.lo;
618
    }
619
}
620
621
#define GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u, ctx->H.u)
622
623
#endif
624
625
#if TABLE_BITS == 4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
626
#if !defined(I386_ONLY) && (defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
627
#define GHASH_ASM_X86_OR_64
628
#define GCM_FUNCREF_4BIT
629
630
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
631
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
632
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
633
    size_t len);
634
635
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
636
#define gcm_init_avx gcm_init_clmul
637
#define gcm_gmult_avx gcm_gmult_clmul
638
#define gcm_ghash_avx gcm_ghash_clmul
639
#else
640
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
641
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
642
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
643
    size_t len);
644
#endif
645
646
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
647
#define GHASH_ASM_X86
648
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
649
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
650
    size_t len);
651
652
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
653
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
654
    size_t len);
655
#endif
656
#elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
657
#include "arm_arch.h"
658
#if __ARM_MAX_ARCH__ >= 7
659
#define GHASH_ASM_ARM
660
#define GCM_FUNCREF_4BIT
661
#define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
662
#if defined(__arm__) || defined(__arm)
663
#define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
664
#endif
665
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
666
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
667
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
668
    size_t len);
669
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
670
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
671
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
672
    size_t len);
673
#endif
674
#elif defined(__sparc__) || defined(__sparc)
675
#include "crypto/sparc_arch.h"
676
#define GHASH_ASM_SPARC
677
#define GCM_FUNCREF_4BIT
678
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
679
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
680
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
681
    size_t len);
682
#elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
683
#include "crypto/ppc_arch.h"
684
#define GHASH_ASM_PPC
685
#define GCM_FUNCREF_4BIT
686
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
687
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
688
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
689
    size_t len);
690
#endif
691
#endif
692
693
#ifdef GCM_FUNCREF_4BIT
694
#undef GCM_MUL
695
785k
#define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u, ctx->Htable)
696
#ifdef GHASH
697
#undef GHASH
698
8.60M
#define GHASH(ctx, in, len) (*gcm_ghash_p)(ctx->Xi.u, ctx->Htable, in, len)
699
#endif
700
#endif
701
702
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
703
1.14k
{
704
1.14k
    DECLARE_IS_ENDIAN;
705
706
1.14k
    memset(ctx, 0, sizeof(*ctx));
707
1.14k
    ctx->block = block;
708
1.14k
    ctx->key = key;
709
710
1.14k
    (*block)(ctx->H.c, ctx->H.c, key);
711
712
1.14k
    if (IS_LITTLE_ENDIAN) {
713
        /* H is stored in host byte order */
714
#ifdef BSWAP8
715
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
716
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
717
#else
718
1.14k
        u8 *p = ctx->H.c;
719
1.14k
        u64 hi, lo;
720
1.14k
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
721
1.14k
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
722
1.14k
        ctx->H.u[0] = hi;
723
1.14k
        ctx->H.u[1] = lo;
724
1.14k
#endif
725
1.14k
    }
726
#if TABLE_BITS == 8
727
    gcm_init_8bit(ctx->Htable, ctx->H.u);
728
#elif TABLE_BITS == 4
729
#if defined(GHASH)
730
1.14k
#define CTX__GHASH(f) (ctx->ghash = (f))
731
#else
732
#define CTX__GHASH(f) (ctx->ghash = NULL)
733
#endif
734
1.14k
#if defined(GHASH_ASM_X86_OR_64)
735
1.14k
#if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
736
1.14k
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
737
1.14k
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
738
1.14k
            gcm_init_avx(ctx->Htable, ctx->H.u);
739
1.14k
            ctx->gmult = gcm_gmult_avx;
740
1.14k
            CTX__GHASH(gcm_ghash_avx);
741
1.14k
        } else {
742
0
            gcm_init_clmul(ctx->Htable, ctx->H.u);
743
0
            ctx->gmult = gcm_gmult_clmul;
744
0
            CTX__GHASH(gcm_ghash_clmul);
745
0
        }
746
1.14k
        return;
747
1.14k
    }
748
0
#endif
749
0
    gcm_init_4bit(ctx->Htable, ctx->H.u);
750
#if defined(GHASH_ASM_X86) /* x86 only */
751
#if defined(OPENSSL_IA32_SSE2)
752
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
753
#else
754
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
755
#endif
756
        ctx->gmult = gcm_gmult_4bit_mmx;
757
        CTX__GHASH(gcm_ghash_4bit_mmx);
758
    } else {
759
        ctx->gmult = gcm_gmult_4bit_x86;
760
        CTX__GHASH(gcm_ghash_4bit_x86);
761
    }
762
#else
763
0
    ctx->gmult = gcm_gmult_4bit;
764
0
    CTX__GHASH(gcm_ghash_4bit);
765
0
#endif
766
#elif defined(GHASH_ASM_ARM)
767
#ifdef PMULL_CAPABLE
768
    if (PMULL_CAPABLE) {
769
        gcm_init_v8(ctx->Htable, ctx->H.u);
770
        ctx->gmult = gcm_gmult_v8;
771
        CTX__GHASH(gcm_ghash_v8);
772
    } else
773
#endif
774
#ifdef NEON_CAPABLE
775
        if (NEON_CAPABLE) {
776
        gcm_init_neon(ctx->Htable, ctx->H.u);
777
        ctx->gmult = gcm_gmult_neon;
778
        CTX__GHASH(gcm_ghash_neon);
779
    } else
780
#endif
781
    {
782
        gcm_init_4bit(ctx->Htable, ctx->H.u);
783
        ctx->gmult = gcm_gmult_4bit;
784
        CTX__GHASH(gcm_ghash_4bit);
785
    }
786
#elif defined(GHASH_ASM_SPARC)
787
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
788
        gcm_init_vis3(ctx->Htable, ctx->H.u);
789
        ctx->gmult = gcm_gmult_vis3;
790
        CTX__GHASH(gcm_ghash_vis3);
791
    } else {
792
        gcm_init_4bit(ctx->Htable, ctx->H.u);
793
        ctx->gmult = gcm_gmult_4bit;
794
        CTX__GHASH(gcm_ghash_4bit);
795
    }
796
#elif defined(GHASH_ASM_PPC)
797
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
798
        gcm_init_p8(ctx->Htable, ctx->H.u);
799
        ctx->gmult = gcm_gmult_p8;
800
        CTX__GHASH(gcm_ghash_p8);
801
    } else {
802
        gcm_init_4bit(ctx->Htable, ctx->H.u);
803
        ctx->gmult = gcm_gmult_4bit;
804
        CTX__GHASH(gcm_ghash_4bit);
805
    }
806
#else
807
    gcm_init_4bit(ctx->Htable, ctx->H.u);
808
#endif
809
0
#undef CTX__GHASH
810
0
#endif
811
0
}
812
813
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
814
    size_t len)
815
4.03M
{
816
4.03M
    DECLARE_IS_ENDIAN;
817
4.03M
    unsigned int ctr;
818
4.03M
#ifdef GCM_FUNCREF_4BIT
819
4.03M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
820
4.03M
#endif
821
822
4.03M
    ctx->len.u[0] = 0; /* AAD length */
823
4.03M
    ctx->len.u[1] = 0; /* message length */
824
4.03M
    ctx->ares = 0;
825
4.03M
    ctx->mres = 0;
826
827
4.03M
    if (len == 12) {
828
4.03M
        memcpy(ctx->Yi.c, iv, 12);
829
4.03M
        ctx->Yi.c[12] = 0;
830
4.03M
        ctx->Yi.c[13] = 0;
831
4.03M
        ctx->Yi.c[14] = 0;
832
4.03M
        ctx->Yi.c[15] = 1;
833
4.03M
        ctr = 1;
834
4.03M
    } else {
835
0
        size_t i;
836
0
        u64 len0 = len;
837
838
        /* Borrow ctx->Xi to calculate initial Yi */
839
0
        ctx->Xi.u[0] = 0;
840
0
        ctx->Xi.u[1] = 0;
841
842
0
        while (len >= 16) {
843
0
            for (i = 0; i < 16; ++i)
844
0
                ctx->Xi.c[i] ^= iv[i];
845
0
            GCM_MUL(ctx);
846
0
            iv += 16;
847
0
            len -= 16;
848
0
        }
849
0
        if (len) {
850
0
            for (i = 0; i < len; ++i)
851
0
                ctx->Xi.c[i] ^= iv[i];
852
0
            GCM_MUL(ctx);
853
0
        }
854
0
        len0 <<= 3;
855
0
        if (IS_LITTLE_ENDIAN) {
856
#ifdef BSWAP8
857
            ctx->Xi.u[1] ^= BSWAP8(len0);
858
#else
859
0
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
860
0
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
861
0
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
862
0
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
863
0
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
864
0
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
865
0
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
866
0
            ctx->Xi.c[15] ^= (u8)(len0);
867
0
#endif
868
0
        } else {
869
0
            ctx->Xi.u[1] ^= len0;
870
0
        }
871
872
0
        GCM_MUL(ctx);
873
874
0
        if (IS_LITTLE_ENDIAN)
875
#ifdef BSWAP4
876
            ctr = BSWAP4(ctx->Xi.d[3]);
877
#else
878
0
            ctr = GETU32(ctx->Xi.c + 12);
879
0
#endif
880
0
        else
881
0
            ctr = ctx->Xi.d[3];
882
883
        /* Copy borrowed Xi to Yi */
884
0
        ctx->Yi.u[0] = ctx->Xi.u[0];
885
0
        ctx->Yi.u[1] = ctx->Xi.u[1];
886
0
    }
887
888
4.03M
    ctx->Xi.u[0] = 0;
889
4.03M
    ctx->Xi.u[1] = 0;
890
891
4.03M
    (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
892
4.03M
    ++ctr;
893
4.03M
    if (IS_LITTLE_ENDIAN)
894
#ifdef BSWAP4
895
        ctx->Yi.d[3] = BSWAP4(ctr);
896
#else
897
4.03M
        PUTU32(ctx->Yi.c + 12, ctr);
898
0
#endif
899
0
    else
900
0
        ctx->Yi.d[3] = ctr;
901
4.03M
}
902
903
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
904
    size_t len)
905
4.55M
{
906
4.55M
    size_t i;
907
4.55M
    unsigned int n;
908
4.55M
    u64 alen = ctx->len.u[0];
909
4.55M
#ifdef GCM_FUNCREF_4BIT
910
4.55M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
911
4.55M
#ifdef GHASH
912
4.55M
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
913
4.55M
        const u8 *inp, size_t len)
914
4.55M
        = ctx->ghash;
915
4.55M
#endif
916
4.55M
#endif
917
918
4.55M
    if (ctx->len.u[1])
919
0
        return -2;
920
921
4.55M
    alen += len;
922
4.55M
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
923
0
        return -1;
924
4.55M
    ctx->len.u[0] = alen;
925
926
4.55M
    n = ctx->ares;
927
4.55M
    if (n) {
928
2.03M
        while (n && len) {
929
1.56M
            ctx->Xi.c[n] ^= *(aad++);
930
1.56M
            --len;
931
1.56M
            n = (n + 1) % 16;
932
1.56M
        }
933
469k
        if (n == 0)
934
38.6k
            GCM_MUL(ctx);
935
430k
        else {
936
430k
            ctx->ares = n;
937
430k
            return 0;
938
430k
        }
939
469k
    }
940
4.12M
#ifdef GHASH
941
4.12M
    if ((i = (len & (size_t)-16))) {
942
1.52M
        GHASH(ctx, aad, i);
943
1.52M
        aad += i;
944
1.52M
        len -= i;
945
1.52M
    }
946
#else
947
    while (len >= 16) {
948
        for (i = 0; i < 16; ++i)
949
            ctx->Xi.c[i] ^= aad[i];
950
        GCM_MUL(ctx);
951
        aad += 16;
952
        len -= 16;
953
    }
954
#endif
955
4.12M
    if (len) {
956
3.63M
        n = (unsigned int)len;
957
43.1M
        for (i = 0; i < len; ++i)
958
39.4M
            ctx->Xi.c[i] ^= aad[i];
959
3.63M
    }
960
961
4.12M
    ctx->ares = n;
962
4.12M
    return 0;
963
4.55M
}
964
965
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
966
    const unsigned char *in, unsigned char *out,
967
    size_t len)
968
1.64M
{
969
1.64M
    DECLARE_IS_ENDIAN;
970
1.64M
    unsigned int n, ctr, mres;
971
1.64M
    size_t i;
972
1.64M
    u64 mlen = ctx->len.u[1];
973
1.64M
    block128_f block = ctx->block;
974
1.64M
    void *key = ctx->key;
975
1.64M
#ifdef GCM_FUNCREF_4BIT
976
1.64M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
977
1.64M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
978
1.64M
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
979
1.64M
        const u8 *inp, size_t len)
980
1.64M
        = ctx->ghash;
981
1.64M
#endif
982
1.64M
#endif
983
984
1.64M
    mlen += len;
985
1.64M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
986
0
        return -1;
987
1.64M
    ctx->len.u[1] = mlen;
988
989
1.64M
    mres = ctx->mres;
990
991
1.64M
    if (ctx->ares) {
992
        /* First call to encrypt finalizes GHASH(AAD) */
993
11.5k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
994
11.5k
        if (len == 0) {
995
10.6k
            GCM_MUL(ctx);
996
10.6k
            ctx->ares = 0;
997
10.6k
            return 0;
998
10.6k
        }
999
878
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1000
878
        ctx->Xi.u[0] = 0;
1001
878
        ctx->Xi.u[1] = 0;
1002
878
        mres = sizeof(ctx->Xi);
1003
#else
1004
        GCM_MUL(ctx);
1005
#endif
1006
878
        ctx->ares = 0;
1007
878
    }
1008
1009
1.63M
    if (IS_LITTLE_ENDIAN)
1010
#ifdef BSWAP4
1011
        ctr = BSWAP4(ctx->Yi.d[3]);
1012
#else
1013
1.63M
        ctr = GETU32(ctx->Yi.c + 12);
1014
0
#endif
1015
0
    else
1016
0
        ctr = ctx->Yi.d[3];
1017
1018
1.63M
    n = mres % 16;
1019
1.63M
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1020
1.63M
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1021
1.63M
        do {
1022
1.63M
            if (n) {
1023
1.61M
#if defined(GHASH)
1024
24.0M
                while (n && len) {
1025
22.4M
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1026
22.4M
                    --len;
1027
22.4M
                    n = (n + 1) % 16;
1028
22.4M
                }
1029
1.61M
                if (n == 0) {
1030
1.61M
                    GHASH(ctx, ctx->Xn, mres);
1031
1.61M
                    mres = 0;
1032
1.61M
                } else {
1033
0
                    ctx->mres = mres;
1034
0
                    return 0;
1035
0
                }
1036
#else
1037
                while (n && len) {
1038
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1039
                    --len;
1040
                    n = (n + 1) % 16;
1041
                }
1042
                if (n == 0) {
1043
                    GCM_MUL(ctx);
1044
                    mres = 0;
1045
                } else {
1046
                    ctx->mres = n;
1047
                    return 0;
1048
                }
1049
#endif
1050
1.61M
            }
1051
1.63M
#if defined(STRICT_ALIGNMENT)
1052
1.63M
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1053
1.61M
                break;
1054
19.9k
#endif
1055
19.9k
#if defined(GHASH)
1056
19.9k
            if (len >= 16 && mres) {
1057
576
                GHASH(ctx, ctx->Xn, mres);
1058
576
                mres = 0;
1059
576
            }
1060
19.9k
#if defined(GHASH_CHUNK)
1061
19.9k
            while (len >= GHASH_CHUNK) {
1062
0
                size_t j = GHASH_CHUNK;
1063
1064
0
                while (j) {
1065
0
                    size_t_aX *out_t = (size_t_aX *)out;
1066
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
1067
1068
0
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
1069
0
                    ++ctr;
1070
0
                    if (IS_LITTLE_ENDIAN)
1071
#ifdef BSWAP4
1072
                        ctx->Yi.d[3] = BSWAP4(ctr);
1073
#else
1074
0
                        PUTU32(ctx->Yi.c + 12, ctr);
1075
0
#endif
1076
0
                    else
1077
0
                        ctx->Yi.d[3] = ctr;
1078
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1079
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1080
0
                    out += 16;
1081
0
                    in += 16;
1082
0
                    j -= 16;
1083
0
                }
1084
0
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1085
0
                len -= GHASH_CHUNK;
1086
0
            }
1087
19.9k
#endif
1088
19.9k
            if ((i = (len & (size_t)-16))) {
1089
576
                size_t j = i;
1090
1091
1.15k
                while (len >= 16) {
1092
576
                    size_t_aX *out_t = (size_t_aX *)out;
1093
576
                    const size_t_aX *in_t = (const size_t_aX *)in;
1094
1095
576
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
1096
576
                    ++ctr;
1097
576
                    if (IS_LITTLE_ENDIAN)
1098
#ifdef BSWAP4
1099
                        ctx->Yi.d[3] = BSWAP4(ctr);
1100
#else
1101
576
                        PUTU32(ctx->Yi.c + 12, ctr);
1102
0
#endif
1103
0
                    else
1104
0
                        ctx->Yi.d[3] = ctr;
1105
1.72k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1106
1.15k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1107
576
                    out += 16;
1108
576
                    in += 16;
1109
576
                    len -= 16;
1110
576
                }
1111
576
                GHASH(ctx, out - j, j);
1112
576
            }
1113
#else
1114
            while (len >= 16) {
1115
                size_t *out_t = (size_t *)out;
1116
                const size_t *in_t = (const size_t *)in;
1117
1118
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
1119
                ++ctr;
1120
                if (IS_LITTLE_ENDIAN)
1121
#ifdef BSWAP4
1122
                    ctx->Yi.d[3] = BSWAP4(ctr);
1123
#else
1124
                    PUTU32(ctx->Yi.c + 12, ctr);
1125
#endif
1126
                else
1127
                    ctx->Yi.d[3] = ctr;
1128
                for (i = 0; i < 16 / sizeof(size_t); ++i)
1129
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1130
                GCM_MUL(ctx);
1131
                out += 16;
1132
                in += 16;
1133
                len -= 16;
1134
            }
1135
#endif
1136
19.9k
            if (len) {
1137
723
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
1138
723
                ++ctr;
1139
723
                if (IS_LITTLE_ENDIAN)
1140
#ifdef BSWAP4
1141
                    ctx->Yi.d[3] = BSWAP4(ctr);
1142
#else
1143
723
                    PUTU32(ctx->Yi.c + 12, ctr);
1144
0
#endif
1145
0
                else
1146
0
                    ctx->Yi.d[3] = ctr;
1147
723
#if defined(GHASH)
1148
3.58k
                while (len--) {
1149
2.86k
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1150
2.86k
                    ++n;
1151
2.86k
                }
1152
#else
1153
                while (len--) {
1154
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1155
                    ++n;
1156
                }
1157
                mres = n;
1158
#endif
1159
723
            }
1160
1161
19.9k
            ctx->mres = mres;
1162
19.9k
            return 0;
1163
1.63M
        } while (0);
1164
1.63M
    }
1165
1.61M
#endif
1166
1.61M
    for (i = 0; i < len; ++i) {
1167
0
        if (n == 0) {
1168
0
            (*block)(ctx->Yi.c, ctx->EKi.c, key);
1169
0
            ++ctr;
1170
0
            if (IS_LITTLE_ENDIAN)
1171
#ifdef BSWAP4
1172
                ctx->Yi.d[3] = BSWAP4(ctr);
1173
#else
1174
0
                PUTU32(ctx->Yi.c + 12, ctr);
1175
0
#endif
1176
0
            else
1177
0
                ctx->Yi.d[3] = ctr;
1178
0
        }
1179
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1180
0
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
1181
0
        n = (n + 1) % 16;
1182
0
        if (mres == sizeof(ctx->Xn)) {
1183
0
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
1184
0
            mres = 0;
1185
0
        }
1186
#else
1187
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1188
        mres = n = (n + 1) % 16;
1189
        if (n == 0)
1190
            GCM_MUL(ctx);
1191
#endif
1192
0
    }
1193
1194
1.61M
    ctx->mres = mres;
1195
1.61M
    return 0;
1196
1.63M
}
1197
1198
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1199
    const unsigned char *in, unsigned char *out,
1200
    size_t len)
1201
660k
{
1202
660k
    DECLARE_IS_ENDIAN;
1203
660k
    unsigned int n, ctr, mres;
1204
660k
    size_t i;
1205
660k
    u64 mlen = ctx->len.u[1];
1206
660k
    block128_f block = ctx->block;
1207
660k
    void *key = ctx->key;
1208
660k
#ifdef GCM_FUNCREF_4BIT
1209
660k
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1210
660k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1211
660k
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1212
660k
        const u8 *inp, size_t len)
1213
660k
        = ctx->ghash;
1214
660k
#endif
1215
660k
#endif
1216
1217
660k
    mlen += len;
1218
660k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1219
0
        return -1;
1220
660k
    ctx->len.u[1] = mlen;
1221
1222
660k
    mres = ctx->mres;
1223
1224
660k
    if (ctx->ares) {
1225
        /* First call to decrypt finalizes GHASH(AAD) */
1226
281k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1227
281k
        if (len == 0) {
1228
239k
            GCM_MUL(ctx);
1229
239k
            ctx->ares = 0;
1230
239k
            return 0;
1231
239k
        }
1232
42.3k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1233
42.3k
        ctx->Xi.u[0] = 0;
1234
42.3k
        ctx->Xi.u[1] = 0;
1235
42.3k
        mres = sizeof(ctx->Xi);
1236
#else
1237
        GCM_MUL(ctx);
1238
#endif
1239
42.3k
        ctx->ares = 0;
1240
42.3k
    }
1241
1242
420k
    if (IS_LITTLE_ENDIAN)
1243
#ifdef BSWAP4
1244
        ctr = BSWAP4(ctx->Yi.d[3]);
1245
#else
1246
420k
        ctr = GETU32(ctx->Yi.c + 12);
1247
0
#endif
1248
0
    else
1249
0
        ctr = ctx->Yi.d[3];
1250
1251
420k
    n = mres % 16;
1252
420k
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1253
420k
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1254
420k
        do {
1255
420k
            if (n) {
1256
0
#if defined(GHASH)
1257
0
                while (n && len) {
1258
0
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1259
0
                    --len;
1260
0
                    n = (n + 1) % 16;
1261
0
                }
1262
0
                if (n == 0) {
1263
0
                    GHASH(ctx, ctx->Xn, mres);
1264
0
                    mres = 0;
1265
0
                } else {
1266
0
                    ctx->mres = mres;
1267
0
                    return 0;
1268
0
                }
1269
#else
1270
                while (n && len) {
1271
                    u8 c = *(in++);
1272
                    *(out++) = c ^ ctx->EKi.c[n];
1273
                    ctx->Xi.c[n] ^= c;
1274
                    --len;
1275
                    n = (n + 1) % 16;
1276
                }
1277
                if (n == 0) {
1278
                    GCM_MUL(ctx);
1279
                    mres = 0;
1280
                } else {
1281
                    ctx->mres = n;
1282
                    return 0;
1283
                }
1284
#endif
1285
0
            }
1286
420k
#if defined(STRICT_ALIGNMENT)
1287
420k
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1288
1.32k
                break;
1289
419k
#endif
1290
419k
#if defined(GHASH)
1291
419k
            if (len >= 16 && mres) {
1292
14.2k
                GHASH(ctx, ctx->Xn, mres);
1293
14.2k
                mres = 0;
1294
14.2k
            }
1295
419k
#if defined(GHASH_CHUNK)
1296
425k
            while (len >= GHASH_CHUNK) {
1297
5.71k
                size_t j = GHASH_CHUNK;
1298
1299
5.71k
                GHASH(ctx, in, GHASH_CHUNK);
1300
1.10M
                while (j) {
1301
1.09M
                    size_t_aX *out_t = (size_t_aX *)out;
1302
1.09M
                    const size_t_aX *in_t = (const size_t_aX *)in;
1303
1304
1.09M
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
1305
1.09M
                    ++ctr;
1306
1.09M
                    if (IS_LITTLE_ENDIAN)
1307
#ifdef BSWAP4
1308
                        ctx->Yi.d[3] = BSWAP4(ctr);
1309
#else
1310
1.09M
                        PUTU32(ctx->Yi.c + 12, ctr);
1311
0
#endif
1312
0
                    else
1313
0
                        ctx->Yi.d[3] = ctr;
1314
3.29M
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1315
2.19M
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1316
1.09M
                    out += 16;
1317
1.09M
                    in += 16;
1318
1.09M
                    j -= 16;
1319
1.09M
                }
1320
5.71k
                len -= GHASH_CHUNK;
1321
5.71k
            }
1322
419k
#endif
1323
419k
            if ((i = (len & (size_t)-16))) {
1324
14.1k
                GHASH(ctx, in, i);
1325
228k
                while (len >= 16) {
1326
214k
                    size_t_aX *out_t = (size_t_aX *)out;
1327
214k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1328
1329
214k
                    (*block)(ctx->Yi.c, ctx->EKi.c, key);
1330
214k
                    ++ctr;
1331
214k
                    if (IS_LITTLE_ENDIAN)
1332
#ifdef BSWAP4
1333
                        ctx->Yi.d[3] = BSWAP4(ctr);
1334
#else
1335
214k
                        PUTU32(ctx->Yi.c + 12, ctr);
1336
0
#endif
1337
0
                    else
1338
0
                        ctx->Yi.d[3] = ctr;
1339
642k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1340
428k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1341
214k
                    out += 16;
1342
214k
                    in += 16;
1343
214k
                    len -= 16;
1344
214k
                }
1345
14.1k
            }
1346
#else
1347
            while (len >= 16) {
1348
                size_t *out_t = (size_t *)out;
1349
                const size_t *in_t = (const size_t *)in;
1350
1351
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
1352
                ++ctr;
1353
                if (IS_LITTLE_ENDIAN)
1354
#ifdef BSWAP4
1355
                    ctx->Yi.d[3] = BSWAP4(ctr);
1356
#else
1357
                    PUTU32(ctx->Yi.c + 12, ctr);
1358
#endif
1359
                else
1360
                    ctx->Yi.d[3] = ctr;
1361
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1362
                    size_t c = in_t[i];
1363
                    out_t[i] = c ^ ctx->EKi.t[i];
1364
                    ctx->Xi.t[i] ^= c;
1365
                }
1366
                GCM_MUL(ctx);
1367
                out += 16;
1368
                in += 16;
1369
                len -= 16;
1370
            }
1371
#endif
1372
419k
            if (len) {
1373
39.5k
                (*block)(ctx->Yi.c, ctx->EKi.c, key);
1374
39.5k
                ++ctr;
1375
39.5k
                if (IS_LITTLE_ENDIAN)
1376
#ifdef BSWAP4
1377
                    ctx->Yi.d[3] = BSWAP4(ctr);
1378
#else
1379
39.5k
                    PUTU32(ctx->Yi.c + 12, ctr);
1380
0
#endif
1381
0
                else
1382
0
                    ctx->Yi.d[3] = ctr;
1383
39.5k
#if defined(GHASH)
1384
359k
                while (len--) {
1385
320k
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1386
320k
                    ++n;
1387
320k
                }
1388
#else
1389
                while (len--) {
1390
                    u8 c = in[n];
1391
                    ctx->Xi.c[n] ^= c;
1392
                    out[n] = c ^ ctx->EKi.c[n];
1393
                    ++n;
1394
                }
1395
                mres = n;
1396
#endif
1397
39.5k
            }
1398
1399
419k
            ctx->mres = mres;
1400
419k
            return 0;
1401
420k
        } while (0);
1402
420k
    }
1403
1.32k
#endif
1404
1.32k
    for (i = 0; i < len; ++i) {
1405
0
        u8 c;
1406
0
        if (n == 0) {
1407
0
            (*block)(ctx->Yi.c, ctx->EKi.c, key);
1408
0
            ++ctr;
1409
0
            if (IS_LITTLE_ENDIAN)
1410
#ifdef BSWAP4
1411
                ctx->Yi.d[3] = BSWAP4(ctr);
1412
#else
1413
0
                PUTU32(ctx->Yi.c + 12, ctr);
1414
0
#endif
1415
0
            else
1416
0
                ctx->Yi.d[3] = ctr;
1417
0
        }
1418
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1419
0
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1420
0
        n = (n + 1) % 16;
1421
0
        if (mres == sizeof(ctx->Xn)) {
1422
0
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
1423
0
            mres = 0;
1424
0
        }
1425
#else
1426
        c = in[i];
1427
        out[i] = c ^ ctx->EKi.c[n];
1428
        ctx->Xi.c[n] ^= c;
1429
        mres = n = (n + 1) % 16;
1430
        if (n == 0)
1431
            GCM_MUL(ctx);
1432
#endif
1433
0
    }
1434
1435
1.32k
    ctx->mres = mres;
1436
1.32k
    return 0;
1437
420k
}
1438
1439
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1440
    const unsigned char *in, unsigned char *out,
1441
    size_t len, ctr128_f stream)
1442
4.16M
{
1443
#if defined(OPENSSL_SMALL_FOOTPRINT)
1444
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1445
#else
1446
4.16M
    DECLARE_IS_ENDIAN;
1447
4.16M
    unsigned int n, ctr, mres;
1448
4.16M
    size_t i;
1449
4.16M
    u64 mlen = ctx->len.u[1];
1450
4.16M
    void *key = ctx->key;
1451
4.16M
#ifdef GCM_FUNCREF_4BIT
1452
4.16M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1453
4.16M
#ifdef GHASH
1454
4.16M
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1455
4.16M
        const u8 *inp, size_t len)
1456
4.16M
        = ctx->ghash;
1457
4.16M
#endif
1458
4.16M
#endif
1459
1460
4.16M
    mlen += len;
1461
4.16M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1462
0
        return -1;
1463
4.16M
    ctx->len.u[1] = mlen;
1464
1465
4.16M
    mres = ctx->mres;
1466
1467
4.16M
    if (ctx->ares) {
1468
        /* First call to encrypt finalizes GHASH(AAD) */
1469
2.10M
#if defined(GHASH)
1470
2.10M
        if (len == 0) {
1471
0
            GCM_MUL(ctx);
1472
0
            ctx->ares = 0;
1473
0
            return 0;
1474
0
        }
1475
2.10M
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1476
2.10M
        ctx->Xi.u[0] = 0;
1477
2.10M
        ctx->Xi.u[1] = 0;
1478
2.10M
        mres = sizeof(ctx->Xi);
1479
#else
1480
        GCM_MUL(ctx);
1481
#endif
1482
2.10M
        ctx->ares = 0;
1483
2.10M
    }
1484
1485
4.16M
    if (IS_LITTLE_ENDIAN)
1486
#ifdef BSWAP4
1487
        ctr = BSWAP4(ctx->Yi.d[3]);
1488
#else
1489
4.16M
        ctr = GETU32(ctx->Yi.c + 12);
1490
0
#endif
1491
0
    else
1492
0
        ctr = ctx->Yi.d[3];
1493
1494
4.16M
    n = mres % 16;
1495
4.16M
    if (n) {
1496
375k
#if defined(GHASH)
1497
2.21M
        while (n && len) {
1498
1.83M
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1499
1.83M
            --len;
1500
1.83M
            n = (n + 1) % 16;
1501
1.83M
        }
1502
375k
        if (n == 0) {
1503
173k
            GHASH(ctx, ctx->Xn, mres);
1504
173k
            mres = 0;
1505
202k
        } else {
1506
202k
            ctx->mres = mres;
1507
202k
            return 0;
1508
202k
        }
1509
#else
1510
        while (n && len) {
1511
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1512
            --len;
1513
            n = (n + 1) % 16;
1514
        }
1515
        if (n == 0) {
1516
            GCM_MUL(ctx);
1517
            mres = 0;
1518
        } else {
1519
            ctx->mres = n;
1520
            return 0;
1521
        }
1522
#endif
1523
375k
    }
1524
3.96M
#if defined(GHASH)
1525
3.96M
    if (len >= 16 && mres) {
1526
127k
        GHASH(ctx, ctx->Xn, mres);
1527
127k
        mres = 0;
1528
127k
    }
1529
3.96M
#if defined(GHASH_CHUNK)
1530
3.96M
    while (len >= GHASH_CHUNK) {
1531
0
        (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1532
0
        ctr += GHASH_CHUNK / 16;
1533
0
        if (IS_LITTLE_ENDIAN)
1534
#ifdef BSWAP4
1535
            ctx->Yi.d[3] = BSWAP4(ctr);
1536
#else
1537
0
            PUTU32(ctx->Yi.c + 12, ctr);
1538
0
#endif
1539
0
        else
1540
0
            ctx->Yi.d[3] = ctr;
1541
0
        GHASH(ctx, out, GHASH_CHUNK);
1542
0
        out += GHASH_CHUNK;
1543
0
        in += GHASH_CHUNK;
1544
0
        len -= GHASH_CHUNK;
1545
0
    }
1546
3.96M
#endif
1547
3.96M
#endif
1548
3.96M
    if ((i = (len & (size_t)-16))) {
1549
543k
        size_t j = i / 16;
1550
1551
543k
        (*stream)(in, out, j, key, ctx->Yi.c);
1552
543k
        ctr += (unsigned int)j;
1553
543k
        if (IS_LITTLE_ENDIAN)
1554
#ifdef BSWAP4
1555
            ctx->Yi.d[3] = BSWAP4(ctr);
1556
#else
1557
543k
            PUTU32(ctx->Yi.c + 12, ctr);
1558
0
#endif
1559
0
        else
1560
0
            ctx->Yi.d[3] = ctr;
1561
543k
        in += i;
1562
543k
        len -= i;
1563
543k
#if defined(GHASH)
1564
543k
        GHASH(ctx, out, i);
1565
543k
        out += i;
1566
#else
1567
        while (j--) {
1568
            for (i = 0; i < 16; ++i)
1569
                ctx->Xi.c[i] ^= out[i];
1570
            GCM_MUL(ctx);
1571
            out += 16;
1572
        }
1573
#endif
1574
543k
    }
1575
3.96M
    if (len) {
1576
3.89M
        (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
1577
3.89M
        ++ctr;
1578
3.89M
        if (IS_LITTLE_ENDIAN)
1579
#ifdef BSWAP4
1580
            ctx->Yi.d[3] = BSWAP4(ctr);
1581
#else
1582
3.89M
            PUTU32(ctx->Yi.c + 12, ctr);
1583
0
#endif
1584
0
        else
1585
0
            ctx->Yi.d[3] = ctr;
1586
14.7M
        while (len--) {
1587
10.8M
#if defined(GHASH)
1588
10.8M
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1589
#else
1590
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1591
#endif
1592
10.8M
            ++n;
1593
10.8M
        }
1594
3.89M
    }
1595
1596
3.96M
    ctx->mres = mres;
1597
3.96M
    return 0;
1598
4.16M
#endif
1599
4.16M
}
1600
1601
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1602
    const unsigned char *in, unsigned char *out,
1603
    size_t len, ctr128_f stream)
1604
1.33M
{
1605
#if defined(OPENSSL_SMALL_FOOTPRINT)
1606
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1607
#else
1608
1.33M
    DECLARE_IS_ENDIAN;
1609
1.33M
    unsigned int n, ctr, mres;
1610
1.33M
    size_t i;
1611
1.33M
    u64 mlen = ctx->len.u[1];
1612
1.33M
    void *key = ctx->key;
1613
1.33M
#ifdef GCM_FUNCREF_4BIT
1614
1.33M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1615
1.33M
#ifdef GHASH
1616
1.33M
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1617
1.33M
        const u8 *inp, size_t len)
1618
1.33M
        = ctx->ghash;
1619
1.33M
#endif
1620
1.33M
#endif
1621
1622
1.33M
    mlen += len;
1623
1.33M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1624
0
        return -1;
1625
1.33M
    ctx->len.u[1] = mlen;
1626
1627
1.33M
    mres = ctx->mres;
1628
1629
1.33M
    if (ctx->ares) {
1630
        /* First call to decrypt finalizes GHASH(AAD) */
1631
722k
#if defined(GHASH)
1632
722k
        if (len == 0) {
1633
15.4k
            GCM_MUL(ctx);
1634
15.4k
            ctx->ares = 0;
1635
15.4k
            return 0;
1636
15.4k
        }
1637
706k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1638
706k
        ctx->Xi.u[0] = 0;
1639
706k
        ctx->Xi.u[1] = 0;
1640
706k
        mres = sizeof(ctx->Xi);
1641
#else
1642
        GCM_MUL(ctx);
1643
#endif
1644
706k
        ctx->ares = 0;
1645
706k
    }
1646
1647
1.31M
    if (IS_LITTLE_ENDIAN)
1648
#ifdef BSWAP4
1649
        ctr = BSWAP4(ctx->Yi.d[3]);
1650
#else
1651
1.31M
        ctr = GETU32(ctx->Yi.c + 12);
1652
0
#endif
1653
0
    else
1654
0
        ctr = ctx->Yi.d[3];
1655
1656
1.31M
    n = mres % 16;
1657
1.31M
    if (n) {
1658
0
#if defined(GHASH)
1659
0
        while (n && len) {
1660
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1661
0
            --len;
1662
0
            n = (n + 1) % 16;
1663
0
        }
1664
0
        if (n == 0) {
1665
0
            GHASH(ctx, ctx->Xn, mres);
1666
0
            mres = 0;
1667
0
        } else {
1668
0
            ctx->mres = mres;
1669
0
            return 0;
1670
0
        }
1671
#else
1672
        while (n && len) {
1673
            u8 c = *(in++);
1674
            *(out++) = c ^ ctx->EKi.c[n];
1675
            ctx->Xi.c[n] ^= c;
1676
            --len;
1677
            n = (n + 1) % 16;
1678
        }
1679
        if (n == 0) {
1680
            GCM_MUL(ctx);
1681
            mres = 0;
1682
        } else {
1683
            ctx->mres = n;
1684
            return 0;
1685
        }
1686
#endif
1687
0
    }
1688
1.31M
#if defined(GHASH)
1689
1.31M
    if (len >= 16 && mres) {
1690
0
        GHASH(ctx, ctx->Xn, mres);
1691
0
        mres = 0;
1692
0
    }
1693
1.31M
#if defined(GHASH_CHUNK)
1694
1.31M
    while (len >= GHASH_CHUNK) {
1695
0
        GHASH(ctx, in, GHASH_CHUNK);
1696
0
        (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1697
0
        ctr += GHASH_CHUNK / 16;
1698
0
        if (IS_LITTLE_ENDIAN)
1699
#ifdef BSWAP4
1700
            ctx->Yi.d[3] = BSWAP4(ctr);
1701
#else
1702
0
            PUTU32(ctx->Yi.c + 12, ctr);
1703
0
#endif
1704
0
        else
1705
0
            ctx->Yi.d[3] = ctr;
1706
0
        out += GHASH_CHUNK;
1707
0
        in += GHASH_CHUNK;
1708
0
        len -= GHASH_CHUNK;
1709
0
    }
1710
1.31M
#endif
1711
1.31M
#endif
1712
1.31M
    if ((i = (len & (size_t)-16))) {
1713
549k
        size_t j = i / 16;
1714
1715
549k
#if defined(GHASH)
1716
549k
        GHASH(ctx, in, i);
1717
#else
1718
        while (j--) {
1719
            size_t k;
1720
            for (k = 0; k < 16; ++k)
1721
                ctx->Xi.c[k] ^= in[k];
1722
            GCM_MUL(ctx);
1723
            in += 16;
1724
        }
1725
        j = i / 16;
1726
        in -= i;
1727
#endif
1728
549k
        (*stream)(in, out, j, key, ctx->Yi.c);
1729
549k
        ctr += (unsigned int)j;
1730
549k
        if (IS_LITTLE_ENDIAN)
1731
#ifdef BSWAP4
1732
            ctx->Yi.d[3] = BSWAP4(ctr);
1733
#else
1734
549k
            PUTU32(ctx->Yi.c + 12, ctr);
1735
0
#endif
1736
0
        else
1737
0
            ctx->Yi.d[3] = ctr;
1738
549k
        out += i;
1739
549k
        in += i;
1740
549k
        len -= i;
1741
549k
    }
1742
1.31M
    if (len) {
1743
1.27M
        (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
1744
1.27M
        ++ctr;
1745
1.27M
        if (IS_LITTLE_ENDIAN)
1746
#ifdef BSWAP4
1747
            ctx->Yi.d[3] = BSWAP4(ctr);
1748
#else
1749
1.27M
            PUTU32(ctx->Yi.c + 12, ctr);
1750
0
#endif
1751
0
        else
1752
0
            ctx->Yi.d[3] = ctr;
1753
6.00M
        while (len--) {
1754
4.72M
#if defined(GHASH)
1755
4.72M
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1756
#else
1757
            u8 c = in[n];
1758
            ctx->Xi.c[mres++] ^= c;
1759
            out[n] = c ^ ctx->EKi.c[n];
1760
#endif
1761
4.72M
            ++n;
1762
4.72M
        }
1763
1.27M
    }
1764
1765
1.31M
    ctx->mres = mres;
1766
1.31M
    return 0;
1767
1.31M
#endif
1768
1.31M
}
1769
1770
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1771
    size_t len)
1772
4.03M
{
1773
4.03M
    DECLARE_IS_ENDIAN;
1774
4.03M
    u64 alen = ctx->len.u[0] << 3;
1775
4.03M
    u64 clen = ctx->len.u[1] << 3;
1776
4.03M
#ifdef GCM_FUNCREF_4BIT
1777
4.03M
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1778
4.03M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1779
4.03M
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
1780
4.03M
        const u8 *inp, size_t len)
1781
4.03M
        = ctx->ghash;
1782
4.03M
#endif
1783
4.03M
#endif
1784
1785
4.03M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1786
4.03M
    u128 bitlen;
1787
4.03M
    unsigned int mres = ctx->mres;
1788
1789
4.03M
    if (mres) {
1790
3.43M
        unsigned blocks = (mres + 15) & -16;
1791
1792
3.43M
        memset(ctx->Xn + mres, 0, blocks - mres);
1793
3.43M
        mres = blocks;
1794
3.43M
        if (mres == sizeof(ctx->Xn)) {
1795
0
            GHASH(ctx, ctx->Xn, mres);
1796
0
            mres = 0;
1797
0
        }
1798
3.43M
    } else if (ctx->ares) {
1799
480k
        GCM_MUL(ctx);
1800
480k
    }
1801
#else
1802
    if (ctx->mres || ctx->ares)
1803
        GCM_MUL(ctx);
1804
#endif
1805
1806
4.03M
    if (IS_LITTLE_ENDIAN) {
1807
#ifdef BSWAP8
1808
        alen = BSWAP8(alen);
1809
        clen = BSWAP8(clen);
1810
#else
1811
4.03M
        u8 *p = ctx->len.c;
1812
1813
4.03M
        ctx->len.u[0] = alen;
1814
4.03M
        ctx->len.u[1] = clen;
1815
1816
4.03M
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1817
4.03M
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1818
4.03M
#endif
1819
4.03M
    }
1820
1821
4.03M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1822
4.03M
    bitlen.hi = alen;
1823
4.03M
    bitlen.lo = clen;
1824
4.03M
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1825
4.03M
    mres += sizeof(bitlen);
1826
4.03M
    GHASH(ctx, ctx->Xn, mres);
1827
#else
1828
    ctx->Xi.u[0] ^= alen;
1829
    ctx->Xi.u[1] ^= clen;
1830
    GCM_MUL(ctx);
1831
#endif
1832
1833
4.03M
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1834
4.03M
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1835
1836
4.03M
    if (tag && len <= sizeof(ctx->Xi))
1837
1.38M
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1838
2.65M
    else
1839
2.65M
        return -1;
1840
4.03M
}
1841
1842
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1843
2.65M
{
1844
2.65M
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1845
2.65M
    memcpy(tag, ctx->Xi.c,
1846
2.65M
        len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1847
2.65M
}
1848
1849
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1850
0
{
1851
0
    GCM128_CONTEXT *ret;
1852
1853
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1854
0
        CRYPTO_gcm128_init(ret, key, block);
1855
1856
0
    return ret;
1857
0
}
1858
1859
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1860
0
{
1861
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1862
0
}