Coverage Report

Created: 2023-09-25 06:45

/src/openssl30/crypto/modes/gcm128.c

Count | Source
      | /*
      |  * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
      |  *
      |  * Licensed under the Apache License 2.0 (the "License").  You may not use
      |  * this file except in compliance with the License.  You can obtain a copy
      |  * in the file LICENSE in the source distribution or at
      |  * https://www.openssl.org/source/license.html
      |  */
      |
      | #include <string.h>
      | #include <openssl/crypto.h>
      | #include "internal/cryptlib.h"
      | #include "internal/endian.h"
      | #include "crypto/modes.h"
      |
      | #if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
      | typedef size_t size_t_aX __attribute((__aligned__(1)));
      | #else
      | typedef size_t size_t_aX;
      | #endif
      |
      | #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
      | /* redefine, because alignment is ensured */
      | # undef  GETU32
      | # define GETU32(p)       BSWAP4(*(const u32 *)(p))
      | # undef  PUTU32
      | # define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
      | #endif
      |
      | #define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
    0 | #define REDUCE1BIT(V)   do { \
    0 |         if (sizeof(size_t)==8) { \
    0 |                 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
    0 |                 V.lo  = (V.hi<<63)|(V.lo>>1); \
    0 |                 V.hi  = (V.hi>>1 )^T; \
    0 |         } \
    0 |         else { \
    0 |                 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
    0 |                 V.lo  = (V.hi<<63)|(V.lo>>1); \
    0 |                 V.hi  = (V.hi>>1 )^((u64)T<<32); \
    0 |         } \
    0 | } while(0)
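
[Editorial aside, not part of gcm128.c: REDUCE1BIT is multiplication of V by x in GF(2^128) using GCM's bit-reflected representation. The low bit shifted out of V.lo is the x^127 coefficient, which the reduction polynomial folds back in as the constant 0xE1 in the top byte. A minimal standalone sketch of the 64-bit branch, with a stand-in u128 type:]

#include <stdint.h>

typedef struct { uint64_t hi, lo; } u128_t;   /* stand-in for u128 */

static void reduce1bit(u128_t *v)
{
    uint64_t mask = 0 - (v->lo & 1);          /* all-ones iff a bit falls off */
    v->lo = (v->hi << 63) | (v->lo >> 1);
    v->hi = (v->hi >> 1) ^ (0xe100000000000000ULL & mask);
}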
      |
      | /*-
      |  * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
      |  * should never be set to 8; 8 is effectively reserved for testing
      |  * purposes.  TABLE_BITS>1 selects the lookup-table-driven
      |  * implementations referred to as "Shoup's" in the GCM specification,
      |  * so OpenSSL does not cover the whole spectrum of possible
      |  * table-driven implementations.  Why?  In the non-"Shoup's" case the
      |  * memory access pattern is segmented in such a manner that cache
      |  * timing information can reveal a fair portion of the intermediate
      |  * hash value.  Given that the ciphertext is always available to an
      |  * attacker, the attacker can attempt to deduce the secret parameter H
      |  * and, if successful, tamper with messages [which is trivial in CTR
      |  * mode].  In the "Shoup's" case this is not as easy, but there is no
      |  * reason to believe it is resistant to cache-timing attack either.
      |  * The catch with the "8-bit" implementation is that it consumes 16
      |  * (sixteen) times more memory, 4KB per individual key + 1KB shared.
      |  * On the plus side it should be about twice as fast as the "4-bit"
      |  * version, and for gcc-generated x86[_64] code the "8-bit" version
      |  * was observed to run ~75% faster, closer to 100% for commercial
      |  * compilers...  Yet the "4-bit" procedure is preferred, because it is
      |  * believed to provide a better security-performance balance and
      |  * adequate all-round performance.  "All-round" refers to things like:
      |  *
      |  * - shorter setup time effectively improves overall timing for
      |  *   handling of short messages;
      |  * - larger table allocation can become unbearable because of VM
      |  *   subsystem penalties (for example, on Windows a large enough
      |  *   free() results in VM working-set trimming, meaning that a
      |  *   subsequent malloc() would immediately incur working-set
      |  *   expansion);
      |  * - a larger table has a larger cache footprint, which can affect the
      |  *   performance of other code paths (not necessarily even in the same
      |  *   thread in a Hyper-Threading world);
      |  *
      |  * A value of 1 is not appropriate, for performance reasons.
      |  */
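
[Editorial aside for orientation: the GHASH these tables accelerate is iterated
multiplication in GF(2^128) by the hash key H = E_K(0^128),

    X_i = (X_{i-1} \oplus B_i) \cdot H, \qquad
    f(x) = x^{128} + x^7 + x^2 + x + 1 .

The "4-bit" (Shoup's) table stores the 16 multiples Htable[j] = j*H (16 entries
of 16 bytes = 256 bytes per key), against 256 entries (4KB) for the "8-bit"
variant; gcm_gmult_4bit then consumes Xi one nibble at a time, folding the four
bits shifted out of Z back in via rem_4bit.]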
      | #if     TABLE_BITS==8
      |
      | static void gcm_init_8bit(u128 Htable[256], u64 H[2])
      | {
      |     int i, j;
      |     u128 V;
      |
      |     Htable[0].hi = 0;
      |     Htable[0].lo = 0;
      |     V.hi = H[0];
      |     V.lo = H[1];
      |
      |     for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
      |         REDUCE1BIT(V);
      |         Htable[i] = V;
      |     }
      |
      |     for (i = 2; i < 256; i <<= 1) {
      |         u128 *Hi = Htable + i, H0 = *Hi;
      |         for (j = 1; j < i; ++j) {
      |             Hi[j].hi = H0.hi ^ Htable[j].hi;
      |             Hi[j].lo = H0.lo ^ Htable[j].lo;
      |         }
      |     }
      | }
      |
      | static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
      | {
      |     u128 Z = { 0, 0 };
      |     const u8 *xi = (const u8 *)Xi + 15;
      |     size_t rem, n = *xi;
      |     DECLARE_IS_ENDIAN;
      |     static const size_t rem_8bit[256] = {
      |         PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
      |         PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
      |         PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
      |         PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
      |         PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
      |         PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
      |         PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
      |         PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
      |         PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
      |         PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
      |         PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
      |         PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
      |         PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
      |         PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
      |         PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
      |         PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
      |         PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
      |         PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
      |         PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
      |         PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
      |         PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
      |         PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
      |         PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
      |         PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
      |         PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
      |         PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
      |         PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
      |         PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
      |         PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
      |         PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
      |         PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
      |         PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
      |         PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
      |         PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
      |         PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
      |         PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
      |         PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
      |         PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
      |         PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
      |         PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
      |         PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
      |         PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
      |         PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
      |         PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
      |         PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
      |         PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
      |         PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
      |         PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
      |         PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
      |         PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
      |         PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
      |         PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
      |         PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
      |         PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
      |         PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
      |         PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
      |         PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
      |         PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
      |         PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
      |         PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
      |         PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
      |         PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
      |         PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
      |         PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
      |     };
      |
      |     while (1) {
      |         Z.hi ^= Htable[n].hi;
      |         Z.lo ^= Htable[n].lo;
      |
      |         if ((u8 *)Xi == xi)
      |             break;
      |
      |         n = *(--xi);
      |
      |         rem = (size_t)Z.lo & 0xff;
      |         Z.lo = (Z.hi << 56) | (Z.lo >> 8);
      |         Z.hi = (Z.hi >> 8);
      |         if (sizeof(size_t) == 8)
      |             Z.hi ^= rem_8bit[rem];
      |         else
      |             Z.hi ^= (u64)rem_8bit[rem] << 32;
      |     }
      |
      |     if (IS_LITTLE_ENDIAN) {
      | # ifdef BSWAP8
      |         Xi[0] = BSWAP8(Z.hi);
      |         Xi[1] = BSWAP8(Z.lo);
      | # else
      |         u8 *p = (u8 *)Xi;
      |         u32 v;
      |         v = (u32)(Z.hi >> 32);
      |         PUTU32(p, v);
      |         v = (u32)(Z.hi);
      |         PUTU32(p + 4, v);
      |         v = (u32)(Z.lo >> 32);
      |         PUTU32(p + 8, v);
      |         v = (u32)(Z.lo);
      |         PUTU32(p + 12, v);
      | # endif
      |     } else {
      |         Xi[0] = Z.hi;
      |         Xi[1] = Z.lo;
      |     }
      | }
      |
      | # define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
      |
      | #elif   TABLE_BITS==4
      |
      | static void gcm_init_4bit(u128 Htable[16], u64 H[2])
    0 | {
    0 |     u128 V;
      | # if defined(OPENSSL_SMALL_FOOTPRINT)
      |     int i;
      | # endif
      |
    0 |     Htable[0].hi = 0;
    0 |     Htable[0].lo = 0;
    0 |     V.hi = H[0];
    0 |     V.lo = H[1];
      |
      | # if defined(OPENSSL_SMALL_FOOTPRINT)
      |     for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
      |         REDUCE1BIT(V);
      |         Htable[i] = V;
      |     }
      |
      |     for (i = 2; i < 16; i <<= 1) {
      |         u128 *Hi = Htable + i;
      |         int j;
      |         for (V = *Hi, j = 1; j < i; ++j) {
      |             Hi[j].hi = V.hi ^ Htable[j].hi;
      |             Hi[j].lo = V.lo ^ Htable[j].lo;
      |         }
      |     }
      | # else
    0 |     Htable[8] = V;
    0 |     REDUCE1BIT(V);
    0 |     Htable[4] = V;
    0 |     REDUCE1BIT(V);
    0 |     Htable[2] = V;
    0 |     REDUCE1BIT(V);
    0 |     Htable[1] = V;
    0 |     Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    0 |     V = Htable[4];
    0 |     Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    0 |     Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    0 |     Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    0 |     V = Htable[8];
    0 |     Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    0 |     Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    0 |     Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    0 |     Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    0 |     Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    0 |     Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    0 |     Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
    0 | # endif
      | # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
      |     /*
      |      * ARM assembler expects specific dword order in Htable.
      |      */
      |     {
      |         int j;
      |         DECLARE_IS_ENDIAN;
      |
      |         if (IS_LITTLE_ENDIAN)
      |             for (j = 0; j < 16; ++j) {
      |                 V = Htable[j];
      |                 Htable[j].hi = V.lo;
      |                 Htable[j].lo = V.hi;
      |         } else
      |             for (j = 0; j < 16; ++j) {
      |                 V = Htable[j];
      |                 Htable[j].hi = V.lo << 32 | V.lo >> 32;
      |                 Htable[j].lo = V.hi << 32 | V.hi >> 32;
      |             }
      |     }
      | # endif
    0 | }
      |
      | # ifndef GHASH_ASM
      | static const size_t rem_4bit[16] = {
      |     PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
      |     PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
      |     PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
      |     PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
      | };
      |
      | static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
      | {
      |     u128 Z;
      |     int cnt = 15;
      |     size_t rem, nlo, nhi;
      |     DECLARE_IS_ENDIAN;
      |
      |     nlo = ((const u8 *)Xi)[15];
      |     nhi = nlo >> 4;
      |     nlo &= 0xf;
      |
      |     Z.hi = Htable[nlo].hi;
      |     Z.lo = Htable[nlo].lo;
      |
      |     while (1) {
      |         rem = (size_t)Z.lo & 0xf;
      |         Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      |         Z.hi = (Z.hi >> 4);
      |         if (sizeof(size_t) == 8)
      |             Z.hi ^= rem_4bit[rem];
      |         else
      |             Z.hi ^= (u64)rem_4bit[rem] << 32;
      |
      |         Z.hi ^= Htable[nhi].hi;
      |         Z.lo ^= Htable[nhi].lo;
      |
      |         if (--cnt < 0)
      |             break;
      |
      |         nlo = ((const u8 *)Xi)[cnt];
      |         nhi = nlo >> 4;
      |         nlo &= 0xf;
      |
      |         rem = (size_t)Z.lo & 0xf;
      |         Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      |         Z.hi = (Z.hi >> 4);
      |         if (sizeof(size_t) == 8)
      |             Z.hi ^= rem_4bit[rem];
      |         else
      |             Z.hi ^= (u64)rem_4bit[rem] << 32;
      |
      |         Z.hi ^= Htable[nlo].hi;
      |         Z.lo ^= Htable[nlo].lo;
      |     }
      |
      |     if (IS_LITTLE_ENDIAN) {
      | #  ifdef BSWAP8
      |         Xi[0] = BSWAP8(Z.hi);
      |         Xi[1] = BSWAP8(Z.lo);
      | #  else
      |         u8 *p = (u8 *)Xi;
      |         u32 v;
      |         v = (u32)(Z.hi >> 32);
      |         PUTU32(p, v);
      |         v = (u32)(Z.hi);
      |         PUTU32(p + 4, v);
      |         v = (u32)(Z.lo >> 32);
      |         PUTU32(p + 8, v);
      |         v = (u32)(Z.lo);
      |         PUTU32(p + 12, v);
      | #  endif
      |     } else {
      |         Xi[0] = Z.hi;
      |         Xi[1] = Z.lo;
      |     }
      | }
      |
      | #  if !defined(OPENSSL_SMALL_FOOTPRINT)
      | /*
      |  * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt
      |  * for details... Compiler-generated code doesn't seem to give any
      |  * performance improvement, at least not on x86[_64]. It's here
      |  * mostly as a reference and a placeholder for possible future
      |  * non-trivial optimization[s]...
      |  */
      | static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
      |                            const u8 *inp, size_t len)
      | {
      |     u128 Z;
      |     int cnt;
      |     size_t rem, nlo, nhi;
      |     DECLARE_IS_ENDIAN;
      |
      | #   if 1
      |     do {
      |         cnt = 15;
      |         nlo = ((const u8 *)Xi)[15];
      |         nlo ^= inp[15];
      |         nhi = nlo >> 4;
      |         nlo &= 0xf;
      |
      |         Z.hi = Htable[nlo].hi;
      |         Z.lo = Htable[nlo].lo;
      |
      |         while (1) {
      |             rem = (size_t)Z.lo & 0xf;
      |             Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      |             Z.hi = (Z.hi >> 4);
      |             if (sizeof(size_t) == 8)
      |                 Z.hi ^= rem_4bit[rem];
      |             else
      |                 Z.hi ^= (u64)rem_4bit[rem] << 32;
      |
      |             Z.hi ^= Htable[nhi].hi;
      |             Z.lo ^= Htable[nhi].lo;
      |
      |             if (--cnt < 0)
      |                 break;
      |
      |             nlo = ((const u8 *)Xi)[cnt];
      |             nlo ^= inp[cnt];
      |             nhi = nlo >> 4;
      |             nlo &= 0xf;
      |
      |             rem = (size_t)Z.lo & 0xf;
      |             Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      |             Z.hi = (Z.hi >> 4);
      |             if (sizeof(size_t) == 8)
      |                 Z.hi ^= rem_4bit[rem];
      |             else
      |                 Z.hi ^= (u64)rem_4bit[rem] << 32;
      |
      |             Z.hi ^= Htable[nlo].hi;
      |             Z.lo ^= Htable[nlo].lo;
      |         }
      | #   else
      |     /*
      |      * Extra 256+16 bytes per key plus 512 bytes of shared tables
      |      * [should] give ~50% improvement... One could have PACK()-ed
      |      * rem_8bit even here, but the priority is to minimize the
      |      * cache footprint...
      |      */
      |     u128 Hshr4[16];             /* Htable shifted right by 4 bits */
      |     u8 Hshl4[16];               /* Htable shifted left by 4 bits */
      |     static const unsigned short rem_8bit[256] = {
      |         0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
      |         0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
      |         0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
      |         0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
      |         0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
      |         0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
      |         0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
      |         0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
      |         0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
      |         0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
      |         0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
      |         0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
      |         0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
      |         0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
      |         0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
      |         0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
      |         0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
      |         0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
      |         0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
      |         0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
      |         0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
      |         0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
      |         0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
      |         0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
      |         0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
      |         0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
      |         0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
      |         0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
      |         0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
      |         0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
      |         0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
      |         0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
      |     };
      |     /*
      |      * This pre-processing phase slows the procedure down by
      |      * approximately the same time as it makes each loop spin faster.
      |      * In other words, single-block performance is approximately the
      |      * same as for the straightforward "4-bit" implementation, and
      |      * from there it only gets faster...
      |      */
      |     for (cnt = 0; cnt < 16; ++cnt) {
      |         Z.hi = Htable[cnt].hi;
      |         Z.lo = Htable[cnt].lo;
      |         Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
      |         Hshr4[cnt].hi = (Z.hi >> 4);
      |         Hshl4[cnt] = (u8)(Z.lo << 4);
      |     }
      |
      |     do {
      |         for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
      |             nlo = ((const u8 *)Xi)[cnt];
      |             nlo ^= inp[cnt];
      |             nhi = nlo >> 4;
      |             nlo &= 0xf;
      |
      |             Z.hi ^= Htable[nlo].hi;
      |             Z.lo ^= Htable[nlo].lo;
      |
      |             rem = (size_t)Z.lo & 0xff;
      |
      |             Z.lo = (Z.hi << 56) | (Z.lo >> 8);
      |             Z.hi = (Z.hi >> 8);
      |
      |             Z.hi ^= Hshr4[nhi].hi;
      |             Z.lo ^= Hshr4[nhi].lo;
      |             Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
      |         }
      |
      |         nlo = ((const u8 *)Xi)[0];
      |         nlo ^= inp[0];
      |         nhi = nlo >> 4;
      |         nlo &= 0xf;
      |
      |         Z.hi ^= Htable[nlo].hi;
      |         Z.lo ^= Htable[nlo].lo;
      |
      |         rem = (size_t)Z.lo & 0xf;
      |
      |         Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      |         Z.hi = (Z.hi >> 4);
      |
      |         Z.hi ^= Htable[nhi].hi;
      |         Z.lo ^= Htable[nhi].lo;
      |         Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
      | #   endif
      |
      |         if (IS_LITTLE_ENDIAN) {
      | #   ifdef BSWAP8
      |             Xi[0] = BSWAP8(Z.hi);
      |             Xi[1] = BSWAP8(Z.lo);
      | #   else
      |             u8 *p = (u8 *)Xi;
      |             u32 v;
      |             v = (u32)(Z.hi >> 32);
      |             PUTU32(p, v);
      |             v = (u32)(Z.hi);
      |             PUTU32(p + 4, v);
      |             v = (u32)(Z.lo >> 32);
      |             PUTU32(p + 8, v);
      |             v = (u32)(Z.lo);
      |             PUTU32(p + 12, v);
      | #   endif
      |         } else {
      |             Xi[0] = Z.hi;
      |             Xi[1] = Z.lo;
      |         }
      |     } while (inp += 16, len -= 16);
      | }
      | #  endif
      | # else
      | void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
      | void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
      |                     size_t len);
      | # endif
      |
      | # define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
      | # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
      | #  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
      | /*
      |  * GHASH_CHUNK is a "stride parameter" meant to mitigate the
      |  * cache-thrashing effect; the idea is to hash data while it is still
      |  * in the L1 cache after the encryption pass...
      |  */
28.3k | #  define GHASH_CHUNK       (3*1024)
      | # endif
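
[Editorial aside, a quick check of the stride: 3*1024 = 3072 bytes, i.e. 192
AES blocks per batch, small enough to stay resident in a typical 32KB L1 data
cache alongside the 256-byte Htable.]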
      |
      | #else                           /* TABLE_BITS */
      |
      | static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
      | {
      |     u128 V, Z = { 0, 0 };
      |     long X;
      |     int i, j;
      |     const long *xi = (const long *)Xi;
      |     DECLARE_IS_ENDIAN;
      |
      |     V.hi = H[0];                /* H is in host byte order, no byte swapping */
      |     V.lo = H[1];
      |
      |     for (j = 0; j < 16 / sizeof(long); ++j) {
      |         if (IS_LITTLE_ENDIAN) {
      |             if (sizeof(long) == 8) {
      | # ifdef BSWAP8
      |                 X = (long)(BSWAP8(xi[j]));
      | # else
      |                 const u8 *p = (const u8 *)(xi + j);
      |                 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
      | # endif
      |             } else {
      |                 const u8 *p = (const u8 *)(xi + j);
      |                 X = (long)GETU32(p);
      |             }
      |         } else
      |             X = xi[j];
      |
      |         for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
      |             u64 M = (u64)(X >> (8 * sizeof(long) - 1));
      |             Z.hi ^= V.hi & M;
      |             Z.lo ^= V.lo & M;
      |
      |             REDUCE1BIT(V);
      |         }
      |     }
      |
      |     if (IS_LITTLE_ENDIAN) {
      | # ifdef BSWAP8
      |         Xi[0] = BSWAP8(Z.hi);
      |         Xi[1] = BSWAP8(Z.lo);
      | # else
      |         u8 *p = (u8 *)Xi;
      |         u32 v;
      |         v = (u32)(Z.hi >> 32);
      |         PUTU32(p, v);
      |         v = (u32)(Z.hi);
      |         PUTU32(p + 4, v);
      |         v = (u32)(Z.lo >> 32);
      |         PUTU32(p + 8, v);
      |         v = (u32)(Z.lo);
      |         PUTU32(p + 12, v);
      | # endif
      |     } else {
      |         Xi[0] = Z.hi;
      |         Xi[1] = Z.lo;
      |     }
      | }
      |
      | # define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
      |
      | #endif
      |
      | #if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
      | # if    !defined(I386_ONLY) && \
      |         (defined(__i386)        || defined(__i386__)    || \
      |          defined(__x86_64)      || defined(__x86_64__)  || \
      |          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
      | #  define GHASH_ASM_X86_OR_64
      | #  define GCM_FUNCREF_4BIT
      |
      | void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
      | void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
      | void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
      |                      size_t len);
      |
      | #  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
      | #   define gcm_init_avx   gcm_init_clmul
      | #   define gcm_gmult_avx  gcm_gmult_clmul
      | #   define gcm_ghash_avx  gcm_ghash_clmul
      | #  else
      | void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
      | void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
      | void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
      |                    size_t len);
      | #  endif
      |
      | #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
      | #   define GHASH_ASM_X86
      | void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
      | void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
      |                         size_t len);
      |
      | void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
      | void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
      |                         size_t len);
      | #  endif
      | # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
      | #  include "arm_arch.h"
      | #  if __ARM_MAX_ARCH__>=7
      | #   define GHASH_ASM_ARM
      | #   define GCM_FUNCREF_4BIT
      | #   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
      | #   if defined(__arm__) || defined(__arm)
      | #    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
      | #   endif
      | void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
      | void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
      | void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
      |                     size_t len);
      | void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
      | void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
      | void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
      |                   size_t len);
      | #  endif
      | # elif defined(__sparc__) || defined(__sparc)
      | #  include "crypto/sparc_arch.h"
      | #  define GHASH_ASM_SPARC
      | #  define GCM_FUNCREF_4BIT
      | void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
      | void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
      | void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
      |                     size_t len);
      | # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
      | #  include "crypto/ppc_arch.h"
      | #  define GHASH_ASM_PPC
      | #  define GCM_FUNCREF_4BIT
      | void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
      | void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
      | void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
      |                   size_t len);
      | # endif
      | #endif
      |
      | #ifdef GCM_FUNCREF_4BIT
      | # undef  GCM_MUL
2.61k | # define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
      | # ifdef GHASH
      | #  undef  GHASH
31.7k | #  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
      | # endif
      | #endif
704
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
705
1.13k
{
706
1.13k
    DECLARE_IS_ENDIAN;
707
708
1.13k
    memset(ctx, 0, sizeof(*ctx));
709
1.13k
    ctx->block = block;
710
1.13k
    ctx->key = key;
711
712
1.13k
    (*block) (ctx->H.c, ctx->H.c, key);
713
714
1.13k
    if (IS_LITTLE_ENDIAN) {
715
        /* H is stored in host byte order */
716
#ifdef BSWAP8
717
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
718
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
719
#else
720
1.13k
        u8 *p = ctx->H.c;
721
1.13k
        u64 hi, lo;
722
1.13k
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
723
1.13k
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
724
1.13k
        ctx->H.u[0] = hi;
725
1.13k
        ctx->H.u[1] = lo;
726
1.13k
#endif
727
1.13k
    }
728
#if     TABLE_BITS==8
729
    gcm_init_8bit(ctx->Htable, ctx->H.u);
730
#elif   TABLE_BITS==4
731
1.13k
# if    defined(GHASH)
732
1.13k
#  define CTX__GHASH(f) (ctx->ghash = (f))
733
# else
734
#  define CTX__GHASH(f) (ctx->ghash = NULL)
735
# endif
736
1.13k
# if    defined(GHASH_ASM_X86_OR_64)
737
1.13k
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
738
1.13k
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
739
1.13k
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
740
1.13k
            gcm_init_avx(ctx->Htable, ctx->H.u);
741
1.13k
            ctx->gmult = gcm_gmult_avx;
742
1.13k
            CTX__GHASH(gcm_ghash_avx);
743
1.13k
        } else {
744
0
            gcm_init_clmul(ctx->Htable, ctx->H.u);
745
0
            ctx->gmult = gcm_gmult_clmul;
746
0
            CTX__GHASH(gcm_ghash_clmul);
747
0
        }
748
1.13k
        return;
749
1.13k
    }
750
0
#  endif
751
0
    gcm_init_4bit(ctx->Htable, ctx->H.u);
752
#  if   defined(GHASH_ASM_X86)  /* x86 only */
753
#   if  defined(OPENSSL_IA32_SSE2)
754
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
755
#   else
756
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
757
#   endif
758
        ctx->gmult = gcm_gmult_4bit_mmx;
759
        CTX__GHASH(gcm_ghash_4bit_mmx);
760
    } else {
761
        ctx->gmult = gcm_gmult_4bit_x86;
762
        CTX__GHASH(gcm_ghash_4bit_x86);
763
    }
764
#  else
765
0
    ctx->gmult = gcm_gmult_4bit;
766
0
    CTX__GHASH(gcm_ghash_4bit);
767
0
#  endif
768
# elif  defined(GHASH_ASM_ARM)
769
#  ifdef PMULL_CAPABLE
770
    if (PMULL_CAPABLE) {
771
        gcm_init_v8(ctx->Htable, ctx->H.u);
772
        ctx->gmult = gcm_gmult_v8;
773
        CTX__GHASH(gcm_ghash_v8);
774
    } else
775
#  endif
776
#  ifdef NEON_CAPABLE
777
    if (NEON_CAPABLE) {
778
        gcm_init_neon(ctx->Htable, ctx->H.u);
779
        ctx->gmult = gcm_gmult_neon;
780
        CTX__GHASH(gcm_ghash_neon);
781
    } else
782
#  endif
783
    {
784
        gcm_init_4bit(ctx->Htable, ctx->H.u);
785
        ctx->gmult = gcm_gmult_4bit;
786
        CTX__GHASH(gcm_ghash_4bit);
787
    }
788
# elif  defined(GHASH_ASM_SPARC)
789
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
790
        gcm_init_vis3(ctx->Htable, ctx->H.u);
791
        ctx->gmult = gcm_gmult_vis3;
792
        CTX__GHASH(gcm_ghash_vis3);
793
    } else {
794
        gcm_init_4bit(ctx->Htable, ctx->H.u);
795
        ctx->gmult = gcm_gmult_4bit;
796
        CTX__GHASH(gcm_ghash_4bit);
797
    }
798
# elif  defined(GHASH_ASM_PPC)
799
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
800
        gcm_init_p8(ctx->Htable, ctx->H.u);
801
        ctx->gmult = gcm_gmult_p8;
802
        CTX__GHASH(gcm_ghash_p8);
803
    } else {
804
        gcm_init_4bit(ctx->Htable, ctx->H.u);
805
        ctx->gmult = gcm_gmult_4bit;
806
        CTX__GHASH(gcm_ghash_4bit);
807
    }
808
# else
809
    gcm_init_4bit(ctx->Htable, ctx->H.u);
810
# endif
811
0
# undef CTX__GHASH
812
0
#endif
813
0
}
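
[Editorial aside, a minimal usage sketch of the public openssl/modes.h API
exercised by this report, pairing the context with AES; the helper name is
hypothetical and error handling is abbreviated:]

#include <openssl/aes.h>
#include <openssl/modes.h>

static int gcm_seal_demo(const unsigned char key[16],
                         const unsigned char iv[12],
                         const unsigned char *aad, size_t aadlen,
                         const unsigned char *pt, unsigned char *ct,
                         size_t len, unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT *gcm;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return -1;
    /* AES_encrypt has block128_f's (in, out, key) shape */
    gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt);
    if (gcm == NULL)
        return -1;
    CRYPTO_gcm128_setiv(gcm, iv, 12);            /* 96-bit IV fast path */
    if (CRYPTO_gcm128_aad(gcm, aad, aadlen) != 0
            || CRYPTO_gcm128_encrypt(gcm, pt, ct, len) != 0) {
        CRYPTO_gcm128_release(gcm);
        return -1;
    }
    CRYPTO_gcm128_tag(gcm, tag, 16);             /* authentication tag */
    CRYPTO_gcm128_release(gcm);
    return 0;
}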
      |
      | void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
      |                          size_t len)
27.9k | {
27.9k |     DECLARE_IS_ENDIAN;
27.9k |     unsigned int ctr;
27.9k | #ifdef GCM_FUNCREF_4BIT
27.9k |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
27.9k | #endif
      |
27.9k |     ctx->len.u[0] = 0;          /* AAD length */
27.9k |     ctx->len.u[1] = 0;          /* message length */
27.9k |     ctx->ares = 0;
27.9k |     ctx->mres = 0;
      |
27.9k |     if (len == 12) {
27.9k |         memcpy(ctx->Yi.c, iv, 12);
27.9k |         ctx->Yi.c[12] = 0;
27.9k |         ctx->Yi.c[13] = 0;
27.9k |         ctx->Yi.c[14] = 0;
27.9k |         ctx->Yi.c[15] = 1;
27.9k |         ctr = 1;
27.9k |     } else {
    0 |         size_t i;
    0 |         u64 len0 = len;
      |
      |         /* Borrow ctx->Xi to calculate initial Yi */
    0 |         ctx->Xi.u[0] = 0;
    0 |         ctx->Xi.u[1] = 0;
      |
    0 |         while (len >= 16) {
    0 |             for (i = 0; i < 16; ++i)
    0 |                 ctx->Xi.c[i] ^= iv[i];
    0 |             GCM_MUL(ctx);
    0 |             iv += 16;
    0 |             len -= 16;
    0 |         }
    0 |         if (len) {
    0 |             for (i = 0; i < len; ++i)
    0 |                 ctx->Xi.c[i] ^= iv[i];
    0 |             GCM_MUL(ctx);
    0 |         }
    0 |         len0 <<= 3;
    0 |         if (IS_LITTLE_ENDIAN) {
      | #ifdef BSWAP8
      |             ctx->Xi.u[1] ^= BSWAP8(len0);
      | #else
    0 |             ctx->Xi.c[8] ^= (u8)(len0 >> 56);
    0 |             ctx->Xi.c[9] ^= (u8)(len0 >> 48);
    0 |             ctx->Xi.c[10] ^= (u8)(len0 >> 40);
    0 |             ctx->Xi.c[11] ^= (u8)(len0 >> 32);
    0 |             ctx->Xi.c[12] ^= (u8)(len0 >> 24);
    0 |             ctx->Xi.c[13] ^= (u8)(len0 >> 16);
    0 |             ctx->Xi.c[14] ^= (u8)(len0 >> 8);
    0 |             ctx->Xi.c[15] ^= (u8)(len0);
    0 | #endif
    0 |         } else {
    0 |             ctx->Xi.u[1] ^= len0;
    0 |         }
      |
    0 |         GCM_MUL(ctx);
      |
    0 |         if (IS_LITTLE_ENDIAN)
      | #ifdef BSWAP4
      |             ctr = BSWAP4(ctx->Xi.d[3]);
      | #else
    0 |             ctr = GETU32(ctx->Xi.c + 12);
    0 | #endif
    0 |         else
    0 |             ctr = ctx->Xi.d[3];
      |
      |         /* Copy borrowed Xi to Yi */
    0 |         ctx->Yi.u[0] = ctx->Xi.u[0];
    0 |         ctx->Yi.u[1] = ctx->Xi.u[1];
    0 |     }
      |
27.9k |     ctx->Xi.u[0] = 0;
27.9k |     ctx->Xi.u[1] = 0;
      |
27.9k |     (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
27.9k |     ++ctr;
27.9k |     if (IS_LITTLE_ENDIAN)
      | #ifdef BSWAP4
      |         ctx->Yi.d[3] = BSWAP4(ctr);
      | #else
27.9k |         PUTU32(ctx->Yi.c + 12, ctr);
    0 | #endif
    0 |     else
    0 |         ctx->Yi.d[3] = ctr;
27.9k | }
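
[Editorial aside on what setiv computes, per the GCM specification
(NIST SP 800-38D): the pre-counter block is

    Y_0 = IV \,\|\, 0^{31} \,\|\, 1                                   if |IV| = 96 bits,
    Y_0 = \mathrm{GHASH}_H\big(IV \,\|\, 0^{s} \,\|\, 0^{64} \,\|\, [\,|IV|\,]_{64}\big)   otherwise,

where 0^s pads the IV to a 128-bit boundary. The len == 12 branch above is the
first case; the else branch is the second, borrowing ctx->Xi for the GHASH, and
EK0 = E_K(Y_0) is retained for the final tag. The zero counts on the else
branch show that only 96-bit IVs were exercised in this run.]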
      |
      | int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
      |                       size_t len)
32.9k | {
32.9k |     size_t i;
32.9k |     unsigned int n;
32.9k |     u64 alen = ctx->len.u[0];
32.9k | #ifdef GCM_FUNCREF_4BIT
32.9k |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
32.9k | # ifdef GHASH
32.9k |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
32.9k |                          const u8 *inp, size_t len) = ctx->ghash;
32.9k | # endif
32.9k | #endif
      |
32.9k |     if (ctx->len.u[1])
    0 |         return -2;
      |
32.9k |     alen += len;
32.9k |     if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
    0 |         return -1;
32.9k |     ctx->len.u[0] = alen;
      |
32.9k |     n = ctx->ares;
32.9k |     if (n) {
    0 |         while (n && len) {
    0 |             ctx->Xi.c[n] ^= *(aad++);
    0 |             --len;
    0 |             n = (n + 1) % 16;
    0 |         }
    0 |         if (n == 0)
    0 |             GCM_MUL(ctx);
    0 |         else {
    0 |             ctx->ares = n;
    0 |             return 0;
    0 |         }
    0 |     }
32.9k | #ifdef GHASH
32.9k |     if ((i = (len & (size_t)-16))) {
    0 |         GHASH(ctx, aad, i);
    0 |         aad += i;
    0 |         len -= i;
    0 |     }
      | #else
      |     while (len >= 16) {
      |         for (i = 0; i < 16; ++i)
      |             ctx->Xi.c[i] ^= aad[i];
      |         GCM_MUL(ctx);
      |         aad += 16;
      |         len -= 16;
      |     }
      | #endif
32.9k |     if (len) {
32.9k |         n = (unsigned int)len;
 206k |         for (i = 0; i < len; ++i)
 173k |             ctx->Xi.c[i] ^= aad[i];
32.9k |     }
      |
32.9k |     ctx->ares = n;
32.9k |     return 0;
32.9k | }
      |
      | int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
      |                           const unsigned char *in, unsigned char *out,
      |                           size_t len)
1.76k | {
1.76k |     DECLARE_IS_ENDIAN;
1.76k |     unsigned int n, ctr, mres;
1.76k |     size_t i;
1.76k |     u64 mlen = ctx->len.u[1];
1.76k |     block128_f block = ctx->block;
1.76k |     void *key = ctx->key;
1.76k | #ifdef GCM_FUNCREF_4BIT
1.76k |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1.76k | # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1.76k |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1.76k |                          const u8 *inp, size_t len) = ctx->ghash;
1.76k | # endif
1.76k | #endif
      |
1.76k |     mlen += len;
1.76k |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
    0 |         return -1;
1.76k |     ctx->len.u[1] = mlen;
      |
1.76k |     mres = ctx->mres;
      |
1.76k |     if (ctx->ares) {
      |         /* First call to encrypt finalizes GHASH(AAD) */
1.76k | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1.76k |         if (len == 0) {
1.53k |             GCM_MUL(ctx);
1.53k |             ctx->ares = 0;
1.53k |             return 0;
1.53k |         }
  232 |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
  232 |         ctx->Xi.u[0] = 0;
  232 |         ctx->Xi.u[1] = 0;
  232 |         mres = sizeof(ctx->Xi);
      | #else
      |         GCM_MUL(ctx);
      | #endif
  232 |         ctx->ares = 0;
  232 |     }
      |
  232 |     if (IS_LITTLE_ENDIAN)
      | #ifdef BSWAP4
      |         ctr = BSWAP4(ctx->Yi.d[3]);
      | #else
  232 |         ctr = GETU32(ctx->Yi.c + 12);
    0 | #endif
    0 |     else
    0 |         ctr = ctx->Yi.d[3];
      |
  232 |     n = mres % 16;
  232 | #if !defined(OPENSSL_SMALL_FOOTPRINT)
  232 |     if (16 % sizeof(size_t) == 0) { /* always true actually */
  232 |         do {
  232 |             if (n) {
    0 | # if defined(GHASH)
    0 |                 while (n && len) {
    0 |                     ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
    0 |                     --len;
    0 |                     n = (n + 1) % 16;
    0 |                 }
    0 |                 if (n == 0) {
    0 |                     GHASH(ctx, ctx->Xn, mres);
    0 |                     mres = 0;
    0 |                 } else {
    0 |                     ctx->mres = mres;
    0 |                     return 0;
    0 |                 }
      | # else
      |                 while (n && len) {
      |                     ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      |                     --len;
      |                     n = (n + 1) % 16;
      |                 }
      |                 if (n == 0) {
      |                     GCM_MUL(ctx);
      |                     mres = 0;
      |                 } else {
      |                     ctx->mres = n;
      |                     return 0;
      |                 }
      | # endif
    0 |             }
  232 | # if defined(STRICT_ALIGNMENT)
  232 |             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
    0 |                 break;
  232 | # endif
  232 | # if defined(GHASH)
  232 |             if (len >= 16 && mres) {
  121 |                 GHASH(ctx, ctx->Xn, mres);
  121 |                 mres = 0;
  121 |             }
  232 | #  if defined(GHASH_CHUNK)
  232 |             while (len >= GHASH_CHUNK) {
    0 |                 size_t j = GHASH_CHUNK;
      |
    0 |                 while (j) {
    0 |                     size_t_aX *out_t = (size_t_aX *)out;
    0 |                     const size_t_aX *in_t = (const size_t_aX *)in;
      |
    0 |                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
    0 |                     ++ctr;
    0 |                     if (IS_LITTLE_ENDIAN)
      | #   ifdef BSWAP4
      |                         ctx->Yi.d[3] = BSWAP4(ctr);
      | #   else
    0 |                         PUTU32(ctx->Yi.c + 12, ctr);
    0 | #   endif
    0 |                     else
    0 |                         ctx->Yi.d[3] = ctr;
    0 |                     for (i = 0; i < 16 / sizeof(size_t); ++i)
    0 |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    0 |                     out += 16;
    0 |                     in += 16;
    0 |                     j -= 16;
    0 |                 }
    0 |                 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    0 |                 len -= GHASH_CHUNK;
    0 |             }
  232 | #  endif
  232 |             if ((i = (len & (size_t)-16))) {
  121 |                 size_t j = i;
      |
  242 |                 while (len >= 16) {
  121 |                     size_t_aX *out_t = (size_t_aX *)out;
  121 |                     const size_t_aX *in_t = (const size_t_aX *)in;
      |
  121 |                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
  121 |                     ++ctr;
  121 |                     if (IS_LITTLE_ENDIAN)
      | #  ifdef BSWAP4
      |                         ctx->Yi.d[3] = BSWAP4(ctr);
      | #  else
  121 |                         PUTU32(ctx->Yi.c + 12, ctr);
    0 | #  endif
    0 |                     else
    0 |                         ctx->Yi.d[3] = ctr;
  363 |                     for (i = 0; i < 16 / sizeof(size_t); ++i)
  242 |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
  121 |                     out += 16;
  121 |                     in += 16;
  121 |                     len -= 16;
  121 |                 }
  121 |                 GHASH(ctx, out - j, j);
  121 |             }
      | # else
      |             while (len >= 16) {
      |                 size_t *out_t = (size_t *)out;
      |                 const size_t *in_t = (const size_t *)in;
      |
      |                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
      |                 ++ctr;
      |                 if (IS_LITTLE_ENDIAN)
      | #  ifdef BSWAP4
      |                     ctx->Yi.d[3] = BSWAP4(ctr);
      | #  else
      |                     PUTU32(ctx->Yi.c + 12, ctr);
      | #  endif
      |                 else
      |                     ctx->Yi.d[3] = ctr;
      |                 for (i = 0; i < 16 / sizeof(size_t); ++i)
      |                     ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      |                 GCM_MUL(ctx);
      |                 out += 16;
      |                 in += 16;
      |                 len -= 16;
      |             }
      | # endif
  232 |             if (len) {
  111 |                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
  111 |                 ++ctr;
  111 |                 if (IS_LITTLE_ENDIAN)
      | # ifdef BSWAP4
      |                     ctx->Yi.d[3] = BSWAP4(ctr);
      | # else
  111 |                     PUTU32(ctx->Yi.c + 12, ctr);
    0 | # endif
    0 |                 else
    0 |                     ctx->Yi.d[3] = ctr;
  111 | # if defined(GHASH)
  333 |                 while (len--) {
  222 |                     ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
  222 |                     ++n;
  222 |                 }
      | # else
      |                 while (len--) {
      |                     ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      |                     ++n;
      |                 }
      |                 mres = n;
      | # endif
  111 |             }
      |
  232 |             ctx->mres = mres;
  232 |             return 0;
  232 |         } while (0);
  232 |     }
    0 | #endif
    0 |     for (i = 0; i < len; ++i) {
    0 |         if (n == 0) {
    0 |             (*block) (ctx->Yi.c, ctx->EKi.c, key);
    0 |             ++ctr;
    0 |             if (IS_LITTLE_ENDIAN)
      | #ifdef BSWAP4
      |                 ctx->Yi.d[3] = BSWAP4(ctr);
      | #else
    0 |                 PUTU32(ctx->Yi.c + 12, ctr);
    0 | #endif
    0 |             else
    0 |                 ctx->Yi.d[3] = ctr;
    0 |         }
    0 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    0 |         ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
    0 |         n = (n + 1) % 16;
    0 |         if (mres == sizeof(ctx->Xn)) {
    0 |             GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
    0 |             mres = 0;
    0 |         }
      | #else
      |         ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      |         mres = n = (n + 1) % 16;
      |         if (n == 0)
      |             GCM_MUL(ctx);
      | #endif
    0 |     }
      |
    0 |     ctx->mres = mres;
    0 |     return 0;
  232 | }
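
[Editorial aside: the body above is GCM's CTR core. Per 16-byte block,

    Y_j = \mathrm{inc}_{32}(Y_{j-1}), \qquad C_j = P_j \oplus E_K(Y_j),

while the running hash is X_i = (X_{i-1} \oplus C_i) \cdot H. In this build
the ciphertext is staged in ctx->Xn and handed to GHASH in batches (up to
GHASH_CHUNK bytes) rather than multiplied block by block, which is why GCM_MUL
barely appears in the hot-path counts.]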
      |
      | int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
      |                           const unsigned char *in, unsigned char *out,
      |                           size_t len)
1.23k | {
1.23k |     DECLARE_IS_ENDIAN;
1.23k |     unsigned int n, ctr, mres;
1.23k |     size_t i;
1.23k |     u64 mlen = ctx->len.u[1];
1.23k |     block128_f block = ctx->block;
1.23k |     void *key = ctx->key;
1.23k | #ifdef GCM_FUNCREF_4BIT
1.23k |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1.23k | # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1.23k |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1.23k |                          const u8 *inp, size_t len) = ctx->ghash;
1.23k | # endif
1.23k | #endif
      |
1.23k |     mlen += len;
1.23k |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
    0 |         return -1;
1.23k |     ctx->len.u[1] = mlen;
      |
1.23k |     mres = ctx->mres;
      |
1.23k |     if (ctx->ares) {
      |         /* First call to decrypt finalizes GHASH(AAD) */
1.23k | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1.23k |         if (len == 0) {
1.02k |             GCM_MUL(ctx);
1.02k |             ctx->ares = 0;
1.02k |             return 0;
1.02k |         }
  205 |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
  205 |         ctx->Xi.u[0] = 0;
  205 |         ctx->Xi.u[1] = 0;
  205 |         mres = sizeof(ctx->Xi);
      | #else
      |         GCM_MUL(ctx);
      | #endif
  205 |         ctx->ares = 0;
  205 |     }
      |
  205 |     if (IS_LITTLE_ENDIAN)
      | #ifdef BSWAP4
      |         ctr = BSWAP4(ctx->Yi.d[3]);
      | #else
  205 |         ctr = GETU32(ctx->Yi.c + 12);
    0 | #endif
    0 |     else
    0 |         ctr = ctx->Yi.d[3];
      |
  205 |     n = mres % 16;
  205 | #if !defined(OPENSSL_SMALL_FOOTPRINT)
  205 |     if (16 % sizeof(size_t) == 0) { /* always true actually */
  205 |         do {
  205 |             if (n) {
    0 | # if defined(GHASH)
    0 |                 while (n && len) {
    0 |                     *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
    0 |                     --len;
    0 |                     n = (n + 1) % 16;
    0 |                 }
    0 |                 if (n == 0) {
    0 |                     GHASH(ctx, ctx->Xn, mres);
    0 |                     mres = 0;
    0 |                 } else {
    0 |                     ctx->mres = mres;
    0 |                     return 0;
    0 |                 }
      | # else
      |                 while (n && len) {
      |                     u8 c = *(in++);
      |                     *(out++) = c ^ ctx->EKi.c[n];
      |                     ctx->Xi.c[n] ^= c;
      |                     --len;
      |                     n = (n + 1) % 16;
      |                 }
      |                 if (n == 0) {
      |                     GCM_MUL(ctx);
      |                     mres = 0;
      |                 } else {
      |                     ctx->mres = n;
      |                     return 0;
      |                 }
      | # endif
    0 |             }
  205 | # if defined(STRICT_ALIGNMENT)
  205 |             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
    0 |                 break;
  205 | # endif
  205 | # if defined(GHASH)
  205 |             if (len >= 16 && mres) {
  175 |                 GHASH(ctx, ctx->Xn, mres);
  175 |                 mres = 0;
  175 |             }
  205 | #  if defined(GHASH_CHUNK)
  367 |             while (len >= GHASH_CHUNK) {
  162 |                 size_t j = GHASH_CHUNK;
      |
  162 |                 GHASH(ctx, in, GHASH_CHUNK);
31.2k |                 while (j) {
31.1k |                     size_t_aX *out_t = (size_t_aX *)out;
31.1k |                     const size_t_aX *in_t = (const size_t_aX *)in;
      |
31.1k |                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
31.1k |                     ++ctr;
31.1k |                     if (IS_LITTLE_ENDIAN)
      | #   ifdef BSWAP4
      |                         ctx->Yi.d[3] = BSWAP4(ctr);
      | #   else
31.1k |                         PUTU32(ctx->Yi.c + 12, ctr);
    0 | #   endif
    0 |                     else
    0 |                         ctx->Yi.d[3] = ctr;
93.3k |                     for (i = 0; i < 16 / sizeof(size_t); ++i)
62.2k |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
31.1k |                     out += 16;
31.1k |                     in += 16;
31.1k |                     j -= 16;
31.1k |                 }
  162 |                 len -= GHASH_CHUNK;
  162 |             }
  205 | #  endif
  205 |             if ((i = (len & (size_t)-16))) {
  169 |                 GHASH(ctx, in, i);
7.80k |                 while (len >= 16) {
7.63k |                     size_t_aX *out_t = (size_t_aX *)out;
7.63k |                     const size_t_aX *in_t = (const size_t_aX *)in;
      |
7.63k |                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
7.63k |                     ++ctr;
7.63k |                     if (IS_LITTLE_ENDIAN)
      | #  ifdef BSWAP4
      |                         ctx->Yi.d[3] = BSWAP4(ctr);
      | #  else
7.63k |                         PUTU32(ctx->Yi.c + 12, ctr);
    0 | #  endif
    0 |                     else
    0 |                         ctx->Yi.d[3] = ctr;
22.8k |                     for (i = 0; i < 16 / sizeof(size_t); ++i)
15.2k |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
7.63k |                     out += 16;
7.63k |                     in += 16;
7.63k |                     len -= 16;
7.63k |                 }
  169 |             }
      | # else
      |             while (len >= 16) {
      |                 size_t *out_t = (size_t *)out;
      |                 const size_t *in_t = (const size_t *)in;
      |
      |                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
      |                 ++ctr;
      |                 if (IS_LITTLE_ENDIAN)
      | #  ifdef BSWAP4
      |                     ctx->Yi.d[3] = BSWAP4(ctr);
      | #  else
      |                     PUTU32(ctx->Yi.c + 12, ctr);
      | #  endif
      |                 else
      |                     ctx->Yi.d[3] = ctr;
      |                 for (i = 0; i < 16 / sizeof(size_t); ++i) {
      |                     size_t c = in_t[i];
      |                     out_t[i] = c ^ ctx->EKi.t[i];
      |                     ctx->Xi.t[i] ^= c;
      |                 }
      |                 GCM_MUL(ctx);
      |                 out += 16;
      |                 in += 16;
      |                 len -= 16;
      |             }
      | # endif
  205 |             if (len) {
  173 |                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
  173 |                 ++ctr;
  173 |                 if (IS_LITTLE_ENDIAN)
      | # ifdef BSWAP4
      |                     ctx->Yi.d[3] = BSWAP4(ctr);
      | # else
  173 |                     PUTU32(ctx->Yi.c + 12, ctr);
    0 | # endif
    0 |                 else
    0 |                     ctx->Yi.d[3] = ctr;
  173 | # if defined(GHASH)
1.44k |                 while (len--) {
1.27k |                     out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1.27k |                     ++n;
1.27k |                 }
      | # else
      |                 while (len--) {
      |                     u8 c = in[n];
      |                     ctx->Xi.c[n] ^= c;
      |                     out[n] = c ^ ctx->EKi.c[n];
      |                     ++n;
      |                 }
      |                 mres = n;
      | # endif
  173 |             }
      |
  205 |             ctx->mres = mres;
  205 |             return 0;
  205 |         } while (0);
  205 |     }
    0 | #endif
    0 |     for (i = 0; i < len; ++i) {
    0 |         u8 c;
    0 |         if (n == 0) {
    0 |             (*block) (ctx->Yi.c, ctx->EKi.c, key);
    0 |             ++ctr;
    0 |             if (IS_LITTLE_ENDIAN)
      | #ifdef BSWAP4
      |                 ctx->Yi.d[3] = BSWAP4(ctr);
      | #else
    0 |                 PUTU32(ctx->Yi.c + 12, ctr);
    0 | #endif
    0 |             else
    0 |                 ctx->Yi.d[3] = ctr;
    0 |         }
    0 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    0 |         out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
    0 |         n = (n + 1) % 16;
    0 |         if (mres == sizeof(ctx->Xn)) {
    0 |             GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
    0 |             mres = 0;
    0 |         }
      | #else
      |         c = in[i];
      |         out[i] = c ^ ctx->EKi.c[n];
      |         ctx->Xi.c[n] ^= c;
      |         mres = n = (n + 1) % 16;
      |         if (n == 0)
      |             GCM_MUL(ctx);
      | #endif
    0 |     }
      |
    0 |     ctx->mres = mres;
    0 |     return 0;
  205 | }
      |
      | int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
      |                                 const unsigned char *in, unsigned char *out,
      |                                 size_t len, ctr128_f stream)
2.63k | {
      | #if defined(OPENSSL_SMALL_FOOTPRINT)
      |     return CRYPTO_gcm128_encrypt(ctx, in, out, len);
      | #else
2.63k |     DECLARE_IS_ENDIAN;
2.63k |     unsigned int n, ctr, mres;
2.63k |     size_t i;
2.63k |     u64 mlen = ctx->len.u[1];
2.63k |     void *key = ctx->key;
2.63k | # ifdef GCM_FUNCREF_4BIT
2.63k |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
2.63k | #  ifdef GHASH
2.63k |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2.63k |                          const u8 *inp, size_t len) = ctx->ghash;
2.63k | #  endif
2.63k | # endif
      |
2.63k |     mlen += len;
2.63k |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
    0 |         return -1;
2.63k |     ctx->len.u[1] = mlen;
      |
2.63k |     mres = ctx->mres;
      |
2.63k |     if (ctx->ares) {
      |         /* First call to encrypt finalizes GHASH(AAD) */
1.10k | #if defined(GHASH)
1.10k |         if (len == 0) {
    0 |             GCM_MUL(ctx);
    0 |             ctx->ares = 0;
    0 |             return 0;
    0 |         }
1.10k |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1.10k |         ctx->Xi.u[0] = 0;
1.10k |         ctx->Xi.u[1] = 0;
1.10k |         mres = sizeof(ctx->Xi);
      | #else
      |         GCM_MUL(ctx);
      | #endif
1.10k |         ctx->ares = 0;
1.10k |     }
      |
2.63k |     if (IS_LITTLE_ENDIAN)
      | # ifdef BSWAP4
      |         ctr = BSWAP4(ctx->Yi.d[3]);
      | # else
2.63k |         ctr = GETU32(ctx->Yi.c + 12);
    0 | # endif
    0 |     else
    0 |         ctr = ctx->Yi.d[3];
      |
2.63k |     n = mres % 16;
2.63k |     if (n) {
    0 | # if defined(GHASH)
    0 |         while (n && len) {
    0 |             ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
    0 |             --len;
    0 |             n = (n + 1) % 16;
    0 |         }
    0 |         if (n == 0) {
    0 |             GHASH(ctx, ctx->Xn, mres);
    0 |             mres = 0;
    0 |         } else {
    0 |             ctx->mres = mres;
    0 |             return 0;
    0 |         }
      | # else
      |         while (n && len) {
      |             ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      |             --len;
      |             n = (n + 1) % 16;
      |         }
      |         if (n == 0) {
      |             GCM_MUL(ctx);
      |             mres = 0;
      |         } else {
      |             ctx->mres = n;
      |             return 0;
      |         }
      | # endif
    0 |     }
2.63k | # if defined(GHASH)
2.63k |         if (len >= 16 && mres) {
  282 |             GHASH(ctx, ctx->Xn, mres);
  282 |             mres = 0;
  282 |         }
2.63k | #  if defined(GHASH_CHUNK)
2.63k |     while (len >= GHASH_CHUNK) {
    0 |         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    0 |         ctr += GHASH_CHUNK / 16;
    0 |         if (IS_LITTLE_ENDIAN)
      | #   ifdef BSWAP4
      |             ctx->Yi.d[3] = BSWAP4(ctr);
      | #   else
    0 |             PUTU32(ctx->Yi.c + 12, ctr);
    0 | #   endif
    0 |         else
    0 |             ctx->Yi.d[3] = ctr;
    0 |         GHASH(ctx, out, GHASH_CHUNK);
    0 |         out += GHASH_CHUNK;
    0 |         in += GHASH_CHUNK;
    0 |         len -= GHASH_CHUNK;
    0 |     }
2.63k | #  endif
2.63k | # endif
2.63k |     if ((i = (len & (size_t)-16))) {
1.81k |         size_t j = i / 16;
      |
1.81k |         (*stream) (in, out, j, key, ctx->Yi.c);
1.81k |         ctr += (unsigned int)j;
1.81k |         if (IS_LITTLE_ENDIAN)
      | # ifdef BSWAP4
      |             ctx->Yi.d[3] = BSWAP4(ctr);
      | # else
1.81k |             PUTU32(ctx->Yi.c + 12, ctr);
    0 | # endif
    0 |         else
    0 |             ctx->Yi.d[3] = ctr;
1.81k |         in += i;
1.81k |         len -= i;
1.81k | # if defined(GHASH)
1.81k |         GHASH(ctx, out, i);
1.81k |         out += i;
      | # else
      |         while (j--) {
      |             for (i = 0; i < 16; ++i)
      |                 ctx->Xi.c[i] ^= out[i];
      |             GCM_MUL(ctx);
      |             out += 16;
      |         }
      | # endif
1.81k |     }
2.63k |     if (len) {
2.16k |         (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
2.16k |         ++ctr;
2.16k |         if (IS_LITTLE_ENDIAN)
      | # ifdef BSWAP4
      |             ctx->Yi.d[3] = BSWAP4(ctr);
      | # else
2.16k |             PUTU32(ctx->Yi.c + 12, ctr);
    0 | # endif
    0 |         else
    0 |             ctx->Yi.d[3] = ctr;
12.9k |         while (len--) {
10.7k | # if defined(GHASH)
10.7k |             ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
      | # else
      |             ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      | # endif
10.7k |             ++n;
10.7k |         }
2.16k |     }
      |
2.63k |     ctx->mres = mres;
2.63k |     return 0;
2.63k | #endif
2.63k | }
1598
1599
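The tail of CRYPTO_gcm128_encrypt_ctr32 above turns on two invariants: the running message length mlen is capped at 2^36 - 32 bytes (the NIST SP 800-38D limit of 2^39 - 256 plaintext bits), so the 32-bit block counter held in Yi.c[12..15] can never wrap back onto the initial counter block; and, when GHASH is available, finalization of GHASH(AAD) is deferred by staging Xi into the Xn buffer so that subsequent hashing can run over larger batches. A standalone sketch of the overflow-safe length accounting follows; gcm_len_ok is a hypothetical helper, not part of the OpenSSL API:

#include <stdint.h>
#include <stddef.h>

/* Hypothetical helper mirroring the mlen check in
 * CRYPTO_gcm128_encrypt_ctr32: the running byte count must stay within
 * 2^36 - 32 bytes and the 64-bit accumulator must not wrap. */
static int gcm_len_ok(uint64_t *total, size_t add)
{
    uint64_t mlen = *total + add;

    if (mlen > (((uint64_t)1 << 36) - 32) || mlen < add)
        return -1;              /* message too long or 64-bit overflow */
    *total = mlen;
    return 0;
}

The decrypt path below applies the identical bound, returning -1 before updating the context when it is exceeded.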
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1600
                                const unsigned char *in, unsigned char *out,
1601
                                size_t len, ctr128_f stream)
1602
24.8k
{
1603
#if defined(OPENSSL_SMALL_FOOTPRINT)
1604
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1605
#else
1606
24.8k
    DECLARE_IS_ENDIAN;
1607
24.8k
    unsigned int n, ctr, mres;
1608
24.8k
    size_t i;
1609
24.8k
    u64 mlen = ctx->len.u[1];
1610
24.8k
    void *key = ctx->key;
1611
24.8k
# ifdef GCM_FUNCREF_4BIT
1612
24.8k
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1613
24.8k
#  ifdef GHASH
1614
24.8k
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1615
24.8k
                         const u8 *inp, size_t len) = ctx->ghash;
1616
24.8k
#  endif
1617
24.8k
# endif
1618
1619
24.8k
    mlen += len;
1620
24.8k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1621
0
        return -1;
1622
24.8k
    ctx->len.u[1] = mlen;
1623
1624
24.8k
    mres = ctx->mres;
1625
1626
24.8k
    if (ctx->ares) {
1627
        /* First call to decrypt finalizes GHASH(AAD) */
1628
23.8k
# if defined(GHASH)
1629
23.8k
        if (len == 0) {
1630
53
            GCM_MUL(ctx);
1631
53
            ctx->ares = 0;
1632
53
            return 0;
1633
53
        }
1634
23.7k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1635
23.7k
        ctx->Xi.u[0] = 0;
1636
23.7k
        ctx->Xi.u[1] = 0;
1637
23.7k
        mres = sizeof(ctx->Xi);
1638
# else
1639
        GCM_MUL(ctx);
1640
# endif
1641
23.7k
        ctx->ares = 0;
1642
23.7k
    }
1643
1644
24.7k
    if (IS_LITTLE_ENDIAN)
1645
# ifdef BSWAP4
1646
        ctr = BSWAP4(ctx->Yi.d[3]);
1647
# else
1648
24.7k
        ctr = GETU32(ctx->Yi.c + 12);
1649
0
# endif
1650
0
    else
1651
0
        ctr = ctx->Yi.d[3];
1652
1653
24.7k
    n = mres % 16;
1654
24.7k
    if (n) {
1655
0
# if defined(GHASH)
1656
0
        while (n && len) {
1657
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1658
0
            --len;
1659
0
            n = (n + 1) % 16;
1660
0
        }
1661
0
        if (n == 0) {
1662
0
            GHASH(ctx, ctx->Xn, mres);
1663
0
            mres = 0;
1664
0
        } else {
1665
0
            ctx->mres = mres;
1666
0
            return 0;
1667
0
        }
1668
# else
1669
        while (n && len) {
1670
            u8 c = *(in++);
1671
            *(out++) = c ^ ctx->EKi.c[n];
1672
            ctx->Xi.c[n] ^= c;
1673
            --len;
1674
            n = (n + 1) % 16;
1675
        }
1676
        if (n == 0) {
1677
            GCM_MUL(ctx);
1678
            mres = 0;
1679
        } else {
1680
            ctx->mres = n;
1681
            return 0;
1682
        }
1683
# endif
1684
0
    }
1685
24.7k
# if defined(GHASH)
1686
24.7k
    if (len >= 16 && mres) {
1687
0
        GHASH(ctx, ctx->Xn, mres);
1688
0
        mres = 0;
1689
0
    }
1690
24.7k
#  if defined(GHASH_CHUNK)
1691
24.7k
    while (len >= GHASH_CHUNK) {
1692
0
        GHASH(ctx, in, GHASH_CHUNK);
1693
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1694
0
        ctr += GHASH_CHUNK / 16;
1695
0
        if (IS_LITTLE_ENDIAN)
1696
#   ifdef BSWAP4
1697
            ctx->Yi.d[3] = BSWAP4(ctr);
1698
#   else
1699
0
            PUTU32(ctx->Yi.c + 12, ctr);
1700
0
#   endif
1701
0
        else
1702
0
            ctx->Yi.d[3] = ctr;
1703
0
        out += GHASH_CHUNK;
1704
0
        in += GHASH_CHUNK;
1705
0
        len -= GHASH_CHUNK;
1706
0
    }
1707
24.7k
#  endif
1708
24.7k
# endif
1709
24.7k
    if ((i = (len & (size_t)-16))) {
1710
990
        size_t j = i / 16;
1711
1712
990
# if defined(GHASH)
1713
990
        GHASH(ctx, in, i);
1714
# else
1715
        while (j--) {
1716
            size_t k;
1717
            for (k = 0; k < 16; ++k)
1718
                ctx->Xi.c[k] ^= in[k];
1719
            GCM_MUL(ctx);
1720
            in += 16;
1721
        }
1722
        j = i / 16;
1723
        in -= i;
1724
# endif
1725
990
        (*stream) (in, out, j, key, ctx->Yi.c);
1726
990
        ctr += (unsigned int)j;
1727
990
        if (IS_LITTLE_ENDIAN)
1728
# ifdef BSWAP4
1729
            ctx->Yi.d[3] = BSWAP4(ctr);
1730
# else
1731
990
            PUTU32(ctx->Yi.c + 12, ctr);
1732
0
# endif
1733
0
        else
1734
0
            ctx->Yi.d[3] = ctr;
1735
990
        out += i;
1736
990
        in += i;
1737
990
        len -= i;
1738
990
    }
1739
24.7k
    if (len) {
1740
23.8k
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1741
23.8k
        ++ctr;
1742
23.8k
        if (IS_LITTLE_ENDIAN)
1743
# ifdef BSWAP4
1744
            ctx->Yi.d[3] = BSWAP4(ctr);
1745
# else
1746
23.8k
            PUTU32(ctx->Yi.c + 12, ctr);
1747
0
# endif
1748
0
        else
1749
0
            ctx->Yi.d[3] = ctr;
1750
98.2k
        while (len--) {
1751
74.3k
# if defined(GHASH)
1752
74.3k
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1753
# else
1754
            u8 c = in[n];
1755
            ctx->Xi.c[mres++] ^= c;
1756
            out[n] = c ^ ctx->EKi.c[n];
1757
# endif
1758
74.3k
            ++n;
1759
74.3k
        }
1760
23.8k
    }
1761
1762
24.7k
    ctx->mres = mres;
1763
24.7k
    return 0;
1764
24.7k
#endif
1765
24.7k
}
1766
1767
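Note the symmetry with the encrypt path: GCM authenticates the ciphertext on both sides, so CRYPTO_gcm128_decrypt_ctr32 feeds its input to GHASH before the keystream pass (GHASH(ctx, in, ...) ahead of (*stream)), whereas the encrypt path hashes its output afterwards. A compact sketch of that per-block ordering, assuming a hypothetical absorb_f callback standing in for the GHASH update:

#include <stddef.h>

typedef void (*absorb_f)(void *hash_state, const unsigned char block[16]);

/* Encrypt: XOR first, then authenticate the resulting ciphertext. */
static void gcm_block_encrypt(unsigned char io[16],
                              const unsigned char keystream[16],
                              absorb_f absorb, void *hs)
{
    size_t i;

    for (i = 0; i < 16; i++)
        io[i] ^= keystream[i];  /* C = P ^ E_K(Y) */
    absorb(hs, io);             /* hash the output */
}

/* Decrypt: authenticate the incoming ciphertext, then XOR. */
static void gcm_block_decrypt(unsigned char io[16],
                              const unsigned char keystream[16],
                              absorb_f absorb, void *hs)
{
    size_t i;

    absorb(hs, io);             /* hash the input */
    for (i = 0; i < 16; i++)
        io[i] ^= keystream[i];  /* P = C ^ E_K(Y) */
}

The same ordering explains the byte-leftover loops: the decrypt tail stores in[n] into Xn before XORing, while the encrypt tail stores the already-XORed result.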
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1768
                         size_t len)
1769
27.9k
{
1770
27.9k
    DECLARE_IS_ENDIAN;
1771
27.9k
    u64 alen = ctx->len.u[0] << 3;
1772
27.9k
    u64 clen = ctx->len.u[1] << 3;
1773
27.9k
#ifdef GCM_FUNCREF_4BIT
1774
27.9k
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1775
27.9k
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1776
27.9k
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1777
27.9k
                         const u8 *inp, size_t len) = ctx->ghash;
1778
27.9k
# endif
1779
27.9k
#endif
1780
1781
27.9k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1782
27.9k
    u128 bitlen;
1783
27.9k
    unsigned int mres = ctx->mres;
1784
1785
27.9k
    if (mres) {
1786
26.3k
        unsigned blocks = (mres + 15) & -16;
1787
1788
26.3k
        memset(ctx->Xn + mres, 0, blocks - mres);
1789
26.3k
        mres = blocks;
1790
26.3k
        if (mres == sizeof(ctx->Xn)) {
1791
0
            GHASH(ctx, ctx->Xn, mres);
1792
0
            mres = 0;
1793
0
        }
1794
26.3k
    } else if (ctx->ares) {
1795
0
        GCM_MUL(ctx);
1796
0
    }
1797
#else
1798
    if (ctx->mres || ctx->ares)
1799
        GCM_MUL(ctx);
1800
#endif
1801
1802
27.9k
    if (IS_LITTLE_ENDIAN) {
1803
#ifdef BSWAP8
1804
        alen = BSWAP8(alen);
1805
        clen = BSWAP8(clen);
1806
#else
1807
27.9k
        u8 *p = ctx->len.c;
1808
1809
27.9k
        ctx->len.u[0] = alen;
1810
27.9k
        ctx->len.u[1] = clen;
1811
1812
27.9k
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1813
27.9k
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1814
27.9k
#endif
1815
27.9k
    }
1816
1817
27.9k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1818
27.9k
    bitlen.hi = alen;
1819
27.9k
    bitlen.lo = clen;
1820
27.9k
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1821
27.9k
    mres += sizeof(bitlen);
1822
27.9k
    GHASH(ctx, ctx->Xn, mres);
1823
#else
1824
    ctx->Xi.u[0] ^= alen;
1825
    ctx->Xi.u[1] ^= clen;
1826
    GCM_MUL(ctx);
1827
#endif
1828
1829
27.9k
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1830
27.9k
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1831
1832
27.9k
    if (tag && len <= sizeof(ctx->Xi))
1833
25.0k
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1834
2.86k
    else
1835
2.86k
        return -1;
1836
27.9k
}
1837
1838
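CRYPTO_gcm128_finish closes the GHASH computation with one final block holding len(AAD) || len(C), both as 64-bit big-endian bit counts (hence the shifts by 3 and the byte swapping above), XORs in EK0 = E_K(Y0), and compares against the caller's tag with the constant-time CRYPTO_memcmp, returning 0 only on a match. A sketch of just the length-block construction; gcm_length_block is an illustrative name, not an OpenSSL function:

#include <stdint.h>

/* Builds S = len(A) || len(C) as two 64-bit big-endian bit counts,
 * matching what the shifts and byte swaps in CRYPTO_gcm128_finish
 * produce regardless of host endianness. */
static void gcm_length_block(unsigned char out[16],
                             uint64_t aad_bytes, uint64_t ct_bytes)
{
    uint64_t alen = aad_bytes << 3;     /* byte counts -> bit counts */
    uint64_t clen = ct_bytes << 3;
    int i;

    for (i = 0; i < 8; i++) {
        out[i]     = (unsigned char)(alen >> (56 - 8 * i));
        out[8 + i] = (unsigned char)(clen >> (56 - 8 * i));
    }
}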
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1839
4.26k
{
1840
4.26k
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1841
4.26k
    memcpy(tag, ctx->Xi.c,
1842
4.26k
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1843
4.26k
}
1844
1845
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1846
0
{
1847
0
    GCM128_CONTEXT *ret;
1848
1849
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1850
0
        CRYPTO_gcm128_init(ret, key, block);
1851
1852
0
    return ret;
1853
0
}
1854
1855
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1856
0
{
1857
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1858
0
}
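Taken together, the helpers above form the whole low-level lifecycle: CRYPTO_gcm128_new wraps OPENSSL_malloc plus CRYPTO_gcm128_init, CRYPTO_gcm128_tag is the sender-side counterpart of CRYPTO_gcm128_finish (it emits the tag instead of verifying one), and CRYPTO_gcm128_release zeroizes the context before freeing it. A minimal end-to-end sketch using AES-128 as the block128_f; gcm_seal_example and the all-zero key and IV are illustrative only:

#include <openssl/aes.h>
#include <openssl/modes.h>

static int gcm_seal_example(void)
{
    static const unsigned char key[16] = { 0 };     /* demo key only */
    static const unsigned char iv[12] = { 0 };      /* 96-bit IV */
    unsigned char aad[] = "header";
    unsigned char pt[] = "hello, gcm";
    unsigned char ct[sizeof(pt)], tag[16];
    AES_KEY aes;
    GCM128_CONTEXT *ctx;

    AES_set_encrypt_key(key, 128, &aes);
    if ((ctx = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
        return -1;

    CRYPTO_gcm128_setiv(ctx, iv, sizeof(iv));
    CRYPTO_gcm128_aad(ctx, aad, sizeof(aad) - 1);   /* skip the NUL */
    CRYPTO_gcm128_encrypt(ctx, pt, ct, sizeof(pt));
    CRYPTO_gcm128_tag(ctx, tag, sizeof(tag));       /* sender emits tag */

    CRYPTO_gcm128_release(ctx);                     /* clears and frees */
    return 0;
}

A receiver would run the same sequence with CRYPTO_gcm128_decrypt and then CRYPTO_gcm128_finish(ctx, tag, 16), treating any nonzero return as an authentication failure.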