Coverage Report

Created: 2023-06-08 06:40

/src/openssl111/crypto/modes/gcm128.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the OpenSSL license (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <openssl/crypto.h>
11
#include "modes_local.h"
12
#include <string.h>
13
14
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
15
typedef size_t size_t_aX __attribute((__aligned__(1)));
16
#else
17
typedef size_t size_t_aX;
18
#endif
19
20
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
21
/* redefine, because alignment is ensured */
22
# undef  GETU32
23
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
24
# undef  PUTU32
25
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
26
#endif
27
28
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
29
0
#define REDUCE1BIT(V)   do { \
30
0
        if (sizeof(size_t)==8) { \
31
0
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
32
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
33
0
                V.hi  = (V.hi>>1 )^T; \
34
0
        } \
35
0
        else { \
36
0
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
37
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
38
0
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
39
0
        } \
40
0
} while(0)
41
42
/*-
43
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
44
 * never be set to 8. 8 is effectively reserved for testing purposes.
45
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
46
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
47
 * whole spectrum of possible table driven implementations. Why? In
48
 * non-"Shoup's" case memory access pattern is segmented in such manner,
49
 * that it's trivial to see that cache timing information can reveal
50
 * fair portion of intermediate hash value. Given that ciphertext is
51
 * always available to attacker, it's possible for him to attempt to
52
 * deduce secret parameter H and if successful, tamper with messages
53
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
54
 * not as trivial, but there is no reason to believe that it's resistant
55
 * to cache-timing attack. And the thing about "8-bit" implementation is
56
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
57
 * key + 1KB shared. Well, on pros side it should be twice as fast as
58
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
59
 * was observed to run ~75% faster, closer to 100% for commercial
60
 * compilers... Yet "4-bit" procedure is preferred, because it's
61
 * believed to provide better security-performance balance and adequate
62
 * all-round performance. "All-round" refers to things like:
63
 *
64
 * - shorter setup time effectively improves overall timing for
65
 *   handling short messages;
66
 * - larger table allocation can become unbearable because of VM
67
 *   subsystem penalties (for example on Windows large enough free
68
 *   results in VM working set trimming, meaning that consequent
69
 *   malloc would immediately incur working set expansion);
70
 * - larger table has larger cache footprint, which can affect
71
 *   performance of other code paths (not necessarily even from same
72
 *   thread in Hyper-Threading world);
73
 *
74
 * Value of 1 is not appropriate for performance reasons.
75
 */
76
#if     TABLE_BITS==8
77
78
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
79
{
80
    int i, j;
81
    u128 V;
82
83
    Htable[0].hi = 0;
84
    Htable[0].lo = 0;
85
    V.hi = H[0];
86
    V.lo = H[1];
87
88
    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
89
        REDUCE1BIT(V);
90
        Htable[i] = V;
91
    }
92
93
    for (i = 2; i < 256; i <<= 1) {
94
        u128 *Hi = Htable + i, H0 = *Hi;
95
        for (j = 1; j < i; ++j) {
96
            Hi[j].hi = H0.hi ^ Htable[j].hi;
97
            Hi[j].lo = H0.lo ^ Htable[j].lo;
98
        }
99
    }
100
}
101
102
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
103
{
104
    u128 Z = { 0, 0 };
105
    const u8 *xi = (const u8 *)Xi + 15;
106
    size_t rem, n = *xi;
107
    const union {
108
        long one;
109
        char little;
110
    } is_endian = { 1 };
111
    static const size_t rem_8bit[256] = {
112
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
113
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
114
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
115
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
116
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
117
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
118
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
119
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
120
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
121
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
122
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
123
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
124
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
125
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
126
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
127
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
128
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
129
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
130
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
131
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
132
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
133
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
134
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
135
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
136
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
137
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
138
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
139
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
140
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
141
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
142
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
143
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
144
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
145
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
146
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
147
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
148
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
149
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
150
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
151
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
152
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
153
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
154
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
155
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
156
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
157
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
158
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
159
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
160
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
161
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
162
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
163
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
164
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
165
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
166
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
167
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
168
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
169
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
170
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
171
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
172
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
173
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
174
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
175
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
176
    };
177
178
    while (1) {
179
        Z.hi ^= Htable[n].hi;
180
        Z.lo ^= Htable[n].lo;
181
182
        if ((u8 *)Xi == xi)
183
            break;
184
185
        n = *(--xi);
186
187
        rem = (size_t)Z.lo & 0xff;
188
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
189
        Z.hi = (Z.hi >> 8);
190
        if (sizeof(size_t) == 8)
191
            Z.hi ^= rem_8bit[rem];
192
        else
193
            Z.hi ^= (u64)rem_8bit[rem] << 32;
194
    }
195
196
    if (is_endian.little) {
197
# ifdef BSWAP8
198
        Xi[0] = BSWAP8(Z.hi);
199
        Xi[1] = BSWAP8(Z.lo);
200
# else
201
        u8 *p = (u8 *)Xi;
202
        u32 v;
203
        v = (u32)(Z.hi >> 32);
204
        PUTU32(p, v);
205
        v = (u32)(Z.hi);
206
        PUTU32(p + 4, v);
207
        v = (u32)(Z.lo >> 32);
208
        PUTU32(p + 8, v);
209
        v = (u32)(Z.lo);
210
        PUTU32(p + 12, v);
211
# endif
212
    } else {
213
        Xi[0] = Z.hi;
214
        Xi[1] = Z.lo;
215
    }
216
}
217
218
# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
219
220
#elif   TABLE_BITS==4
221
222
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
223
0
{
224
0
    u128 V;
225
# if defined(OPENSSL_SMALL_FOOTPRINT)
226
    int i;
227
# endif
228
229
0
    Htable[0].hi = 0;
230
0
    Htable[0].lo = 0;
231
0
    V.hi = H[0];
232
0
    V.lo = H[1];
233
234
# if defined(OPENSSL_SMALL_FOOTPRINT)
235
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
236
        REDUCE1BIT(V);
237
        Htable[i] = V;
238
    }
239
240
    for (i = 2; i < 16; i <<= 1) {
241
        u128 *Hi = Htable + i;
242
        int j;
243
        for (V = *Hi, j = 1; j < i; ++j) {
244
            Hi[j].hi = V.hi ^ Htable[j].hi;
245
            Hi[j].lo = V.lo ^ Htable[j].lo;
246
        }
247
    }
248
# else
249
0
    Htable[8] = V;
250
0
    REDUCE1BIT(V);
251
0
    Htable[4] = V;
252
0
    REDUCE1BIT(V);
253
0
    Htable[2] = V;
254
0
    REDUCE1BIT(V);
255
0
    Htable[1] = V;
256
0
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
257
0
    V = Htable[4];
258
0
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
259
0
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
260
0
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
261
0
    V = Htable[8];
262
0
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
263
0
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
264
0
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
265
0
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
266
0
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
267
0
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
268
0
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
269
0
# endif
270
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
271
    /*
272
     * ARM assembler expects specific dword order in Htable.
273
     */
274
    {
275
        int j;
276
        const union {
277
            long one;
278
            char little;
279
        } is_endian = { 1 };
280
281
        if (is_endian.little)
282
            for (j = 0; j < 16; ++j) {
283
                V = Htable[j];
284
                Htable[j].hi = V.lo;
285
                Htable[j].lo = V.hi;
286
        } else
287
            for (j = 0; j < 16; ++j) {
288
                V = Htable[j];
289
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
290
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
291
            }
292
    }
293
# endif
294
0
}
295
296
# ifndef GHASH_ASM
297
static const size_t rem_4bit[16] = {
298
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
299
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
300
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
301
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
302
};
303
304
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
305
{
306
    u128 Z;
307
    int cnt = 15;
308
    size_t rem, nlo, nhi;
309
    const union {
310
        long one;
311
        char little;
312
    } is_endian = { 1 };
313
314
    nlo = ((const u8 *)Xi)[15];
315
    nhi = nlo >> 4;
316
    nlo &= 0xf;
317
318
    Z.hi = Htable[nlo].hi;
319
    Z.lo = Htable[nlo].lo;
320
321
    while (1) {
322
        rem = (size_t)Z.lo & 0xf;
323
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
324
        Z.hi = (Z.hi >> 4);
325
        if (sizeof(size_t) == 8)
326
            Z.hi ^= rem_4bit[rem];
327
        else
328
            Z.hi ^= (u64)rem_4bit[rem] << 32;
329
330
        Z.hi ^= Htable[nhi].hi;
331
        Z.lo ^= Htable[nhi].lo;
332
333
        if (--cnt < 0)
334
            break;
335
336
        nlo = ((const u8 *)Xi)[cnt];
337
        nhi = nlo >> 4;
338
        nlo &= 0xf;
339
340
        rem = (size_t)Z.lo & 0xf;
341
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
342
        Z.hi = (Z.hi >> 4);
343
        if (sizeof(size_t) == 8)
344
            Z.hi ^= rem_4bit[rem];
345
        else
346
            Z.hi ^= (u64)rem_4bit[rem] << 32;
347
348
        Z.hi ^= Htable[nlo].hi;
349
        Z.lo ^= Htable[nlo].lo;
350
    }
351
352
    if (is_endian.little) {
353
#  ifdef BSWAP8
354
        Xi[0] = BSWAP8(Z.hi);
355
        Xi[1] = BSWAP8(Z.lo);
356
#  else
357
        u8 *p = (u8 *)Xi;
358
        u32 v;
359
        v = (u32)(Z.hi >> 32);
360
        PUTU32(p, v);
361
        v = (u32)(Z.hi);
362
        PUTU32(p + 4, v);
363
        v = (u32)(Z.lo >> 32);
364
        PUTU32(p + 8, v);
365
        v = (u32)(Z.lo);
366
        PUTU32(p + 12, v);
367
#  endif
368
    } else {
369
        Xi[0] = Z.hi;
370
        Xi[1] = Z.lo;
371
    }
372
}
373
374
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
375
/*
376
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
377
 * details... Compiler-generated code doesn't seem to give any
378
 * performance improvement, at least not on x86[_64]. It's here
379
 * mostly as reference and a placeholder for possible future
380
 * non-trivial optimization[s]...
381
 */
382
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
383
                           const u8 *inp, size_t len)
384
{
385
    u128 Z;
386
    int cnt;
387
    size_t rem, nlo, nhi;
388
    const union {
389
        long one;
390
        char little;
391
    } is_endian = { 1 };
392
393
#   if 1
394
    do {
395
        cnt = 15;
396
        nlo = ((const u8 *)Xi)[15];
397
        nlo ^= inp[15];
398
        nhi = nlo >> 4;
399
        nlo &= 0xf;
400
401
        Z.hi = Htable[nlo].hi;
402
        Z.lo = Htable[nlo].lo;
403
404
        while (1) {
405
            rem = (size_t)Z.lo & 0xf;
406
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
407
            Z.hi = (Z.hi >> 4);
408
            if (sizeof(size_t) == 8)
409
                Z.hi ^= rem_4bit[rem];
410
            else
411
                Z.hi ^= (u64)rem_4bit[rem] << 32;
412
413
            Z.hi ^= Htable[nhi].hi;
414
            Z.lo ^= Htable[nhi].lo;
415
416
            if (--cnt < 0)
417
                break;
418
419
            nlo = ((const u8 *)Xi)[cnt];
420
            nlo ^= inp[cnt];
421
            nhi = nlo >> 4;
422
            nlo &= 0xf;
423
424
            rem = (size_t)Z.lo & 0xf;
425
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
426
            Z.hi = (Z.hi >> 4);
427
            if (sizeof(size_t) == 8)
428
                Z.hi ^= rem_4bit[rem];
429
            else
430
                Z.hi ^= (u64)rem_4bit[rem] << 32;
431
432
            Z.hi ^= Htable[nlo].hi;
433
            Z.lo ^= Htable[nlo].lo;
434
        }
435
#   else
436
    /*
437
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
438
     * [should] give ~50% improvement... One could have PACK()-ed
439
     * the rem_8bit even here, but the priority is to minimize
440
     * cache footprint...
441
     */
442
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
443
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
444
    static const unsigned short rem_8bit[256] = {
445
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
446
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
447
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
448
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
449
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
450
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
451
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
452
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
453
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
454
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
455
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
456
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
457
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
458
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
459
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
460
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
461
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
462
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
463
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
464
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
465
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
466
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
467
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
468
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
469
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
470
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
471
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
472
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
473
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
474
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
475
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
476
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
477
    };
478
    /*
479
     * This pre-processing phase slows down procedure by approximately
480
     * same time as it makes each loop spin faster. In other words
481
     * single block performance is approximately same as straightforward
482
     * "4-bit" implementation, and then it goes only faster...
483
     */
484
    for (cnt = 0; cnt < 16; ++cnt) {
485
        Z.hi = Htable[cnt].hi;
486
        Z.lo = Htable[cnt].lo;
487
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
488
        Hshr4[cnt].hi = (Z.hi >> 4);
489
        Hshl4[cnt] = (u8)(Z.lo << 4);
490
    }
491
492
    do {
493
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
494
            nlo = ((const u8 *)Xi)[cnt];
495
            nlo ^= inp[cnt];
496
            nhi = nlo >> 4;
497
            nlo &= 0xf;
498
499
            Z.hi ^= Htable[nlo].hi;
500
            Z.lo ^= Htable[nlo].lo;
501
502
            rem = (size_t)Z.lo & 0xff;
503
504
            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
505
            Z.hi = (Z.hi >> 8);
506
507
            Z.hi ^= Hshr4[nhi].hi;
508
            Z.lo ^= Hshr4[nhi].lo;
509
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
510
        }
511
512
        nlo = ((const u8 *)Xi)[0];
513
        nlo ^= inp[0];
514
        nhi = nlo >> 4;
515
        nlo &= 0xf;
516
517
        Z.hi ^= Htable[nlo].hi;
518
        Z.lo ^= Htable[nlo].lo;
519
520
        rem = (size_t)Z.lo & 0xf;
521
522
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
523
        Z.hi = (Z.hi >> 4);
524
525
        Z.hi ^= Htable[nhi].hi;
526
        Z.lo ^= Htable[nhi].lo;
527
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
528
#   endif
529
530
        if (is_endian.little) {
531
#   ifdef BSWAP8
532
            Xi[0] = BSWAP8(Z.hi);
533
            Xi[1] = BSWAP8(Z.lo);
534
#   else
535
            u8 *p = (u8 *)Xi;
536
            u32 v;
537
            v = (u32)(Z.hi >> 32);
538
            PUTU32(p, v);
539
            v = (u32)(Z.hi);
540
            PUTU32(p + 4, v);
541
            v = (u32)(Z.lo >> 32);
542
            PUTU32(p + 8, v);
543
            v = (u32)(Z.lo);
544
            PUTU32(p + 12, v);
545
#   endif
546
        } else {
547
            Xi[0] = Z.hi;
548
            Xi[1] = Z.lo;
549
        }
550
    } while (inp += 16, len -= 16);
551
}
552
#  endif
553
# else
554
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
555
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
556
                    size_t len);
557
# endif
558
559
# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
560
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
561
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
562
/*
563
 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
564
 * effect. In other words idea is to hash data while it's still in L1 cache
565
 * after encryption pass...
566
 */
567
416
#  define GHASH_CHUNK       (3*1024)
568
# endif
569
570
#else                           /* TABLE_BITS */
571
572
static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
573
{
574
    u128 V, Z = { 0, 0 };
575
    long X;
576
    int i, j;
577
    const long *xi = (const long *)Xi;
578
    const union {
579
        long one;
580
        char little;
581
    } is_endian = { 1 };
582
583
    V.hi = H[0];                /* H is in host byte order, no byte swapping */
584
    V.lo = H[1];
585
586
    for (j = 0; j < 16 / sizeof(long); ++j) {
587
        if (is_endian.little) {
588
            if (sizeof(long) == 8) {
589
# ifdef BSWAP8
590
                X = (long)(BSWAP8(xi[j]));
591
# else
592
                const u8 *p = (const u8 *)(xi + j);
593
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
594
# endif
595
            } else {
596
                const u8 *p = (const u8 *)(xi + j);
597
                X = (long)GETU32(p);
598
            }
599
        } else
600
            X = xi[j];
601
602
        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
603
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
604
            Z.hi ^= V.hi & M;
605
            Z.lo ^= V.lo & M;
606
607
            REDUCE1BIT(V);
608
        }
609
    }
610
611
    if (is_endian.little) {
612
# ifdef BSWAP8
613
        Xi[0] = BSWAP8(Z.hi);
614
        Xi[1] = BSWAP8(Z.lo);
615
# else
616
        u8 *p = (u8 *)Xi;
617
        u32 v;
618
        v = (u32)(Z.hi >> 32);
619
        PUTU32(p, v);
620
        v = (u32)(Z.hi);
621
        PUTU32(p + 4, v);
622
        v = (u32)(Z.lo >> 32);
623
        PUTU32(p + 8, v);
624
        v = (u32)(Z.lo);
625
        PUTU32(p + 12, v);
626
# endif
627
    } else {
628
        Xi[0] = Z.hi;
629
        Xi[1] = Z.lo;
630
    }
631
}
632
633
# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
634
635
#endif
636
637
#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
638
# if    !defined(I386_ONLY) && \
639
        (defined(__i386)        || defined(__i386__)    || \
640
         defined(__x86_64)      || defined(__x86_64__)  || \
641
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
642
#  define GHASH_ASM_X86_OR_64
643
#  define GCM_FUNCREF_4BIT
644
extern unsigned int OPENSSL_ia32cap_P[];
645
646
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
647
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
648
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
649
                     size_t len);
650
651
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
652
#   define gcm_init_avx   gcm_init_clmul
653
#   define gcm_gmult_avx  gcm_gmult_clmul
654
#   define gcm_ghash_avx  gcm_ghash_clmul
655
#  else
656
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
657
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
658
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
659
                   size_t len);
660
#  endif
661
662
#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
663
#   define GHASH_ASM_X86
664
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
665
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
666
                        size_t len);
667
668
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
669
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
670
                        size_t len);
671
#  endif
672
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
673
#  include "arm_arch.h"
674
#  if __ARM_MAX_ARCH__>=7
675
#   define GHASH_ASM_ARM
676
#   define GCM_FUNCREF_4BIT
677
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
678
#   if defined(__arm__) || defined(__arm)
679
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
680
#   endif
681
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
682
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
683
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
684
                    size_t len);
685
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
686
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
687
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
688
                  size_t len);
689
#  endif
690
# elif defined(__sparc__) || defined(__sparc)
691
#  include "sparc_arch.h"
692
#  define GHASH_ASM_SPARC
693
#  define GCM_FUNCREF_4BIT
694
extern unsigned int OPENSSL_sparcv9cap_P[];
695
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
696
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
697
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
698
                    size_t len);
699
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
700
#  include "ppc_arch.h"
701
#  define GHASH_ASM_PPC
702
#  define GCM_FUNCREF_4BIT
703
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
704
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
705
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
706
                  size_t len);
707
# endif
708
#endif
709
710
#ifdef GCM_FUNCREF_4BIT
711
# undef  GCM_MUL
712
53
# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
713
# ifdef GHASH
714
#  undef  GHASH
715
669
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
716
# endif
717
#endif
718
719
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
720
248
{
721
248
    const union {
722
248
        long one;
723
248
        char little;
724
248
    } is_endian = { 1 };
725
726
248
    memset(ctx, 0, sizeof(*ctx));
727
248
    ctx->block = block;
728
248
    ctx->key = key;
729
730
248
    (*block) (ctx->H.c, ctx->H.c, key);
731
732
248
    if (is_endian.little) {
733
        /* H is stored in host byte order */
734
#ifdef BSWAP8
735
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
736
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
737
#else
738
248
        u8 *p = ctx->H.c;
739
248
        u64 hi, lo;
740
248
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
741
248
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
742
248
        ctx->H.u[0] = hi;
743
248
        ctx->H.u[1] = lo;
744
248
#endif
745
248
    }
746
#if     TABLE_BITS==8
747
    gcm_init_8bit(ctx->Htable, ctx->H.u);
748
#elif   TABLE_BITS==4
749
248
# if    defined(GHASH)
750
248
#  define CTX__GHASH(f) (ctx->ghash = (f))
751
# else
752
#  define CTX__GHASH(f) (ctx->ghash = NULL)
753
# endif
754
248
# if    defined(GHASH_ASM_X86_OR_64)
755
248
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
756
248
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
757
248
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
758
248
            gcm_init_avx(ctx->Htable, ctx->H.u);
759
248
            ctx->gmult = gcm_gmult_avx;
760
248
            CTX__GHASH(gcm_ghash_avx);
761
248
        } else {
762
0
            gcm_init_clmul(ctx->Htable, ctx->H.u);
763
0
            ctx->gmult = gcm_gmult_clmul;
764
0
            CTX__GHASH(gcm_ghash_clmul);
765
0
        }
766
248
        return;
767
248
    }
768
0
#  endif
769
0
    gcm_init_4bit(ctx->Htable, ctx->H.u);
770
#  if   defined(GHASH_ASM_X86)  /* x86 only */
771
#   if  defined(OPENSSL_IA32_SSE2)
772
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
773
#   else
774
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
775
#   endif
776
        ctx->gmult = gcm_gmult_4bit_mmx;
777
        CTX__GHASH(gcm_ghash_4bit_mmx);
778
    } else {
779
        ctx->gmult = gcm_gmult_4bit_x86;
780
        CTX__GHASH(gcm_ghash_4bit_x86);
781
    }
782
#  else
783
0
    ctx->gmult = gcm_gmult_4bit;
784
0
    CTX__GHASH(gcm_ghash_4bit);
785
0
#  endif
786
# elif  defined(GHASH_ASM_ARM)
787
#  ifdef PMULL_CAPABLE
788
    if (PMULL_CAPABLE) {
789
        gcm_init_v8(ctx->Htable, ctx->H.u);
790
        ctx->gmult = gcm_gmult_v8;
791
        CTX__GHASH(gcm_ghash_v8);
792
    } else
793
#  endif
794
#  ifdef NEON_CAPABLE
795
    if (NEON_CAPABLE) {
796
        gcm_init_neon(ctx->Htable, ctx->H.u);
797
        ctx->gmult = gcm_gmult_neon;
798
        CTX__GHASH(gcm_ghash_neon);
799
    } else
800
#  endif
801
    {
802
        gcm_init_4bit(ctx->Htable, ctx->H.u);
803
        ctx->gmult = gcm_gmult_4bit;
804
        CTX__GHASH(gcm_ghash_4bit);
805
    }
806
# elif  defined(GHASH_ASM_SPARC)
807
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
808
        gcm_init_vis3(ctx->Htable, ctx->H.u);
809
        ctx->gmult = gcm_gmult_vis3;
810
        CTX__GHASH(gcm_ghash_vis3);
811
    } else {
812
        gcm_init_4bit(ctx->Htable, ctx->H.u);
813
        ctx->gmult = gcm_gmult_4bit;
814
        CTX__GHASH(gcm_ghash_4bit);
815
    }
816
# elif  defined(GHASH_ASM_PPC)
817
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
818
        gcm_init_p8(ctx->Htable, ctx->H.u);
819
        ctx->gmult = gcm_gmult_p8;
820
        CTX__GHASH(gcm_ghash_p8);
821
    } else {
822
        gcm_init_4bit(ctx->Htable, ctx->H.u);
823
        ctx->gmult = gcm_gmult_4bit;
824
        CTX__GHASH(gcm_ghash_4bit);
825
    }
826
# else
827
    gcm_init_4bit(ctx->Htable, ctx->H.u);
828
# endif
829
0
# undef CTX__GHASH
830
0
#endif
831
0
}
832
833
/*-
 * Set the IV/nonce for a fresh GCM operation and derive the initial
 * counter block.
 *
 * - A 12-byte IV (the recommended size) is used directly: Yi = IV || 0x00000001.
 * - Any other length is folded through GHASH: Yi = GHASH(IV padded to a
 *   16-byte boundary, followed by the 64-bit IV bit-length), borrowing
 *   ctx->Xi as scratch space.
 *
 * Side effects: resets AAD/message lengths and the ares/mres residue
 * counters, computes EK0 = E(K, Y0) (later XORed into the tag), and leaves
 * Yi holding the counter value for the first data block.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    /* Runtime endianness probe: .little is non-zero on little-endian. */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    /* Local copy of the function pointer used by the GCM_MUL macro. */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        /* Fast path: 96-bit IV is the counter block directly, counter = 1. */
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        /* Absorb full 16-byte IV blocks into the GHASH accumulator. */
        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        /* Final partial block is implicitly zero-padded. */
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        /* Append the IV length in bits as a big-endian 64-bit value. */
        len0 <<= 3;
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        /* Extract the 32-bit counter word from the derived Y0. */
        if (is_endian.little)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    /* Reset the GHASH accumulator for the actual AAD/message processing. */
    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    /* EK0 = E(K, Y0); combined with GHASH output in CRYPTO_gcm128_finish. */
    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    /* Store the incremented counter back into Yi for the first data block. */
    if (is_endian.little)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
925
926
/*-
 * Feed additional authenticated data (AAD) into the GHASH accumulator.
 * May be called multiple times, but only before any encrypt/decrypt call.
 *
 * Returns 0 on success, -2 if message data has already been processed
 * (AAD must precede ciphertext in GCM), or -1 if the accumulated AAD
 * length exceeds the 2^64-bit (2^61-byte) limit or overflows.
 *
 * ctx->ares tracks how many bytes of a partial 16-byte AAD block are
 * pending in Xi so a subsequent call (or first data call) can finish it.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    /* Local copies of the function pointers used by GCM_MUL/GHASH macros. */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    /* AAD is not allowed once message processing has started. */
    if (ctx->len.u[1])
        return -2;

    alen += len;
    /* Enforce the GCM AAD length limit and detect wrap-around. */
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    /* Complete a partial block left over from a previous call, if any. */
    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            /* Block still incomplete; stash residue count and return. */
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    /* Bulk-hash all whole 16-byte blocks in one call. */
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    /* XOR the trailing partial block into Xi; hashed later when completed. */
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
986
987
/*-
 * Encrypt `len` bytes from `in` to `out` using the software block cipher
 * (one E(K, Yi) call per 16-byte block) and fold the resulting ciphertext
 * into the GHASH authenticator. Callable repeatedly for streaming input.
 *
 * Returns 0 on success, -1 if the total message length would exceed the
 * GCM limit of 2^36 - 32 bytes (or overflows).
 *
 * When the GHASH macro is available, hashing is deferred: pending input is
 * collected in ctx->Xn and hashed in batches, which is why the first data
 * call moves the AAD accumulator Xi into Xn (mres = sizeof(Xi) marks those
 * stashed bytes). ctx->mres tracks bytes buffered in Xn (GHASH path) or
 * the partial-keystream offset (fallback path).
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    /* Runtime endianness probe: .little is non-zero on little-endian. */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    /* Enforce the GCM plaintext length limit and detect wrap-around. */
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /*
         * Defer the AAD-final multiply: stash Xi into the Xn buffer so it
         * is ghashed together with the first ciphertext bytes in one pass.
         */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter word from Yi into host order. */
    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    /* n = offset into the current partially-consumed keystream block. */
    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            /* Finish the partial block left from a previous call. */
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            /* Misaligned buffers fall through to the bytewise loop below. */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            /* Flush any bytes buffered in Xn before bulk processing. */
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            /* Bulk path: encrypt a chunk, then ghash the ciphertext. */
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    /* XOR keystream word-at-a-time (size_t granularity). */
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            /* Remaining whole blocks (less than a chunk). */
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            /* No GHASH macro: hash each block with a single multiply. */
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            /* Trailing partial block: generate one keystream block. */
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    /* Bytewise fallback (small footprint, or misaligned buffers above). */
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        /* Flush Xn when the deferral buffer is full. */
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
1221
1222
/*-
 * Decrypt `len` bytes from `in` to `out`. Mirror of CRYPTO_gcm128_encrypt,
 * except that GHASH is computed over the CIPHERTEXT (the input), so each
 * block is hashed before (or independently of) being XORed with keystream.
 *
 * Returns 0 on success, -1 if the total message length would exceed the
 * GCM limit of 2^36 - 32 bytes (or overflows).
 *
 * See CRYPTO_gcm128_encrypt for the Xn deferred-hashing scheme and the
 * meaning of ctx->ares / ctx->mres.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    /* Runtime endianness probe: .little is non-zero on little-endian. */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    /* Enforce the GCM message length limit and detect wrap-around. */
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /* Defer the AAD-final multiply via the Xn buffer (see encrypt). */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter word from Yi into host order. */
    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    /* n = offset into the current partially-consumed keystream block. */
    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            /* Finish the partial block left from a previous call. */
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    /* Buffer the ciphertext byte for hashing, then XOR. */
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            /* Misaligned buffers fall through to the bytewise loop below. */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            /* Flush any bytes buffered in Xn before bulk processing. */
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            /* Bulk path: hash the ciphertext chunk first, then decrypt. */
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    /* XOR keystream word-at-a-time (size_t granularity). */
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            /* Remaining whole blocks (less than a chunk). */
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            /* No GHASH macro: hash each block with a single multiply. */
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            /* Trailing partial block: generate one keystream block. */
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    /* Bytewise fallback (small footprint, or misaligned buffers above). */
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        /* Flush Xn when the deferral buffer is full. */
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
1464
1465
/*-
 * Encrypt using a caller-supplied CTR "stream" routine (ctr128_f) that
 * processes many counter blocks per call — typically a hardware-accelerated
 * AES-CTR implementation. Whole 16-byte blocks go through `stream`; the
 * trailing partial block falls back to a single ctx->block invocation.
 * GHASH is computed over the produced ciphertext.
 *
 * Returns 0 on success, -1 on message-length overflow (2^36 - 32 byte
 * limit). With OPENSSL_SMALL_FOOTPRINT it simply delegates to
 * CRYPTO_gcm128_encrypt.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    /* Runtime endianness probe: .little is non-zero on little-endian. */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    mlen += len;
    /* Enforce the GCM message length limit and detect wrap-around. */
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /* Defer the AAD-final multiply via the Xn buffer (see encrypt). */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter word from Yi into host order. */
    if (is_endian.little)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    /* Finish the partial keystream block left from a previous call. */
    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
        /* Flush any bytes buffered in Xn before bulk processing. */
        if (len >= 16 && mres) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
#  if defined(GHASH_CHUNK)
    /* Bulk path: stream-encrypt a chunk, then ghash the ciphertext. */
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        /* Re-serialize the advanced counter back into Yi. */
        if (is_endian.little)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    /* Remaining whole blocks (less than a chunk). */
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    /* Trailing partial block: one software block-cipher call. */
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
1628
1629
/*-
 * Decrypt using a caller-supplied CTR "stream" routine (ctr128_f). Mirror
 * of CRYPTO_gcm128_encrypt_ctr32, except that GHASH is computed over the
 * CIPHERTEXT (the input), so each span is hashed before being decrypted.
 *
 * Returns 0 on success, -1 on message-length overflow (2^36 - 32 byte
 * limit). With OPENSSL_SMALL_FOOTPRINT it simply delegates to
 * CRYPTO_gcm128_decrypt.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    /* Runtime endianness probe: .little is non-zero on little-endian. */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    mlen += len;
    /* Enforce the GCM message length limit and detect wrap-around. */
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /* Defer the AAD-final multiply via the Xn buffer (see encrypt). */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter word from Yi into host order. */
    if (is_endian.little)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    /* Finish the partial keystream block left from a previous call. */
    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            /* Buffer the ciphertext byte for hashing, then XOR. */
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    /* Flush any bytes buffered in Xn before bulk processing. */
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    /* Bulk path: hash the ciphertext chunk first, then stream-decrypt it. */
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        /* Re-serialize the advanced counter back into Yi. */
        if (is_endian.little)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    /* Remaining whole blocks (less than a chunk). */
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        /* Rewind: the hashing pass above consumed `in` and `j`. */
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    /* Trailing partial block: one software block-cipher call. */
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
# else
            u8 c = in[n];
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
1799
1800
/*
 * Finalize GCM: fold the (AAD-len || ciphertext-len) block into the GHASH
 * state, XOR with the encrypted initial counter block (EK0) to produce the
 * authentication tag in ctx->Xi, and - if a caller-supplied tag is given -
 * compare against it in constant time.
 *
 * Returns 0 when `tag` matches the computed tag over `len` bytes, nonzero
 * on mismatch, and -1 when no tag was supplied or `len` exceeds the tag
 * size (the latter path is what CRYPTO_gcm128_tag relies on to only
 * compute, not compare).
 */
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    /* Runtime endianness probe: `little` is nonzero on little-endian. */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    /* GCM appends bit lengths, hence the << 3 (bytes -> bits). */
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
    /* Indirect through ctx so GHASH/GCM_MUL use the CPU-selected routines. */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        /* Deferred-GHASH path: pad the buffered residue in Xn up to a
         * 16-byte block boundary with zeros before hashing. */
        unsigned blocks = (mres + 15) & -16;

        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        if (mres == sizeof(ctx->Xn)) {
            /* Buffer full: flush it now so the length block fits below. */
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        /* Pending partial AAD block that was never multiplied in. */
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    if (is_endian.little) {
        /* GCM lengths are big-endian on the wire; byte-swap on LE hosts. */
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        /* No 64-bit swap primitive: round-trip through ctx->len.c and
         * reassemble with 32-bit big-endian loads. */
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* Append the 128-bit length block to the residue and hash everything. */
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    /* Immediate path: XOR the length block into Xi and multiply once. */
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    /* Final tag = GHASH output XOR E(K, Y0). */
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    /* Constant-time comparison; never memcmp() an auth tag. */
    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}
1873
1874
/*
 * Compute the authentication tag and copy up to `len` bytes of it into
 * `tag`. Requests longer than the 16-byte tag are silently clamped.
 * The finish call is made with a NULL tag, so it only computes ctx->Xi
 * and performs no comparison.
 */
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    size_t ncopy = len;

    CRYPTO_gcm128_finish(ctx, NULL, 0);

    if (ncopy > sizeof(ctx->Xi.c))
        ncopy = sizeof(ctx->Xi.c);
    memcpy(tag, ctx->Xi.c, ncopy);
}
1880
1881
/*
 * Allocate and initialize a fresh GCM128_CONTEXT for the given cipher key
 * and block function. Returns NULL on allocation failure; the caller owns
 * the context and must free it with CRYPTO_gcm128_release().
 */
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ctx = OPENSSL_malloc(sizeof(*ctx));

    if (ctx == NULL)
        return NULL;

    CRYPTO_gcm128_init(ctx, key, block);
    return ctx;
}
1890
1891
/*
 * Scrub and free a context obtained from CRYPTO_gcm128_new(). The clearing
 * free wipes key-derived material (H, Htable, EK0, ...) before releasing
 * the memory. Passing NULL is tolerated by OPENSSL_clear_free().
 */
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}