Coverage Report

Created: 2023-09-25 06:41

/src/openssl111/crypto/modes/gcm128.c

 Line |   Count | Source
    1 |         | /*
    2 |         |  * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
    3 |         |  *
    4 |         |  * Licensed under the OpenSSL license (the "License").  You may not use
    5 |         |  * this file except in compliance with the License.  You can obtain a copy
    6 |         |  * in the file LICENSE in the source distribution or at
    7 |         |  * https://www.openssl.org/source/license.html
    8 |         |  */
    9 |         |
   10 |         | #include <openssl/crypto.h>
   11 |         | #include "modes_local.h"
   12 |         | #include <string.h>
   13 |         |
   14 |         | #if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
   15 |         | typedef size_t size_t_aX __attribute((__aligned__(1)));
   16 |         | #else
   17 |         | typedef size_t size_t_aX;
   18 |         | #endif
   19 |         |
   20 |         | #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
   21 |         | /* redefine, because alignment is ensured */
   22 |         | # undef  GETU32
   23 |         | # define GETU32(p)       BSWAP4(*(const u32 *)(p))
   24 |         | # undef  PUTU32
   25 |         | # define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
   26 |         | #endif
   27 |         |
   28 |         | #define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
   29 |       0 | #define REDUCE1BIT(V)   do { \
   30 |       0 |         if (sizeof(size_t)==8) { \
   31 |       0 |                 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
   32 |       0 |                 V.lo  = (V.hi<<63)|(V.lo>>1); \
   33 |       0 |                 V.hi  = (V.hi>>1 )^T; \
   34 |       0 |         } \
   35 |       0 |         else { \
   36 |       0 |                 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
   37 |       0 |                 V.lo  = (V.hi<<63)|(V.lo>>1); \
   38 |       0 |                 V.hi  = (V.hi>>1 )^((u64)T<<32); \
   39 |       0 |         } \
   40 |       0 | } while(0)
   41 |         |
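Note: REDUCE1BIT(V) multiplies the field element V by x in GF(2^128), in the bit-reflected representation GCM uses. The one-bit right shift performs the multiplication; when the bit shifted out is set, the result is reduced by XORing in the GCM field polynomial

        x^128 + x^7 + x^2 + x + 1

whose low-order terms (1 + x + x^2 + x^7) pack, bit-reflected, into the 0xe1 byte of the constants above.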
   42 |         | /*-
   43 |         |  * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
   44 |         |  * never be set to 8. 8 is effectively reserved for testing purposes.
   45 |         |  * TABLE_BITS>1 are lookup-table-driven implementations referred to as
   46 |         |  * "Shoup's" in GCM specification. In other words OpenSSL does not cover
   47 |         |  * whole spectrum of possible table driven implementations. Why? In
   48 |         |  * non-"Shoup's" case memory access pattern is segmented in such manner,
   49 |         |  * that it's trivial to see that cache timing information can reveal
   50 |         |  * fair portion of intermediate hash value. Given that ciphertext is
   51 |         |  * always available to attacker, it's possible for him to attempt to
   52 |         |  * deduce secret parameter H and if successful, tamper with messages
   53 |         |  * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
   54 |         |  * not as trivial, but there is no reason to believe that it's resistant
   55 |         |  * to cache-timing attack. And the thing about "8-bit" implementation is
   56 |         |  * that it consumes 16 (sixteen) times more memory, 4KB per individual
   57 |         |  * key + 1KB shared. Well, on pros side it should be twice as fast as
   58 |         |  * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
   59 |         |  * was observed to run ~75% faster, closer to 100% for commercial
   60 |         |  * compilers... Yet "4-bit" procedure is preferred, because it's
   61 |         |  * believed to provide better security-performance balance and adequate
   62 |         |  * all-round performance. "All-round" refers to things like:
   63 |         |  *
   64 |         |  * - shorter setup time effectively improves overall timing for
   65 |         |  *   handling short messages;
   66 |         |  * - larger table allocation can become unbearable because of VM
   67 |         |  *   subsystem penalties (for example on Windows large enough free
   68 |         |  *   results in VM working set trimming, meaning that consequent
   69 |         |  *   malloc would immediately incur working set expansion);
   70 |         |  * - larger table has larger cache footprint, which can affect
   71 |         |  *   performance of other code paths (not necessarily even from same
   72 |         |  *   thread in Hyper-Threading world);
   73 |         |  *
   74 |         |  * Value of 1 is not appropriate for performance reasons.
   75 |         |  */
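For scale, the memory figures above follow directly from the declarations below: with TABLE_BITS==8 the per-key table is 256 u128 entries, i.e. 256 x 16 = 4096 bytes (the 4KB cited), plus a shared 256-entry rem_8bit table; TABLE_BITS==4 shrinks that to 16 x 16 = 256 bytes per key plus a 16-entry shared rem_4bit table.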
   76 |         | #if     TABLE_BITS==8
   77 |         |
   78 |         | static void gcm_init_8bit(u128 Htable[256], u64 H[2])
   79 |         | {
   80 |         |     int i, j;
   81 |         |     u128 V;
   82 |         |
   83 |         |     Htable[0].hi = 0;
   84 |         |     Htable[0].lo = 0;
   85 |         |     V.hi = H[0];
   86 |         |     V.lo = H[1];
   87 |         |
   88 |         |     for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
   89 |         |         REDUCE1BIT(V);
   90 |         |         Htable[i] = V;
   91 |         |     }
   92 |         |
   93 |         |     for (i = 2; i < 256; i <<= 1) {
   94 |         |         u128 *Hi = Htable + i, H0 = *Hi;
   95 |         |         for (j = 1; j < i; ++j) {
   96 |         |             Hi[j].hi = H0.hi ^ Htable[j].hi;
   97 |         |             Hi[j].lo = H0.lo ^ Htable[j].lo;
   98 |         |         }
   99 |         |     }
  100 |         | }
  101 |         |
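gcm_init_8bit above (and gcm_init_4bit below) lean on GF(2) linearity: the first loop fills the power-of-two slots with H times successive powers of x (via REDUCE1BIT), and the second loop completes the table as Htable[i ^ j] = Htable[i] XOR Htable[j] for disjoint i and j, so each entry holds the product of H with the field element named by its index in GCM's reflected bit order.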
  102 |         | static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
  103 |         | {
  104 |         |     u128 Z = { 0, 0 };
  105 |         |     const u8 *xi = (const u8 *)Xi + 15;
  106 |         |     size_t rem, n = *xi;
  107 |         |     const union {
  108 |         |         long one;
  109 |         |         char little;
  110 |         |     } is_endian = { 1 };
  111 |         |     static const size_t rem_8bit[256] = {
  112 |         |         PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
  113 |         |         PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
  114 |         |         PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
  115 |         |         PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
  116 |         |         PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
  117 |         |         PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
  118 |         |         PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
  119 |         |         PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
  120 |         |         PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
  121 |         |         PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
  122 |         |         PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
  123 |         |         PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
  124 |         |         PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
  125 |         |         PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
  126 |         |         PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
  127 |         |         PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
  128 |         |         PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
  129 |         |         PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
  130 |         |         PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
  131 |         |         PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
  132 |         |         PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
  133 |         |         PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
  134 |         |         PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
  135 |         |         PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
  136 |         |         PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
  137 |         |         PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
  138 |         |         PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
  139 |         |         PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
  140 |         |         PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
  141 |         |         PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
  142 |         |         PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
  143 |         |         PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
  144 |         |         PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
  145 |         |         PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
  146 |         |         PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
  147 |         |         PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
  148 |         |         PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
  149 |         |         PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
  150 |         |         PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
  151 |         |         PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
  152 |         |         PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
  153 |         |         PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
  154 |         |         PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
  155 |         |         PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
  156 |         |         PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
  157 |         |         PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
  158 |         |         PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
  159 |         |         PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
  160 |         |         PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
  161 |         |         PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
  162 |         |         PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
  163 |         |         PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
  164 |         |         PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
  165 |         |         PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
  166 |         |         PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
  167 |         |         PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
  168 |         |         PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
  169 |         |         PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
  170 |         |         PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
  171 |         |         PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
  172 |         |         PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
  173 |         |         PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
  174 |         |         PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
  175 |         |         PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
  176 |         |     };
  177 |         |
  178 |         |     while (1) {
  179 |         |         Z.hi ^= Htable[n].hi;
  180 |         |         Z.lo ^= Htable[n].lo;
  181 |         |
  182 |         |         if ((u8 *)Xi == xi)
  183 |         |             break;
  184 |         |
  185 |         |         n = *(--xi);
  186 |         |
  187 |         |         rem = (size_t)Z.lo & 0xff;
  188 |         |         Z.lo = (Z.hi << 56) | (Z.lo >> 8);
  189 |         |         Z.hi = (Z.hi >> 8);
  190 |         |         if (sizeof(size_t) == 8)
  191 |         |             Z.hi ^= rem_8bit[rem];
  192 |         |         else
  193 |         |             Z.hi ^= (u64)rem_8bit[rem] << 32;
  194 |         |     }
  195 |         |
  196 |         |     if (is_endian.little) {
  197 |         | # ifdef BSWAP8
  198 |         |         Xi[0] = BSWAP8(Z.hi);
  199 |         |         Xi[1] = BSWAP8(Z.lo);
  200 |         | # else
  201 |         |         u8 *p = (u8 *)Xi;
  202 |         |         u32 v;
  203 |         |         v = (u32)(Z.hi >> 32);
  204 |         |         PUTU32(p, v);
  205 |         |         v = (u32)(Z.hi);
  206 |         |         PUTU32(p + 4, v);
  207 |         |         v = (u32)(Z.lo >> 32);
  208 |         |         PUTU32(p + 8, v);
  209 |         |         v = (u32)(Z.lo);
  210 |         |         PUTU32(p + 12, v);
  211 |         | # endif
  212 |         |     } else {
  213 |         |         Xi[0] = Z.hi;
  214 |         |         Xi[1] = Z.lo;
  215 |         |     }
  216 |         | }
  217 |         |
  218 |         | # define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
  219 |         |
  220 |         | #elif   TABLE_BITS==4
  221 |         |
  222 |         | static void gcm_init_4bit(u128 Htable[16], u64 H[2])
  223 |       0 | {
  224 |       0 |     u128 V;
  225 |         | # if defined(OPENSSL_SMALL_FOOTPRINT)
  226 |         |     int i;
  227 |         | # endif
  228 |         |
  229 |       0 |     Htable[0].hi = 0;
  230 |       0 |     Htable[0].lo = 0;
  231 |       0 |     V.hi = H[0];
  232 |       0 |     V.lo = H[1];
  233 |         |
  234 |         | # if defined(OPENSSL_SMALL_FOOTPRINT)
  235 |         |     for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
  236 |         |         REDUCE1BIT(V);
  237 |         |         Htable[i] = V;
  238 |         |     }
  239 |         |
  240 |         |     for (i = 2; i < 16; i <<= 1) {
  241 |         |         u128 *Hi = Htable + i;
  242 |         |         int j;
  243 |         |         for (V = *Hi, j = 1; j < i; ++j) {
  244 |         |             Hi[j].hi = V.hi ^ Htable[j].hi;
  245 |         |             Hi[j].lo = V.lo ^ Htable[j].lo;
  246 |         |         }
  247 |         |     }
  248 |         | # else
  249 |       0 |     Htable[8] = V;
  250 |       0 |     REDUCE1BIT(V);
  251 |       0 |     Htable[4] = V;
  252 |       0 |     REDUCE1BIT(V);
  253 |       0 |     Htable[2] = V;
  254 |       0 |     REDUCE1BIT(V);
  255 |       0 |     Htable[1] = V;
  256 |       0 |     Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  257 |       0 |     V = Htable[4];
  258 |       0 |     Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  259 |       0 |     Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  260 |       0 |     Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  261 |       0 |     V = Htable[8];
  262 |       0 |     Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  263 |       0 |     Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  264 |       0 |     Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  265 |       0 |     Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  266 |       0 |     Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  267 |       0 |     Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  268 |       0 |     Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
  269 |       0 | # endif
  270 |         | # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
  271 |         |     /*
  272 |         |      * ARM assembler expects specific dword order in Htable.
  273 |         |      */
  274 |         |     {
  275 |         |         int j;
  276 |         |         const union {
  277 |         |             long one;
  278 |         |             char little;
  279 |         |         } is_endian = { 1 };
  280 |         |
  281 |         |         if (is_endian.little)
  282 |         |             for (j = 0; j < 16; ++j) {
  283 |         |                 V = Htable[j];
  284 |         |                 Htable[j].hi = V.lo;
  285 |         |                 Htable[j].lo = V.hi;
  286 |         |         } else
  287 |         |             for (j = 0; j < 16; ++j) {
  288 |         |                 V = Htable[j];
  289 |         |                 Htable[j].hi = V.lo << 32 | V.lo >> 32;
  290 |         |                 Htable[j].lo = V.hi << 32 | V.hi >> 32;
  291 |         |             }
  292 |         |     }
  293 |         | # endif
  294 |       0 | }
  295 |         |
  296 |         | # ifndef GHASH_ASM
  297 |         | static const size_t rem_4bit[16] = {
  298 |         |     PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
  299 |         |     PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
  300 |         |     PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
  301 |         |     PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
  302 |         | };
  303 |         |
  304 |         | static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
  305 |         | {
  306 |         |     u128 Z;
  307 |         |     int cnt = 15;
  308 |         |     size_t rem, nlo, nhi;
  309 |         |     const union {
  310 |         |         long one;
  311 |         |         char little;
  312 |         |     } is_endian = { 1 };
  313 |         |
  314 |         |     nlo = ((const u8 *)Xi)[15];
  315 |         |     nhi = nlo >> 4;
  316 |         |     nlo &= 0xf;
  317 |         |
  318 |         |     Z.hi = Htable[nlo].hi;
  319 |         |     Z.lo = Htable[nlo].lo;
  320 |         |
  321 |         |     while (1) {
  322 |         |         rem = (size_t)Z.lo & 0xf;
  323 |         |         Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  324 |         |         Z.hi = (Z.hi >> 4);
  325 |         |         if (sizeof(size_t) == 8)
  326 |         |             Z.hi ^= rem_4bit[rem];
  327 |         |         else
  328 |         |             Z.hi ^= (u64)rem_4bit[rem] << 32;
  329 |         |
  330 |         |         Z.hi ^= Htable[nhi].hi;
  331 |         |         Z.lo ^= Htable[nhi].lo;
  332 |         |
  333 |         |         if (--cnt < 0)
  334 |         |             break;
  335 |         |
  336 |         |         nlo = ((const u8 *)Xi)[cnt];
  337 |         |         nhi = nlo >> 4;
  338 |         |         nlo &= 0xf;
  339 |         |
  340 |         |         rem = (size_t)Z.lo & 0xf;
  341 |         |         Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  342 |         |         Z.hi = (Z.hi >> 4);
  343 |         |         if (sizeof(size_t) == 8)
  344 |         |             Z.hi ^= rem_4bit[rem];
  345 |         |         else
  346 |         |             Z.hi ^= (u64)rem_4bit[rem] << 32;
  347 |         |
  348 |         |         Z.hi ^= Htable[nlo].hi;
  349 |         |         Z.lo ^= Htable[nlo].lo;
  350 |         |     }
  351 |         |
  352 |         |     if (is_endian.little) {
  353 |         | #  ifdef BSWAP8
  354 |         |         Xi[0] = BSWAP8(Z.hi);
  355 |         |         Xi[1] = BSWAP8(Z.lo);
  356 |         | #  else
  357 |         |         u8 *p = (u8 *)Xi;
  358 |         |         u32 v;
  359 |         |         v = (u32)(Z.hi >> 32);
  360 |         |         PUTU32(p, v);
  361 |         |         v = (u32)(Z.hi);
  362 |         |         PUTU32(p + 4, v);
  363 |         |         v = (u32)(Z.lo >> 32);
  364 |         |         PUTU32(p + 8, v);
  365 |         |         v = (u32)(Z.lo);
  366 |         |         PUTU32(p + 12, v);
  367 |         | #  endif
  368 |         |     } else {
  369 |         |         Xi[0] = Z.hi;
  370 |         |         Xi[1] = Z.lo;
  371 |         |     }
  372 |         | }
  373 |         |
  374 |         | #  if !defined(OPENSSL_SMALL_FOOTPRINT)
  375 |         | /*
  376 |         |  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
  377 |         |  * details... Compiler-generated code doesn't seem to give any
  378 |         |  * performance improvement, at least not on x86[_64]. It's here
  379 |         |  * mostly as reference and a placeholder for possible future
  380 |         |  * non-trivial optimization[s]...
  381 |         |  */
  382 |         | static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
  383 |         |                            const u8 *inp, size_t len)
  384 |         | {
  385 |         |     u128 Z;
  386 |         |     int cnt;
  387 |         |     size_t rem, nlo, nhi;
  388 |         |     const union {
  389 |         |         long one;
  390 |         |         char little;
  391 |         |     } is_endian = { 1 };
  392 |         |
  393 |         | #   if 1
  394 |         |     do {
  395 |         |         cnt = 15;
  396 |         |         nlo = ((const u8 *)Xi)[15];
  397 |         |         nlo ^= inp[15];
  398 |         |         nhi = nlo >> 4;
  399 |         |         nlo &= 0xf;
  400 |         |
  401 |         |         Z.hi = Htable[nlo].hi;
  402 |         |         Z.lo = Htable[nlo].lo;
  403 |         |
  404 |         |         while (1) {
  405 |         |             rem = (size_t)Z.lo & 0xf;
  406 |         |             Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  407 |         |             Z.hi = (Z.hi >> 4);
  408 |         |             if (sizeof(size_t) == 8)
  409 |         |                 Z.hi ^= rem_4bit[rem];
  410 |         |             else
  411 |         |                 Z.hi ^= (u64)rem_4bit[rem] << 32;
  412 |         |
  413 |         |             Z.hi ^= Htable[nhi].hi;
  414 |         |             Z.lo ^= Htable[nhi].lo;
  415 |         |
  416 |         |             if (--cnt < 0)
  417 |         |                 break;
  418 |         |
  419 |         |             nlo = ((const u8 *)Xi)[cnt];
  420 |         |             nlo ^= inp[cnt];
  421 |         |             nhi = nlo >> 4;
  422 |         |             nlo &= 0xf;
  423 |         |
  424 |         |             rem = (size_t)Z.lo & 0xf;
  425 |         |             Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  426 |         |             Z.hi = (Z.hi >> 4);
  427 |         |             if (sizeof(size_t) == 8)
  428 |         |                 Z.hi ^= rem_4bit[rem];
  429 |         |             else
  430 |         |                 Z.hi ^= (u64)rem_4bit[rem] << 32;
  431 |         |
  432 |         |             Z.hi ^= Htable[nlo].hi;
  433 |         |             Z.lo ^= Htable[nlo].lo;
  434 |         |         }
  435 |         | #   else
  436 |         |     /*
  437 |         |      * Extra 256+16 bytes per-key plus 512 bytes shared tables
  438 |         |      * [should] give ~50% improvement... One could have PACK()-ed
  439 |         |      * the rem_8bit even here, but the priority is to minimize
  440 |         |      * cache footprint...
  441 |         |      */
  442 |         |     u128 Hshr4[16];             /* Htable shifted right by 4 bits */
  443 |         |     u8 Hshl4[16];               /* Htable shifted left by 4 bits */
  444 |         |     static const unsigned short rem_8bit[256] = {
  445 |         |         0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
  446 |         |         0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
  447 |         |         0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
  448 |         |         0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
  449 |         |         0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
  450 |         |         0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
  451 |         |         0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
  452 |         |         0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
  453 |         |         0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
  454 |         |         0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
  455 |         |         0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
  456 |         |         0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
  457 |         |         0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
  458 |         |         0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
  459 |         |         0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
  460 |         |         0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
  461 |         |         0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
  462 |         |         0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
  463 |         |         0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
  464 |         |         0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
  465 |         |         0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
  466 |         |         0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
  467 |         |         0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
  468 |         |         0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
  469 |         |         0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
  470 |         |         0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
  471 |         |         0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
  472 |         |         0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
  473 |         |         0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
  474 |         |         0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
  475 |         |         0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
  476 |         |         0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
  477 |         |     };
  478 |         |     /*
  479 |         |      * This pre-processing phase slows down procedure by approximately
  480 |         |      * same time as it makes each loop spin faster. In other words
  481 |         |      * single block performance is approximately same as straightforward
  482 |         |      * "4-bit" implementation, and then it goes only faster...
  483 |         |      */
  484 |         |     for (cnt = 0; cnt < 16; ++cnt) {
  485 |         |         Z.hi = Htable[cnt].hi;
  486 |         |         Z.lo = Htable[cnt].lo;
  487 |         |         Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
  488 |         |         Hshr4[cnt].hi = (Z.hi >> 4);
  489 |         |         Hshl4[cnt] = (u8)(Z.lo << 4);
  490 |         |     }
  491 |         |
  492 |         |     do {
  493 |         |         for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
  494 |         |             nlo = ((const u8 *)Xi)[cnt];
  495 |         |             nlo ^= inp[cnt];
  496 |         |             nhi = nlo >> 4;
  497 |         |             nlo &= 0xf;
  498 |         |
  499 |         |             Z.hi ^= Htable[nlo].hi;
  500 |         |             Z.lo ^= Htable[nlo].lo;
  501 |         |
  502 |         |             rem = (size_t)Z.lo & 0xff;
  503 |         |
  504 |         |             Z.lo = (Z.hi << 56) | (Z.lo >> 8);
  505 |         |             Z.hi = (Z.hi >> 8);
  506 |         |
  507 |         |             Z.hi ^= Hshr4[nhi].hi;
  508 |         |             Z.lo ^= Hshr4[nhi].lo;
  509 |         |             Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
  510 |         |         }
  511 |         |
  512 |         |         nlo = ((const u8 *)Xi)[0];
  513 |         |         nlo ^= inp[0];
  514 |         |         nhi = nlo >> 4;
  515 |         |         nlo &= 0xf;
  516 |         |
  517 |         |         Z.hi ^= Htable[nlo].hi;
  518 |         |         Z.lo ^= Htable[nlo].lo;
  519 |         |
  520 |         |         rem = (size_t)Z.lo & 0xf;
  521 |         |
  522 |         |         Z.lo = (Z.hi << 60) | (Z.lo >> 4);
  523 |         |         Z.hi = (Z.hi >> 4);
  524 |         |
  525 |         |         Z.hi ^= Htable[nhi].hi;
  526 |         |         Z.lo ^= Htable[nhi].lo;
  527 |         |         Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
  528 |         | #   endif
  529 |         |
  530 |         |         if (is_endian.little) {
  531 |         | #   ifdef BSWAP8
  532 |         |             Xi[0] = BSWAP8(Z.hi);
  533 |         |             Xi[1] = BSWAP8(Z.lo);
  534 |         | #   else
  535 |         |             u8 *p = (u8 *)Xi;
  536 |         |             u32 v;
  537 |         |             v = (u32)(Z.hi >> 32);
  538 |         |             PUTU32(p, v);
  539 |         |             v = (u32)(Z.hi);
  540 |         |             PUTU32(p + 4, v);
  541 |         |             v = (u32)(Z.lo >> 32);
  542 |         |             PUTU32(p + 8, v);
  543 |         |             v = (u32)(Z.lo);
  544 |         |             PUTU32(p + 12, v);
  545 |         | #   endif
  546 |         |         } else {
  547 |         |             Xi[0] = Z.hi;
  548 |         |             Xi[1] = Z.lo;
  549 |         |         }
  550 |         |     } while (inp += 16, len -= 16);
  551 |         | }
  552 |         | #  endif
  553 |         | # else
  554 |         | void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
  555 |         | void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  556 |         |                     size_t len);
  557 |         | # endif
  558 |         |
  559 |         | # define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
  560 |         | # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
  561 |         | #  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
  562 |         | /*
  563 |         |  * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
  564 |         |  * effect. In other words idea is to hash data while it's still in L1 cache
  565 |         |  * after encryption pass...
  566 |         |  */
  567 |     338 | #  define GHASH_CHUNK       (3*1024)
  568 |         | # endif
  569 |         |
  570 |         | #else                           /* TABLE_BITS */
  571 |         |
  572 |         | static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
  573 |         | {
  574 |         |     u128 V, Z = { 0, 0 };
  575 |         |     long X;
  576 |         |     int i, j;
  577 |         |     const long *xi = (const long *)Xi;
  578 |         |     const union {
  579 |         |         long one;
  580 |         |         char little;
  581 |         |     } is_endian = { 1 };
  582 |         |
  583 |         |     V.hi = H[0];                /* H is in host byte order, no byte swapping */
  584 |         |     V.lo = H[1];
  585 |         |
  586 |         |     for (j = 0; j < 16 / sizeof(long); ++j) {
  587 |         |         if (is_endian.little) {
  588 |         |             if (sizeof(long) == 8) {
  589 |         | # ifdef BSWAP8
  590 |         |                 X = (long)(BSWAP8(xi[j]));
  591 |         | # else
  592 |         |                 const u8 *p = (const u8 *)(xi + j);
  593 |         |                 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
  594 |         | # endif
  595 |         |             } else {
  596 |         |                 const u8 *p = (const u8 *)(xi + j);
  597 |         |                 X = (long)GETU32(p);
  598 |         |             }
  599 |         |         } else
  600 |         |             X = xi[j];
  601 |         |
  602 |         |         for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
  603 |         |             u64 M = (u64)(X >> (8 * sizeof(long) - 1));
  604 |         |             Z.hi ^= V.hi & M;
  605 |         |             Z.lo ^= V.lo & M;
  606 |         |
  607 |         |             REDUCE1BIT(V);
  608 |         |         }
  609 |         |     }
  610 |         |
  611 |         |     if (is_endian.little) {
  612 |         | # ifdef BSWAP8
  613 |         |         Xi[0] = BSWAP8(Z.hi);
  614 |         |         Xi[1] = BSWAP8(Z.lo);
  615 |         | # else
  616 |         |         u8 *p = (u8 *)Xi;
  617 |         |         u32 v;
  618 |         |         v = (u32)(Z.hi >> 32);
  619 |         |         PUTU32(p, v);
  620 |         |         v = (u32)(Z.hi);
  621 |         |         PUTU32(p + 4, v);
  622 |         |         v = (u32)(Z.lo >> 32);
  623 |         |         PUTU32(p + 8, v);
  624 |         |         v = (u32)(Z.lo);
  625 |         |         PUTU32(p + 12, v);
  626 |         | # endif
  627 |         |     } else {
  628 |         |         Xi[0] = Z.hi;
  629 |         |         Xi[1] = Z.lo;
  630 |         |     }
  631 |         | }
  632 |         |
  633 |         | # define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
  634 |         |
  635 |         | #endif
  636 |         |
  637 |         | #if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
  638 |         | # if    !defined(I386_ONLY) && \
  639 |         |         (defined(__i386)        || defined(__i386__)    || \
  640 |         |          defined(__x86_64)      || defined(__x86_64__)  || \
  641 |         |          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
  642 |         | #  define GHASH_ASM_X86_OR_64
  643 |         | #  define GCM_FUNCREF_4BIT
  644 |         | extern unsigned int OPENSSL_ia32cap_P[];
  645 |         |
  646 |         | void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
  647 |         | void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
  648 |         | void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  649 |         |                      size_t len);
  650 |         |
  651 |         | #  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
  652 |         | #   define gcm_init_avx   gcm_init_clmul
  653 |         | #   define gcm_gmult_avx  gcm_gmult_clmul
  654 |         | #   define gcm_ghash_avx  gcm_ghash_clmul
  655 |         | #  else
  656 |         | void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
  657 |         | void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
  658 |         | void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  659 |         |                    size_t len);
  660 |         | #  endif
  661 |         |
  662 |         | #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
  663 |         | #   define GHASH_ASM_X86
  664 |         | void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
  665 |         | void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  666 |         |                         size_t len);
  667 |         |
  668 |         | void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
  669 |         | void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  670 |         |                         size_t len);
  671 |         | #  endif
  672 |         | # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
  673 |         | #  include "arm_arch.h"
  674 |         | #  if __ARM_MAX_ARCH__>=7
  675 |         | #   define GHASH_ASM_ARM
  676 |         | #   define GCM_FUNCREF_4BIT
  677 |         | #   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
  678 |         | #   if defined(__arm__) || defined(__arm)
  679 |         | #    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
  680 |         | #   endif
  681 |         | void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
  682 |         | void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
  683 |         | void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  684 |         |                     size_t len);
  685 |         | void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
  686 |         | void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
  687 |         | void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  688 |         |                   size_t len);
  689 |         | #  endif
  690 |         | # elif defined(__sparc__) || defined(__sparc)
  691 |         | #  include "sparc_arch.h"
  692 |         | #  define GHASH_ASM_SPARC
  693 |         | #  define GCM_FUNCREF_4BIT
  694 |         | extern unsigned int OPENSSL_sparcv9cap_P[];
  695 |         | void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
  696 |         | void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
  697 |         | void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  698 |         |                     size_t len);
  699 |         | # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
  700 |         | #  include "ppc_arch.h"
  701 |         | #  define GHASH_ASM_PPC
  702 |         | #  define GCM_FUNCREF_4BIT
  703 |         | void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
  704 |         | void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
  705 |         | void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
  706 |         |                   size_t len);
  707 |         | # endif
  708 |         | #endif
  709 |         |
  710 |         | #ifdef GCM_FUNCREF_4BIT
  711 |         | # undef  GCM_MUL
  712 |      41 | # define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
  713 |         | # ifdef GHASH
  714 |         | #  undef  GHASH
  715 |     548 | #  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
  716 |         | # endif
  717 |         | #endif
  718 |         |
  719 |         | void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
  720 |     213 | {
  721 |     213 |     const union {
  722 |     213 |         long one;
  723 |     213 |         char little;
  724 |     213 |     } is_endian = { 1 };
  725 |         |
  726 |     213 |     memset(ctx, 0, sizeof(*ctx));
  727 |     213 |     ctx->block = block;
  728 |     213 |     ctx->key = key;
  729 |         |
  730 |     213 |     (*block) (ctx->H.c, ctx->H.c, key);
  731 |         |
  732 |     213 |     if (is_endian.little) {
  733 |         |         /* H is stored in host byte order */
  734 |         | #ifdef BSWAP8
  735 |         |         ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
  736 |         |         ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
  737 |         | #else
  738 |     213 |         u8 *p = ctx->H.c;
  739 |     213 |         u64 hi, lo;
  740 |     213 |         hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
  741 |     213 |         lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
  742 |     213 |         ctx->H.u[0] = hi;
  743 |     213 |         ctx->H.u[1] = lo;
  744 |     213 | #endif
  745 |     213 |     }
  746 |         | #if     TABLE_BITS==8
  747 |         |     gcm_init_8bit(ctx->Htable, ctx->H.u);
  748 |         | #elif   TABLE_BITS==4
  749 |     213 | # if    defined(GHASH)
  750 |     213 | #  define CTX__GHASH(f) (ctx->ghash = (f))
  751 |         | # else
  752 |         | #  define CTX__GHASH(f) (ctx->ghash = NULL)
  753 |         | # endif
  754 |     213 | # if    defined(GHASH_ASM_X86_OR_64)
  755 |     213 | #  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
  756 |     213 |     if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
  757 |     213 |         if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
  758 |     213 |             gcm_init_avx(ctx->Htable, ctx->H.u);
  759 |     213 |             ctx->gmult = gcm_gmult_avx;
  760 |     213 |             CTX__GHASH(gcm_ghash_avx);
  761 |     213 |         } else {
  762 |       0 |             gcm_init_clmul(ctx->Htable, ctx->H.u);
  763 |       0 |             ctx->gmult = gcm_gmult_clmul;
  764 |       0 |             CTX__GHASH(gcm_ghash_clmul);
  765 |       0 |         }
  766 |     213 |         return;
  767 |     213 |     }
  768 |       0 | #  endif
  769 |       0 |     gcm_init_4bit(ctx->Htable, ctx->H.u);
  770 |         | #  if   defined(GHASH_ASM_X86)  /* x86 only */
  771 |         | #   if  defined(OPENSSL_IA32_SSE2)
  772 |         |     if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
  773 |         | #   else
  774 |         |     if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
  775 |         | #   endif
  776 |         |         ctx->gmult = gcm_gmult_4bit_mmx;
  777 |         |         CTX__GHASH(gcm_ghash_4bit_mmx);
  778 |         |     } else {
  779 |         |         ctx->gmult = gcm_gmult_4bit_x86;
  780 |         |         CTX__GHASH(gcm_ghash_4bit_x86);
  781 |         |     }
  782 |         | #  else
  783 |       0 |     ctx->gmult = gcm_gmult_4bit;
  784 |       0 |     CTX__GHASH(gcm_ghash_4bit);
  785 |       0 | #  endif
  786 |         | # elif  defined(GHASH_ASM_ARM)
  787 |         | #  ifdef PMULL_CAPABLE
  788 |         |     if (PMULL_CAPABLE) {
  789 |         |         gcm_init_v8(ctx->Htable, ctx->H.u);
  790 |         |         ctx->gmult = gcm_gmult_v8;
  791 |         |         CTX__GHASH(gcm_ghash_v8);
  792 |         |     } else
  793 |         | #  endif
  794 |         | #  ifdef NEON_CAPABLE
  795 |         |     if (NEON_CAPABLE) {
  796 |         |         gcm_init_neon(ctx->Htable, ctx->H.u);
  797 |         |         ctx->gmult = gcm_gmult_neon;
  798 |         |         CTX__GHASH(gcm_ghash_neon);
  799 |         |     } else
  800 |         | #  endif
  801 |         |     {
  802 |         |         gcm_init_4bit(ctx->Htable, ctx->H.u);
  803 |         |         ctx->gmult = gcm_gmult_4bit;
  804 |         |         CTX__GHASH(gcm_ghash_4bit);
  805 |         |     }
  806 |         | # elif  defined(GHASH_ASM_SPARC)
  807 |         |     if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
  808 |         |         gcm_init_vis3(ctx->Htable, ctx->H.u);
  809 |         |         ctx->gmult = gcm_gmult_vis3;
  810 |         |         CTX__GHASH(gcm_ghash_vis3);
  811 |         |     } else {
  812 |         |         gcm_init_4bit(ctx->Htable, ctx->H.u);
  813 |         |         ctx->gmult = gcm_gmult_4bit;
  814 |         |         CTX__GHASH(gcm_ghash_4bit);
  815 |         |     }
  816 |         | # elif  defined(GHASH_ASM_PPC)
  817 |         |     if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
  818 |         |         gcm_init_p8(ctx->Htable, ctx->H.u);
  819 |         |         ctx->gmult = gcm_gmult_p8;
  820 |         |         CTX__GHASH(gcm_ghash_p8);
  821 |         |     } else {
  822 |         |         gcm_init_4bit(ctx->Htable, ctx->H.u);
  823 |         |         ctx->gmult = gcm_gmult_4bit;
  824 |         |         CTX__GHASH(gcm_ghash_4bit);
  825 |         |     }
  826 |         | # else
  827 |         |     gcm_init_4bit(ctx->Htable, ctx->H.u);
  828 |         | # endif
  829 |       0 | # undef CTX__GHASH
  830 |       0 | #endif
  831 |       0 | }
  832 |         |
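For orientation before the per-message entry points, here is a minimal caller-side sketch of this API. It is a sketch only, assuming AES as the block cipher via <openssl/aes.h> and the public wrappers CRYPTO_gcm128_new(), CRYPTO_gcm128_tag() and CRYPTO_gcm128_release() declared in <openssl/modes.h> (their definitions lie past the end of this report); error handling is omitted.

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* Sketch: one-shot AES-128-GCM encryption through the gcm128 API.
     * Assumes a fresh 12-byte IV per key; writes the 16-byte tag. */
    static void gcm128_encrypt_sketch(const unsigned char key[16],
                                      const unsigned char iv[12],
                                      const unsigned char *aad, size_t aad_len,
                                      const unsigned char *pt, size_t pt_len,
                                      unsigned char *ct, unsigned char tag[16])
    {
        AES_KEY aes;
        GCM128_CONTEXT *gcm;

        AES_set_encrypt_key(key, 128, &aes);
        gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt);
        CRYPTO_gcm128_setiv(gcm, iv, 12);      /* 12-byte IVs take the fast path */
        CRYPTO_gcm128_aad(gcm, aad, aad_len);  /* all AAD before any plaintext */
        CRYPTO_gcm128_encrypt(gcm, pt, ct, pt_len);
        CRYPTO_gcm128_tag(gcm, tag, 16);
        CRYPTO_gcm128_release(gcm);
    }

When decrypting, the same flow applies with CRYPTO_gcm128_decrypt(), and the tag must be verified with CRYPTO_gcm128_finish() before the plaintext is used.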
  833 |         | void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
  834 |         |                          size_t len)
  835 |     245 | {
  836 |     245 |     const union {
  837 |     245 |         long one;
  838 |     245 |         char little;
  839 |     245 |     } is_endian = { 1 };
  840 |     245 |     unsigned int ctr;
  841 |     245 | #ifdef GCM_FUNCREF_4BIT
  842 |     245 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
  843 |     245 | #endif
  844 |         |
  845 |     245 |     ctx->len.u[0] = 0;          /* AAD length */
  846 |     245 |     ctx->len.u[1] = 0;          /* message length */
  847 |     245 |     ctx->ares = 0;
  848 |     245 |     ctx->mres = 0;
  849 |         |
  850 |     245 |     if (len == 12) {
  851 |     245 |         memcpy(ctx->Yi.c, iv, 12);
  852 |     245 |         ctx->Yi.c[12] = 0;
  853 |     245 |         ctx->Yi.c[13] = 0;
  854 |     245 |         ctx->Yi.c[14] = 0;
  855 |     245 |         ctx->Yi.c[15] = 1;
  856 |     245 |         ctr = 1;
  857 |     245 |     } else {
  858 |       0 |         size_t i;
  859 |       0 |         u64 len0 = len;
  860 |         |
  861 |         |         /* Borrow ctx->Xi to calculate initial Yi */
  862 |       0 |         ctx->Xi.u[0] = 0;
  863 |       0 |         ctx->Xi.u[1] = 0;
  864 |         |
  865 |       0 |         while (len >= 16) {
  866 |       0 |             for (i = 0; i < 16; ++i)
  867 |       0 |                 ctx->Xi.c[i] ^= iv[i];
  868 |       0 |             GCM_MUL(ctx);
  869 |       0 |             iv += 16;
  870 |       0 |             len -= 16;
  871 |       0 |         }
  872 |       0 |         if (len) {
  873 |       0 |             for (i = 0; i < len; ++i)
  874 |       0 |                 ctx->Xi.c[i] ^= iv[i];
  875 |       0 |             GCM_MUL(ctx);
  876 |       0 |         }
  877 |       0 |         len0 <<= 3;
  878 |       0 |         if (is_endian.little) {
  879 |         | #ifdef BSWAP8
  880 |         |             ctx->Xi.u[1] ^= BSWAP8(len0);
  881 |         | #else
  882 |       0 |             ctx->Xi.c[8] ^= (u8)(len0 >> 56);
  883 |       0 |             ctx->Xi.c[9] ^= (u8)(len0 >> 48);
  884 |       0 |             ctx->Xi.c[10] ^= (u8)(len0 >> 40);
  885 |       0 |             ctx->Xi.c[11] ^= (u8)(len0 >> 32);
  886 |       0 |             ctx->Xi.c[12] ^= (u8)(len0 >> 24);
  887 |       0 |             ctx->Xi.c[13] ^= (u8)(len0 >> 16);
  888 |       0 |             ctx->Xi.c[14] ^= (u8)(len0 >> 8);
  889 |       0 |             ctx->Xi.c[15] ^= (u8)(len0);
  890 |       0 | #endif
  891 |       0 |         } else {
  892 |       0 |             ctx->Xi.u[1] ^= len0;
  893 |       0 |         }
  894 |         |
  895 |       0 |         GCM_MUL(ctx);
  896 |         |
  897 |       0 |         if (is_endian.little)
  898 |         | #ifdef BSWAP4
  899 |         |             ctr = BSWAP4(ctx->Xi.d[3]);
  900 |         | #else
  901 |       0 |             ctr = GETU32(ctx->Xi.c + 12);
  902 |       0 | #endif
  903 |       0 |         else
  904 |       0 |             ctr = ctx->Xi.d[3];
  905 |         |
  906 |         |         /* Copy borrowed Xi to Yi */
  907 |       0 |         ctx->Yi.u[0] = ctx->Xi.u[0];
  908 |       0 |         ctx->Yi.u[1] = ctx->Xi.u[1];
  909 |       0 |     }
  910 |         |
  911 |     245 |     ctx->Xi.u[0] = 0;
  912 |     245 |     ctx->Xi.u[1] = 0;
  913 |         |
  914 |     245 |     (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
  915 |     245 |     ++ctr;
  916 |     245 |     if (is_endian.little)
  917 |         | #ifdef BSWAP4
  918 |         |         ctx->Yi.d[3] = BSWAP4(ctr);
  919 |         | #else
  920 |     245 |         PUTU32(ctx->Yi.c + 12, ctr);
  921 |       0 | #endif
  922 |       0 |     else
  923 |       0 |         ctx->Yi.d[3] = ctr;
  924 |     245 | }
  925 |         |
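The 12-byte branch above is the GCM special case for 96-bit IVs: the pre-counter block is simply

        J0 = IV || 0^31 || 1.

Any other IV length takes the slow path,

        J0 = GHASH_H( IV padded to a block boundary || [0]_64 || [8*len(IV)]_64 ),

which is exactly what the borrowed-Xi computation implements before copying the result into Yi.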
  926 |         | int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
  927 |         |                       size_t len)
  928 |     245 | {
  929 |     245 |     size_t i;
  930 |     245 |     unsigned int n;
  931 |     245 |     u64 alen = ctx->len.u[0];
  932 |     245 | #ifdef GCM_FUNCREF_4BIT
  933 |     245 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
  934 |     245 | # ifdef GHASH
  935 |     245 |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
  936 |     245 |                          const u8 *inp, size_t len) = ctx->ghash;
  937 |     245 | # endif
  938 |     245 | #endif
  939 |         |
  940 |     245 |     if (ctx->len.u[1])
  941 |       0 |         return -2;
  942 |         |
  943 |     245 |     alen += len;
  944 |     245 |     if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
  945 |       0 |         return -1;
  946 |     245 |     ctx->len.u[0] = alen;
  947 |         |
  948 |     245 |     n = ctx->ares;
  949 |     245 |     if (n) {
  950 |       0 |         while (n && len) {
  951 |       0 |             ctx->Xi.c[n] ^= *(aad++);
  952 |       0 |             --len;
  953 |       0 |             n = (n + 1) % 16;
  954 |       0 |         }
  955 |       0 |         if (n == 0)
  956 |       0 |             GCM_MUL(ctx);
  957 |       0 |         else {
  958 |       0 |             ctx->ares = n;
  959 |       0 |             return 0;
  960 |       0 |         }
  961 |       0 |     }
  962 |     245 | #ifdef GHASH
  963 |     245 |     if ((i = (len & (size_t)-16))) {
  964 |       0 |         GHASH(ctx, aad, i);
  965 |       0 |         aad += i;
  966 |       0 |         len -= i;
  967 |       0 |     }
  968 |         | #else
  969 |         |     while (len >= 16) {
  970 |         |         for (i = 0; i < 16; ++i)
  971 |         |             ctx->Xi.c[i] ^= aad[i];
  972 |         |         GCM_MUL(ctx);
  973 |         |         aad += 16;
  974 |         |         len -= 16;
  975 |         |     }
  976 |         | #endif
  977 |     245 |     if (len) {
  978 |     245 |         n = (unsigned int)len;
  979 |   3.11k |         for (i = 0; i < len; ++i)
  980 |   2.86k |             ctx->Xi.c[i] ^= aad[i];
  981 |     245 |     }
  982 |         |
  983 |     245 |     ctx->ares = n;
  984 |     245 |     return 0;
  985 |     245 | }
  986 |         |
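A recurring idiom in the bulk paths above and below: len & (size_t)-16 rounds len down to a whole number of 16-byte blocks, since (size_t)-16 is the all-ones word with the low four bits clear. For example, len = 45 gives 45 & ~15 = 32, leaving a 13-byte tail for the byte-at-a-time remainder loop.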
  987 |         | int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
  988 |         |                           const unsigned char *in, unsigned char *out,
  989 |         |                           size_t len)
  990 |     110 | {
  991 |     110 |     const union {
  992 |     110 |         long one;
  993 |     110 |         char little;
  994 |     110 |     } is_endian = { 1 };
  995 |     110 |     unsigned int n, ctr, mres;
  996 |     110 |     size_t i;
  997 |     110 |     u64 mlen = ctx->len.u[1];
  998 |     110 |     block128_f block = ctx->block;
  999 |     110 |     void *key = ctx->key;
 1000 |     110 | #ifdef GCM_FUNCREF_4BIT
 1001 |     110 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
 1002 |     110 | # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
 1003 |     110 |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
 1004 |     110 |                          const u8 *inp, size_t len) = ctx->ghash;
 1005 |     110 | # endif
 1006 |     110 | #endif
 1007 |         |
 1008 |     110 |     mlen += len;
 1009 |     110 |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
 1010 |       0 |         return -1;
 1011 |     110 |     ctx->len.u[1] = mlen;
 1012 |         |
 1013 |     110 |     mres = ctx->mres;
 1014 |         |
 1015 |     110 |     if (ctx->ares) {
 1016 |         |         /* First call to encrypt finalizes GHASH(AAD) */
 1017 |     110 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
 1018 |     110 |         if (len == 0) {
 1019 |       0 |             GCM_MUL(ctx);
 1020 |       0 |             ctx->ares = 0;
 1021 |       0 |             return 0;
 1022 |       0 |         }
 1023 |     110 |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
 1024 |     110 |         ctx->Xi.u[0] = 0;
 1025 |     110 |         ctx->Xi.u[1] = 0;
 1026 |     110 |         mres = sizeof(ctx->Xi);
 1027 |         | #else
 1028 |         |         GCM_MUL(ctx);
 1029 |         | #endif
 1030 |     110 |         ctx->ares = 0;
 1031 |     110 |     }
 1032 |         |
 1033 |     110 |     if (is_endian.little)
 1034 |         | #ifdef BSWAP4
 1035 |         |         ctr = BSWAP4(ctx->Yi.d[3]);
 1036 |         | #else
 1037 |     110 |         ctr = GETU32(ctx->Yi.c + 12);
 1038 |       0 | #endif
 1039 |       0 |     else
 1040 |       0 |         ctr = ctx->Yi.d[3];
 1041 |         |
 1042 |     110 |     n = mres % 16;
 1043 |     110 | #if !defined(OPENSSL_SMALL_FOOTPRINT)
 1044 |     110 |     if (16 % sizeof(size_t) == 0) { /* always true actually */
 1045 |     110 |         do {
 1046 |     110 |             if (n) {
 1047 |       0 | # if defined(GHASH)
 1048 |       0 |                 while (n && len) {
 1049 |       0 |                     ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
 1050 |       0 |                     --len;
 1051 |       0 |                     n = (n + 1) % 16;
 1052 |       0 |                 }
 1053 |       0 |                 if (n == 0) {
 1054 |       0 |                     GHASH(ctx, ctx->Xn, mres);
 1055 |       0 |                     mres = 0;
 1056 |       0 |                 } else {
 1057 |       0 |                     ctx->mres = mres;
 1058 |       0 |                     return 0;
 1059 |       0 |                 }
 1060 |         | # else
 1061 |         |                 while (n && len) {
 1062 |         |                     ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
 1063 |         |                     --len;
 1064 |         |                     n = (n + 1) % 16;
 1065 |         |                 }
 1066 |         |                 if (n == 0) {
 1067 |         |                     GCM_MUL(ctx);
 1068 |         |                     mres = 0;
 1069 |         |                 } else {
 1070 |         |                     ctx->mres = n;
 1071 |         |                     return 0;
 1072 |         |                 }
 1073 |         | # endif
 1074 |       0 |             }
 1075 |     110 | # if defined(STRICT_ALIGNMENT)
 1076 |     110 |             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
 1077 |       0 |                 break;
 1078 |     110 | # endif
 1079 |     110 | # if defined(GHASH)
 1080 |     110 |             if (len >= 16 && mres) {
 1081 |      57 |                 GHASH(ctx, ctx->Xn, mres);
 1082 |      57 |                 mres = 0;
 1083 |      57 |             }
 1084 |     110 | #  if defined(GHASH_CHUNK)
 1085 |     110 |             while (len >= GHASH_CHUNK) {
 1086 |       0 |                 size_t j = GHASH_CHUNK;
 1087 |         |
 1088 |       0 |                 while (j) {
 1089 |       0 |                     size_t_aX *out_t = (size_t_aX *)out;
 1090 |       0 |                     const size_t_aX *in_t = (const size_t_aX *)in;
 1091 |         |
 1092 |       0 |                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
 1093 |       0 |                     ++ctr;
 1094 |       0 |                     if (is_endian.little)
 1095 |         | #   ifdef BSWAP4
 1096 |         |                         ctx->Yi.d[3] = BSWAP4(ctr);
 1097 |         | #   else
 1098 |       0 |                         PUTU32(ctx->Yi.c + 12, ctr);
 1099 |       0 | #   endif
 1100 |       0 |                     else
 1101 |       0 |                         ctx->Yi.d[3] = ctr;
 1102 |       0 |                     for (i = 0; i < 16 / sizeof(size_t); ++i)
 1103 |       0 |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
 1104 |       0 |                     out += 16;
 1105 |       0 |                     in += 16;
 1106 |       0 |                     j -= 16;
 1107 |       0 |                 }
 1108 |       0 |                 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
 1109 |       0 |                 len -= GHASH_CHUNK;
 1110 |       0 |             }
 1111 |     110 | #  endif
 1112 |     110 |             if ((i = (len & (size_t)-16))) {
 1113 |      57 |                 size_t j = i;
 1114 |         |
 1115 |     114 |                 while (len >= 16) {
 1116 |      57 |                     size_t_aX *out_t = (size_t_aX *)out;
 1117 |      57 |                     const size_t_aX *in_t = (const size_t_aX *)in;
 1118 |         |
 1119 |      57 |                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
 1120 |      57 |                     ++ctr;
 1121 |      57 |                     if (is_endian.little)
 1122 |         | #  ifdef BSWAP4
 1123 |         |                         ctx->Yi.d[3] = BSWAP4(ctr);
 1124 |         | #  else
 1125 |      57 |                         PUTU32(ctx->Yi.c + 12, ctr);
 1126 |       0 | #  endif
 1127 |       0 |                     else
 1128 |       0 |                         ctx->Yi.d[3] = ctr;
 1129 |     171 |                     for (i = 0; i < 16 / sizeof(size_t); ++i)
 1130 |     114 |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
 1131 |      57 |                     out += 16;
 1132 |      57 |                     in += 16;
 1133 |      57 |                     len -= 16;
 1134 |      57 |                 }
 1135 |      57 |                 GHASH(ctx, out - j, j);
 1136 |      57 |             }
 1137 |         | # else
 1138 |         |             while (len >= 16) {
 1139 |         |                 size_t *out_t = (size_t *)out;
 1140 |         |                 const size_t *in_t = (const size_t *)in;
 1141 |         |
 1142 |         |                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
 1143 |         |                 ++ctr;
 1144 |         |                 if (is_endian.little)
 1145 |         | #  ifdef BSWAP4
 1146 |         |                     ctx->Yi.d[3] = BSWAP4(ctr);
 1147 |         | #  else
 1148 |         |                     PUTU32(ctx->Yi.c + 12, ctr);
 1149 |         | #  endif
 1150 |         |                 else
 1151 |         |                     ctx->Yi.d[3] = ctr;
 1152 |         |                 for (i = 0; i < 16 / sizeof(size_t); ++i)
 1153 |         |                     ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
 1154 |         |                 GCM_MUL(ctx);
 1155 |         |                 out += 16;
 1156 |         |                 in += 16;
 1157 |         |                 len -= 16;
 1158 |         |             }
 1159 |         | # endif
 1160 |     110 |             if (len) {
 1161 |      53 |                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
 1162 |      53 |                 ++ctr;
 1163 |      53 |                 if (is_endian.little)
 1164 |         | # ifdef BSWAP4
 1165 |         |                     ctx->Yi.d[3] = BSWAP4(ctr);
 1166 |         | # else
 1167 |      53 |                     PUTU32(ctx->Yi.c + 12, ctr);
 1168 |       0 | # endif
 1169 |       0 |                 else
 1170 |       0 |                     ctx->Yi.d[3] = ctr;
 1171 |      53 | # if defined(GHASH)
 1172 |     159 |                 while (len--) {
 1173 |     106 |                     ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
 1174 |     106 |                     ++n;
 1175 |     106 |                 }
 1176 |         | # else
 1177 |         |                 while (len--) {
 1178 |         |                     ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
 1179 |         |                     ++n;
 1180 |         |                 }
 1181 |         |                 mres = n;
 1182 |         | # endif
 1183 |      53 |             }
 1184 |         |
 1185 |     110 |             ctx->mres = mres;
 1186 |     110 |             return 0;
 1187 |     110 |         } while (0);
 1188 |     110 |     }
 1189 |       0 | #endif
 1190 |       0 |     for (i = 0; i < len; ++i) {
 1191 |       0 |         if (n == 0) {
 1192 |       0 |             (*block) (ctx->Yi.c, ctx->EKi.c, key);
 1193 |       0 |             ++ctr;
 1194 |       0 |             if (is_endian.little)
 1195 |         | #ifdef BSWAP4
 1196 |         |                 ctx->Yi.d[3] = BSWAP4(ctr);
 1197 |         | #else
 1198 |       0 |                 PUTU32(ctx->Yi.c + 12, ctr);
 1199 |       0 | #endif
 1200 |       0 |             else
 1201 |       0 |                 ctx->Yi.d[3] = ctr;
 1202 |       0 |         }
 1203 |       0 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
 1204 |       0 |         ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
 1205 |       0 |         n = (n + 1) % 16;
 1206 |       0 |         if (mres == sizeof(ctx->Xn)) {
 1207 |       0 |             GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
 1208 |       0 |             mres = 0;
 1209 |       0 |         }
 1210 |         | #else
 1211 |         |         ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
 1212 |         |         mres = n = (n + 1) % 16;
 1213 |         |         if (n == 0)
 1214 |         |             GCM_MUL(ctx);
 1215 |         | #endif
 1216 |       0 |     }
 1217 |         |
 1218 |       0 |     ctx->mres = mres;
 1219 |       0 |     return 0;
 1220 |     110 | }
 1221 |         |
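The mlen guard above (and in the three siblings below) encodes GCM's per-invocation plaintext bound from NIST SP 800-38D: at most 2^39 - 256 bits, i.e.

        (2^39 - 256) / 8 = 2^36 - 32 bytes,

which is exactly the (U64(1) << 36) - 32 used in the test.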
1222
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1223
                          const unsigned char *in, unsigned char *out,
1224
                          size_t len)
1225
81
{
1226
81
    const union {
1227
81
        long one;
1228
81
        char little;
1229
81
    } is_endian = { 1 };
1230
81
    unsigned int n, ctr, mres;
1231
81
    size_t i;
1232
81
    u64 mlen = ctx->len.u[1];
1233
81
    block128_f block = ctx->block;
1234
81
    void *key = ctx->key;
1235
81
#ifdef GCM_FUNCREF_4BIT
1236
81
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1237
81
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1238
81
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1239
81
                         const u8 *inp, size_t len) = ctx->ghash;
1240
81
# endif
1241
81
#endif
1242
1243
81
    mlen += len;
1244
81
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1245
0
        return -1;
1246
81
    ctx->len.u[1] = mlen;
1247
1248
81
    mres = ctx->mres;
1249
1250
81
    if (ctx->ares) {
1251
        /* First call to decrypt finalizes GHASH(AAD) */
1252
81
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1253
81
        if (len == 0) {
1254
36
            GCM_MUL(ctx);
1255
36
            ctx->ares = 0;
1256
36
            return 0;
1257
36
        }
1258
45
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1259
45
        ctx->Xi.u[0] = 0;
1260
45
        ctx->Xi.u[1] = 0;
1261
45
        mres = sizeof(ctx->Xi);
1262
#else
1263
        GCM_MUL(ctx);
1264
#endif
1265
45
        ctx->ares = 0;
1266
45
    }
1267
1268
45
    if (is_endian.little)
1269
#ifdef BSWAP4
1270
        ctr = BSWAP4(ctx->Yi.d[3]);
1271
#else
1272
45
        ctr = GETU32(ctx->Yi.c + 12);
1273
0
#endif
1274
0
    else
1275
0
        ctr = ctx->Yi.d[3];
1276
1277
45
    n = mres % 16;
1278
45
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1279
45
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1280
45
        do {
1281
45
            if (n) {
1282
0
# if defined(GHASH)
1283
0
                while (n && len) {
1284
0
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1285
0
                    --len;
1286
0
                    n = (n + 1) % 16;
1287
0
                }
1288
0
                if (n == 0) {
1289
0
                    GHASH(ctx, ctx->Xn, mres);
1290
0
                    mres = 0;
1291
0
                } else {
1292
0
                    ctx->mres = mres;
1293
0
                    return 0;
1294
0
                }
1295
# else
1296
                while (n && len) {
1297
                    u8 c = *(in++);
1298
                    *(out++) = c ^ ctx->EKi.c[n];
1299
                    ctx->Xi.c[n] ^= c;
1300
                    --len;
1301
                    n = (n + 1) % 16;
1302
                }
1303
                if (n == 0) {
1304
                    GCM_MUL(ctx);
1305
                    mres = 0;
1306
                } else {
1307
                    ctx->mres = n;
1308
                    return 0;
1309
                }
1310
# endif
1311
0
            }
1312
45
# if defined(STRICT_ALIGNMENT)
1313
45
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1314
0
                break;
1315
45
# endif
1316
45
# if defined(GHASH)
1317
45
            if (len >= 16 && mres) {
1318
39
                GHASH(ctx, ctx->Xn, mres);
1319
39
                mres = 0;
1320
39
            }
1321
45
#  if defined(GHASH_CHUNK)
1322
78
            while (len >= GHASH_CHUNK) {
1323
33
                size_t j = GHASH_CHUNK;
1324
1325
33
                GHASH(ctx, in, GHASH_CHUNK);
1326
6.36k
                while (j) {
1327
6.33k
                    size_t_aX *out_t = (size_t_aX *)out;
1328
6.33k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1329
1330
6.33k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1331
6.33k
                    ++ctr;
1332
6.33k
                    if (is_endian.little)
1333
#   ifdef BSWAP4
1334
                        ctx->Yi.d[3] = BSWAP4(ctr);
1335
#   else
1336
6.33k
                        PUTU32(ctx->Yi.c + 12, ctr);
1337
0
#   endif
1338
0
                    else
1339
0
                        ctx->Yi.d[3] = ctr;
1340
19.0k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1341
12.6k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1342
6.33k
                    out += 16;
1343
6.33k
                    in += 16;
1344
6.33k
                    j -= 16;
1345
6.33k
                }
1346
33
                len -= GHASH_CHUNK;
1347
33
            }
1348
45
#  endif
1349
45
            if ((i = (len & (size_t)-16))) {
1350
37
                GHASH(ctx, in, i);
1351
1.87k
                while (len >= 16) {
1352
1.83k
                    size_t_aX *out_t = (size_t_aX *)out;
1353
1.83k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1354
1355
1.83k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1356
1.83k
                    ++ctr;
1357
1.83k
                    if (is_endian.little)
1358
#  ifdef BSWAP4
1359
                        ctx->Yi.d[3] = BSWAP4(ctr);
1360
#  else
1361
1.83k
                        PUTU32(ctx->Yi.c + 12, ctr);
1362
0
#  endif
1363
0
                    else
1364
0
                        ctx->Yi.d[3] = ctr;
1365
5.49k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1366
3.66k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1367
1.83k
                    out += 16;
1368
1.83k
                    in += 16;
1369
1.83k
                    len -= 16;
1370
1.83k
                }
1371
37
            }
1372
# else
1373
            while (len >= 16) {
1374
                size_t *out_t = (size_t *)out;
1375
                const size_t *in_t = (const size_t *)in;
1376
1377
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1378
                ++ctr;
1379
                if (is_endian.little)
1380
#  ifdef BSWAP4
1381
                    ctx->Yi.d[3] = BSWAP4(ctr);
1382
#  else
1383
                    PUTU32(ctx->Yi.c + 12, ctr);
1384
#  endif
1385
                else
1386
                    ctx->Yi.d[3] = ctr;
1387
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1388
                    size_t c = in_t[i];
1389
                    out_t[i] = c ^ ctx->EKi.t[i];
1390
                    ctx->Xi.t[i] ^= c;
1391
                }
1392
                GCM_MUL(ctx);
1393
                out += 16;
1394
                in += 16;
1395
                len -= 16;
1396
            }
1397
# endif
1398
45
            if (len) {
1399
37
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1400
37
                ++ctr;
1401
37
                if (is_endian.little)
1402
# ifdef BSWAP4
1403
                    ctx->Yi.d[3] = BSWAP4(ctr);
1404
# else
1405
37
                    PUTU32(ctx->Yi.c + 12, ctr);
1406
0
# endif
1407
0
                else
1408
0
                    ctx->Yi.d[3] = ctr;
1409
37
# if defined(GHASH)
1410
346
                while (len--) {
1411
309
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1412
309
                    ++n;
1413
309
                }
1414
# else
1415
                while (len--) {
1416
                    u8 c = in[n];
1417
                    ctx->Xi.c[n] ^= c;
1418
                    out[n] = c ^ ctx->EKi.c[n];
1419
                    ++n;
1420
                }
1421
                mres = n;
1422
# endif
1423
37
            }
1424
1425
45
            ctx->mres = mres;
1426
45
            return 0;
1427
45
        } while (0);
1428
45
    }
1429
0
#endif
1430
0
    for (i = 0; i < len; ++i) {
1431
0
        u8 c;
1432
0
        if (n == 0) {
1433
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1434
0
            ++ctr;
1435
0
            if (is_endian.little)
1436
#ifdef BSWAP4
1437
                ctx->Yi.d[3] = BSWAP4(ctr);
1438
#else
1439
0
                PUTU32(ctx->Yi.c + 12, ctr);
1440
0
#endif
1441
0
            else
1442
0
                ctx->Yi.d[3] = ctr;
1443
0
        }
1444
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1445
0
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1446
0
        n = (n + 1) % 16;
1447
0
        if (mres == sizeof(ctx->Xn)) {
1448
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1449
0
            mres = 0;
1450
0
        }
1451
#else
1452
        c = in[i];
1453
        out[i] = c ^ ctx->EKi.c[n];
1454
        ctx->Xi.c[n] ^= c;
1455
        mres = n = (n + 1) % 16;
1456
        if (n == 0)
1457
            GCM_MUL(ctx);
1458
#endif
1459
0
    }
1460
1461
0
    ctx->mres = mres;
1462
0
    return 0;
1463
45
}
1464
1465
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1466
                                const unsigned char *in, unsigned char *out,
1467
                                size_t len, ctr128_f stream)
1468
40
{
1469
#if defined(OPENSSL_SMALL_FOOTPRINT)
1470
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1471
#else
1472
40
    const union {
1473
40
        long one;
1474
40
        char little;
1475
40
    } is_endian = { 1 };
1476
40
    unsigned int n, ctr, mres;
1477
40
    size_t i;
1478
40
    u64 mlen = ctx->len.u[1];
1479
40
    void *key = ctx->key;
1480
40
# ifdef GCM_FUNCREF_4BIT
1481
40
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1482
40
#  ifdef GHASH
1483
40
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1484
40
                         const u8 *inp, size_t len) = ctx->ghash;
1485
40
#  endif
1486
40
# endif
1487
1488
40
    mlen += len;
1489
40
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1490
0
        return -1;
1491
40
    ctx->len.u[1] = mlen;
1492
1493
40
    mres = ctx->mres;
1494
1495
40
    if (ctx->ares) {
1496
        /* First call to encrypt finalizes GHASH(AAD) */
1497
40
#if defined(GHASH)
1498
40
        if (len == 0) {
1499
0
            GCM_MUL(ctx);
1500
0
            ctx->ares = 0;
1501
0
            return 0;
1502
0
        }
1503
40
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1504
40
        ctx->Xi.u[0] = 0;
1505
40
        ctx->Xi.u[1] = 0;
1506
40
        mres = sizeof(ctx->Xi);
1507
#else
1508
        GCM_MUL(ctx);
1509
#endif
1510
40
        ctx->ares = 0;
1511
40
    }
1512
1513
40
    if (is_endian.little)
1514
# ifdef BSWAP4
1515
        ctr = BSWAP4(ctx->Yi.d[3]);
1516
# else
1517
40
        ctr = GETU32(ctx->Yi.c + 12);
1518
0
# endif
1519
0
    else
1520
0
        ctr = ctx->Yi.d[3];
1521
1522
40
    n = mres % 16;
1523
40
    if (n) {
1524
0
# if defined(GHASH)
1525
0
        while (n && len) {
1526
0
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1527
0
            --len;
1528
0
            n = (n + 1) % 16;
1529
0
        }
1530
0
        if (n == 0) {
1531
0
            GHASH(ctx, ctx->Xn, mres);
1532
0
            mres = 0;
1533
0
        } else {
1534
0
            ctx->mres = mres;
1535
0
            return 0;
1536
0
        }
1537
# else
1538
        while (n && len) {
1539
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1540
            --len;
1541
            n = (n + 1) % 16;
1542
        }
1543
        if (n == 0) {
1544
            GCM_MUL(ctx);
1545
            mres = 0;
1546
        } else {
1547
            ctx->mres = n;
1548
            return 0;
1549
        }
1550
# endif
1551
0
    }
1552
40
# if defined(GHASH)
1553
40
        if (len >= 16 && mres) {
1554
26
            GHASH(ctx, ctx->Xn, mres);
1555
26
            mres = 0;
1556
26
        }
1557
40
#  if defined(GHASH_CHUNK)
1558
40
    while (len >= GHASH_CHUNK) {
1559
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1560
0
        ctr += GHASH_CHUNK / 16;
1561
0
        if (is_endian.little)
1562
#   ifdef BSWAP4
1563
            ctx->Yi.d[3] = BSWAP4(ctr);
1564
#   else
1565
0
            PUTU32(ctx->Yi.c + 12, ctr);
1566
0
#   endif
1567
0
        else
1568
0
            ctx->Yi.d[3] = ctr;
1569
0
        GHASH(ctx, out, GHASH_CHUNK);
1570
0
        out += GHASH_CHUNK;
1571
0
        in += GHASH_CHUNK;
1572
0
        len -= GHASH_CHUNK;
1573
0
    }
1574
40
#  endif
1575
40
# endif
1576
40
    if ((i = (len & (size_t)-16))) {
1577
26
        size_t j = i / 16;
1578
1579
26
        (*stream) (in, out, j, key, ctx->Yi.c);
1580
26
        ctr += (unsigned int)j;
1581
26
        if (is_endian.little)
1582
# ifdef BSWAP4
1583
            ctx->Yi.d[3] = BSWAP4(ctr);
1584
# else
1585
26
            PUTU32(ctx->Yi.c + 12, ctr);
1586
0
# endif
1587
0
        else
1588
0
            ctx->Yi.d[3] = ctr;
1589
26
        in += i;
1590
26
        len -= i;
1591
26
# if defined(GHASH)
1592
26
        GHASH(ctx, out, i);
1593
26
        out += i;
1594
# else
1595
        while (j--) {
1596
            for (i = 0; i < 16; ++i)
1597
                ctx->Xi.c[i] ^= out[i];
1598
            GCM_MUL(ctx);
1599
            out += 16;
1600
        }
1601
# endif
1602
26
    }
1603
40
    if (len) {
1604
14
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1605
14
        ++ctr;
1606
14
        if (is_endian.little)
1607
# ifdef BSWAP4
1608
            ctx->Yi.d[3] = BSWAP4(ctr);
1609
# else
1610
14
            PUTU32(ctx->Yi.c + 12, ctr);
1611
0
# endif
1612
0
        else
1613
0
            ctx->Yi.d[3] = ctr;
1614
42
        while (len--) {
1615
28
# if defined(GHASH)
1616
28
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1617
# else
1618
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1619
# endif
1620
28
            ++n;
1621
28
        }
1622
14
    }
1623
1624
40
    ctx->mres = mres;
1625
40
    return 0;
1626
40
#endif
1627
40
}
1628
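
CRYPTO_gcm128_encrypt_ctr32 is the fast-path variant of CRYPTO_gcm128_encrypt: instead of invoking the block cipher once per counter block, it hands whole runs of blocks to a platform-specific ctr128_f `stream` callback (for example a hardware AES-CTR routine) and authenticates the resulting ciphertext with GHASH afterwards. The length check against (1 << 36) - 32 bytes enforces the NIST SP 800-38D plaintext cap of 2^39 - 256 bits per invocation. Below is a minimal, hypothetical sketch of the contract the `stream` argument must satisfy; demo_block and demo_ctr32 are assumptions for illustration, not part of this file.

#include <string.h>

/* assumed block cipher primitive (block128_f shape), e.g. AES_encrypt */
static void demo_block(const unsigned char in[16], unsigned char out[16],
                       const void *key);

/*
 * Sketch of a portable ctr128_f: encrypt `blocks` full 16-byte blocks,
 * treating the last 32 bits of ivec as a big-endian counter. The caller
 * (gcm128.c) updates its own copy of the counter after the call, so
 * ivec is read-only here.
 */
static void demo_ctr32(const unsigned char *in, unsigned char *out,
                       size_t blocks, const void *key,
                       const unsigned char ivec[16])
{
    unsigned char ctr[16], ks[16];

    memcpy(ctr, ivec, 16);
    while (blocks--) {
        size_t i;
        int j;

        demo_block(ctr, ks, key);       /* ks = E_K(counter block) */
        for (i = 0; i < 16; ++i)
            out[i] = in[i] ^ ks[i];
        for (j = 15; j >= 12; --j)      /* 32-bit big-endian increment */
            if (++ctr[j] != 0)
                break;
        in += 16;
        out += 16;
    }
}
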
1629
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1630
                                const unsigned char *in, unsigned char *out,
1631
                                size_t len, ctr128_f stream)
1632
49
{
1633
#if defined(OPENSSL_SMALL_FOOTPRINT)
1634
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1635
#else
1636
49
    const union {
1637
49
        long one;
1638
49
        char little;
1639
49
    } is_endian = { 1 };
1640
49
    unsigned int n, ctr, mres;
1641
49
    size_t i;
1642
49
    u64 mlen = ctx->len.u[1];
1643
49
    void *key = ctx->key;
1644
49
# ifdef GCM_FUNCREF_4BIT
1645
49
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1646
49
#  ifdef GHASH
1647
49
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1648
49
                         const u8 *inp, size_t len) = ctx->ghash;
1649
49
#  endif
1650
49
# endif
1651
1652
49
    mlen += len;
1653
49
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1654
0
        return -1;
1655
49
    ctx->len.u[1] = mlen;
1656
1657
49
    mres = ctx->mres;
1658
1659
49
    if (ctx->ares) {
1660
        /* First call to decrypt finalizes GHASH(AAD) */
1661
14
# if defined(GHASH)
1662
14
        if (len == 0) {
1663
5
            GCM_MUL(ctx);
1664
5
            ctx->ares = 0;
1665
5
            return 0;
1666
5
        }
1667
9
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1668
9
        ctx->Xi.u[0] = 0;
1669
9
        ctx->Xi.u[1] = 0;
1670
9
        mres = sizeof(ctx->Xi);
1671
# else
1672
        GCM_MUL(ctx);
1673
# endif
1674
9
        ctx->ares = 0;
1675
9
    }
1676
1677
44
    if (is_endian.little)
1678
# ifdef BSWAP4
1679
        ctr = BSWAP4(ctx->Yi.d[3]);
1680
# else
1681
44
        ctr = GETU32(ctx->Yi.c + 12);
1682
0
# endif
1683
0
    else
1684
0
        ctr = ctx->Yi.d[3];
1685
1686
44
    n = mres % 16;
1687
44
    if (n) {
1688
0
# if defined(GHASH)
1689
0
        while (n && len) {
1690
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1691
0
            --len;
1692
0
            n = (n + 1) % 16;
1693
0
        }
1694
0
        if (n == 0) {
1695
0
            GHASH(ctx, ctx->Xn, mres);
1696
0
            mres = 0;
1697
0
        } else {
1698
0
            ctx->mres = mres;
1699
0
            return 0;
1700
0
        }
1701
# else
1702
        while (n && len) {
1703
            u8 c = *(in++);
1704
            *(out++) = c ^ ctx->EKi.c[n];
1705
            ctx->Xi.c[n] ^= c;
1706
            --len;
1707
            n = (n + 1) % 16;
1708
        }
1709
        if (n == 0) {
1710
            GCM_MUL(ctx);
1711
            mres = 0;
1712
        } else {
1713
            ctx->mres = n;
1714
            return 0;
1715
        }
1716
# endif
1717
0
    }
1718
44
# if defined(GHASH)
1719
44
    if (len >= 16 && mres) {
1720
0
        GHASH(ctx, ctx->Xn, mres);
1721
0
        mres = 0;
1722
0
    }
1723
44
#  if defined(GHASH_CHUNK)
1724
44
    while (len >= GHASH_CHUNK) {
1725
0
        GHASH(ctx, in, GHASH_CHUNK);
1726
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1727
0
        ctr += GHASH_CHUNK / 16;
1728
0
        if (is_endian.little)
1729
#   ifdef BSWAP4
1730
            ctx->Yi.d[3] = BSWAP4(ctr);
1731
#   else
1732
0
            PUTU32(ctx->Yi.c + 12, ctr);
1733
0
#   endif
1734
0
        else
1735
0
            ctx->Yi.d[3] = ctr;
1736
0
        out += GHASH_CHUNK;
1737
0
        in += GHASH_CHUNK;
1738
0
        len -= GHASH_CHUNK;
1739
0
    }
1740
44
#  endif
1741
44
# endif
1742
44
    if ((i = (len & (size_t)-16))) {
1743
28
        size_t j = i / 16;
1744
1745
28
# if defined(GHASH)
1746
28
        GHASH(ctx, in, i);
1747
# else
1748
        while (j--) {
1749
            size_t k;
1750
            for (k = 0; k < 16; ++k)
1751
                ctx->Xi.c[k] ^= in[k];
1752
            GCM_MUL(ctx);
1753
            in += 16;
1754
        }
1755
        j = i / 16;
1756
        in -= i;
1757
# endif
1758
28
        (*stream) (in, out, j, key, ctx->Yi.c);
1759
28
        ctr += (unsigned int)j;
1760
28
        if (is_endian.little)
1761
# ifdef BSWAP4
1762
            ctx->Yi.d[3] = BSWAP4(ctr);
1763
# else
1764
28
            PUTU32(ctx->Yi.c + 12, ctr);
1765
0
# endif
1766
0
        else
1767
0
            ctx->Yi.d[3] = ctr;
1768
28
        out += i;
1769
28
        in += i;
1770
28
        len -= i;
1771
28
    }
1772
44
    if (len) {
1773
35
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1774
35
        ++ctr;
1775
35
        if (is_endian.little)
1776
# ifdef BSWAP4
1777
            ctx->Yi.d[3] = BSWAP4(ctr);
1778
# else
1779
35
            PUTU32(ctx->Yi.c + 12, ctr);
1780
0
# endif
1781
0
        else
1782
0
            ctx->Yi.d[3] = ctr;
1783
334
        while (len--) {
1784
299
# if defined(GHASH)
1785
299
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1786
# else
1787
            u8 c = in[n];
1788
            ctx->Xi.c[mres++] ^= c;
1789
            out[n] = c ^ ctx->EKi.c[n];
1790
# endif
1791
299
            ++n;
1792
299
        }
1793
35
    }
1794
1795
44
    ctx->mres = mres;
1796
44
    return 0;
1797
44
#endif
1798
44
}
1799
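
The decrypt path mirrors the encrypt path with one essential reordering: GHASH must run over the ciphertext, so each bulk chunk is hashed (GHASH(ctx, in, ...)) before it is decrypted, which also keeps in-place operation (in == out) correct. A hedged usage sketch of the verify side follows; gcm_open, my_ctr32 and the buffer names are hypothetical, while the CRYPTO_gcm128_* calls are the real entry points from this file (GCM128_CONTEXT is only fully defined in the internal modes header).

#include <openssl/aes.h>
#include "modes_local.h"        /* same internal header this file uses */

/* any ctr128_f, e.g. the demo_ctr32 sketched above */
static void my_ctr32(const unsigned char *in, unsigned char *out,
                     size_t blocks, const void *key,
                     const unsigned char ivec[16]);

/*
 * One-shot AES-GCM open: returns 0 only if the tag verifies
 * (CRYPTO_gcm128_finish compares with CRYPTO_memcmp, constant-time).
 */
static int gcm_open(const AES_KEY *aes, const unsigned char iv[12],
                    const unsigned char *aad, size_t aadlen,
                    const unsigned char *ct, size_t ctlen,
                    const unsigned char tag[16], unsigned char *pt)
{
    GCM128_CONTEXT ctx;

    CRYPTO_gcm128_init(&ctx, (void *)aes, (block128_f)AES_encrypt);
    CRYPTO_gcm128_setiv(&ctx, iv, 12);
    if (aadlen && CRYPTO_gcm128_aad(&ctx, aad, aadlen))
        return -1;
    if (CRYPTO_gcm128_decrypt_ctr32(&ctx, ct, pt, ctlen, my_ctr32))
        return -1;
    return CRYPTO_gcm128_finish(&ctx, tag, 16);
}
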
1800
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1801
                         size_t len)
1802
245
{
1803
245
    const union {
1804
245
        long one;
1805
245
        char little;
1806
245
    } is_endian = { 1 };
1807
245
    u64 alen = ctx->len.u[0] << 3;
1808
245
    u64 clen = ctx->len.u[1] << 3;
1809
245
#ifdef GCM_FUNCREF_4BIT
1810
245
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1811
245
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1812
245
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1813
245
                         const u8 *inp, size_t len) = ctx->ghash;
1814
245
# endif
1815
245
#endif
1816
1817
245
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1818
245
    u128 bitlen;
1819
245
    unsigned int mres = ctx->mres;
1820
1821
245
    if (mres) {
1822
139
        unsigned blocks = (mres + 15) & -16;
1823
1824
139
        memset(ctx->Xn + mres, 0, blocks - mres);
1825
139
        mres = blocks;
1826
139
        if (mres == sizeof(ctx->Xn)) {
1827
0
            GHASH(ctx, ctx->Xn, mres);
1828
0
            mres = 0;
1829
0
        }
1830
139
    } else if (ctx->ares) {
1831
0
        GCM_MUL(ctx);
1832
0
    }
1833
#else
1834
    if (ctx->mres || ctx->ares)
1835
        GCM_MUL(ctx);
1836
#endif
1837
1838
245
    if (is_endian.little) {
1839
#ifdef BSWAP8
1840
        alen = BSWAP8(alen);
1841
        clen = BSWAP8(clen);
1842
#else
1843
245
        u8 *p = ctx->len.c;
1844
1845
245
        ctx->len.u[0] = alen;
1846
245
        ctx->len.u[1] = clen;
1847
1848
245
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1849
245
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1850
245
#endif
1851
245
    }
1852
1853
245
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1854
245
    bitlen.hi = alen;
1855
245
    bitlen.lo = clen;
1856
245
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1857
245
    mres += sizeof(bitlen);
1858
245
    GHASH(ctx, ctx->Xn, mres);
1859
#else
1860
    ctx->Xi.u[0] ^= alen;
1861
    ctx->Xi.u[1] ^= clen;
1862
    GCM_MUL(ctx);
1863
#endif
1864
1865
245
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1866
245
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1867
1868
245
    if (tag && len <= sizeof(ctx->Xi))
1869
40
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1870
205
    else
1871
205
        return -1;
1872
245
}
1873
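
CRYPTO_gcm128_finish completes the GHASH computation: it zero-pads whatever input is still buffered, appends the 128-bit length block (64-bit big-endian bit counts of the AAD and the ciphertext, which is what the GETU32 shuffling above produces on little-endian targets lacking BSWAP8), runs the hash, and XORs in EK0 = E_K(Y0). In SP 800-38D terms, Xi ends up holding T = E_K(J0) XOR GHASH_H(A || pad || C || pad || [len(A)]_64 || [len(C)]_64). A small hypothetical helper, not in this file, showing just the length-block layout:

#include <stdint.h>

/*
 * Hypothetical helper: the final 16-byte GHASH block appended by
 * CRYPTO_gcm128_finish; lengths are in *bits*, big-endian.
 */
static void gcm_len_block(unsigned char blk[16],
                          uint64_t aad_bytes, uint64_t ct_bytes)
{
    uint64_t abits = aad_bytes << 3, cbits = ct_bytes << 3;
    int i;

    for (i = 0; i < 8; ++i) {
        blk[7 - i]  = (unsigned char)(abits >> (8 * i));    /* [len(A)]64 */
        blk[15 - i] = (unsigned char)(cbits >> (8 * i));    /* [len(C)]64 */
    }
}

Note that when tag is NULL the function still finalizes Xi but returns -1; that is the generation path exercised by CRYPTO_gcm128_tag below.
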
1874
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1875
205
{
1876
205
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1877
205
    memcpy(tag, ctx->Xi.c,
1878
205
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1879
205
}
1880
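
CRYPTO_gcm128_tag is the seal-side counterpart: it calls finish() with a NULL tag (whose -1 return is deliberately ignored) and copies out at most 16 bytes of Xi, so truncated tags fall out for free. A hedged sketch of the call pattern, assuming the same includes and the hypothetical my_ctr32 from the gcm_open example:

/*
 * Seal side (hypothetical wrapper): encrypt, then read the tag out.
 * The finish/compare return value only matters on the verify side.
 */
static int gcm_seal(GCM128_CONTEXT *ctx, const unsigned char *pt,
                    unsigned char *ct, size_t len, unsigned char tag[16])
{
    if (CRYPTO_gcm128_encrypt_ctr32(ctx, pt, ct, len, my_ctr32))
        return -1;                       /* message length cap exceeded */
    CRYPTO_gcm128_tag(ctx, tag, 16);     /* tag = E_K(Y0) ^ GHASH(...) */
    return 0;
}
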
1881
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1882
0
{
1883
0
    GCM128_CONTEXT *ret;
1884
1885
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1886
0
        CRYPTO_gcm128_init(ret, key, block);
1887
1888
0
    return ret;
1889
0
}
1890
1891
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1892
0
{
1893
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1894
0
}
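
The heap constructor/destructor pair rounds out the API: CRYPTO_gcm128_new allocates a context and runs CRYPTO_gcm128_init on it (returning NULL if OPENSSL_malloc fails), and CRYPTO_gcm128_release frees it with OPENSSL_clear_free, so H, the precomputed tables and any buffered partial blocks are zeroized before the memory is returned. The context stores only a pointer to the caller's key schedule, which must therefore outlive it. A minimal hypothetical lifecycle sketch:

#include <openssl/aes.h>
#include "modes_local.h"

static int demo_lifecycle(const unsigned char key[16])
{
    AES_KEY aes;
    GCM128_CONTEXT *g;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return -1;
    if ((g = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
        return -1;
    /* ... CRYPTO_gcm128_setiv / _aad / _encrypt_ctr32 / _tag ... */
    CRYPTO_gcm128_release(g);   /* zeroizes the whole context */
    return 0;
}
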