Coverage Report

Created: 2024-05-15 07:14

/src/openssl/crypto/modes/gcm128.c
/*
 * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <string.h>
#include <openssl/crypto.h>
#include "internal/cryptlib.h"
#include "crypto/modes.h"

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
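
/*
 * REDUCE1BIT multiplies the field element V by x in GCM's bit-reflected
 * representation of GF(2^128): the 128-bit value is shifted right by one
 * bit and, when a 1 is shifted out (the coefficient of x^127), the field
 * polynomial x^128 + x^7 + x^2 + x + 1 is folded back in.  Bit-reflected,
 * that polynomial is the 0xE1 pattern in the top byte.  A branchy
 * equivalent of the 64-bit path, as a sketch for reference only:
 *
 *     u64 carry = V.lo & 1;
 *     V.lo = (V.hi << 63) | (V.lo >> 1);
 *     V.hi = (V.hi >> 1) ^ (carry ? U64(0xe100000000000000) : 0);
 *
 * The mask form above computes the same thing without a branch.
 */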

/*-
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
 * should never be set to 8.  8 is effectively reserved for testing
 * purposes.  TABLE_BITS>1 selects the lookup-table-driven
 * implementations referred to as "Shoup's" in the GCM specification;
 * in other words OpenSSL does not cover the whole spectrum of possible
 * table-driven implementations.  Why?  In the non-"Shoup's" case the
 * memory access pattern is segmented in such a manner that it is
 * trivial to see that cache-timing information can reveal a fair
 * portion of the intermediate hash value.  Given that the ciphertext
 * is always available to an attacker, it is possible to attempt to
 * deduce the secret parameter H and, if successful, tamper with
 * messages [which is nothing but trivial in CTR mode].  In the
 * "Shoup's" case it is not as trivial, but there is no reason to
 * believe it is resistant to cache-timing attacks either.  The thing
 * about the "8-bit" implementation is that it consumes 16 (sixteen)
 * times more memory, 4KB per individual key + 1KB shared.  On the
 * pros side, it should be about twice as fast as the "4-bit" version;
 * for gcc-generated x86[_64] code the "8-bit" version was observed to
 * run ~75% faster, closer to 100% for commercial compilers...  Yet the
 * "4-bit" procedure is preferred, because it is believed to provide a
 * better security-performance balance and adequate all-round
 * performance.  "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free
 *   results in VM working-set trimming, meaning that a subsequent
 *   malloc would immediately incur working-set expansion);
 * - a larger table has a larger cache footprint, which can affect the
 *   performance of other code paths (not necessarily even from the
 *   same thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
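
/*
 * For concreteness: a Shoup table holds H*i for every value i of a
 * TABLE_BITS-wide digit, i.e. 2^TABLE_BITS u128 entries of 16 bytes
 * each.  That is 256*16 = 4096 bytes per key for TABLE_BITS==8 versus
 * 16*16 = 256 bytes per key for TABLE_BITS==4, the sixteen-fold
 * difference cited above (the shared rem_8bit/rem_4bit tables add the
 * "+ 1KB shared" part).
 */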
#if     TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 256; i <<= 1) {
        u128 *Hi = Htable + i, H0 = *Hi;
        for (j = 1; j < i; ++j) {
            Hi[j].hi = H0.hi ^ Htable[j].hi;
            Hi[j].lo = H0.lo ^ Htable[j].lo;
        }
    }
}
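
/*
 * The construction above relies on multiplication by H being linear
 * over XOR: for a power of two i and any j < i, the index i + j has
 * disjoint bits, so Htable[i + j] = Htable[i] ^ Htable[j].  Only the
 * power-of-two entries (H times a power of x) need field reductions
 * via REDUCE1BIT; e.g. Htable[5] is just Htable[4] ^ Htable[1].  The
 * 4-bit gcm_init_4bit below builds its 16-entry table the same way.
 */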

static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi + 15;
    size_t rem, n = *xi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
    };

    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi == xi)
            break;

        n = *(--xi);

        rem = (size_t)Z.lo & 0xff;
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
        Z.hi = (Z.hi >> 8);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem] << 32;
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
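
/*
 * The loop above is Shoup's method with byte-sized digits: Xi is
 * consumed from its last byte towards its first, each byte selecting a
 * precomputed multiple of H from Htable.  Between bytes, Z is shifted
 * right by 8 bits and rem_8bit supplies the 16-bit polynomial that the
 * eight shifted-out bits reduce to; PACK() positions that constant in
 * the top bits of a size_t so a single XOR applies it to Z.hi.
 */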

# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)

#elif   TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        const union {
            long one;
            char little;
        } is_endian = { 1 };

        if (is_endian.little)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
            }
        else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}

# ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#  ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#  else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
#  endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

#  if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_ghash_4bit; see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64].  It's here
 * mostly as a reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };

#   if 1
    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
#   else
    /*
     * An extra 256+16 bytes per key plus 512 bytes of shared tables
     * [should] give a ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize the
     * cache footprint...
     */
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
    };
    /*
     * This pre-processing phase slows the procedure down by about as
     * much as it makes each loop spin faster.  In other words,
     * single-block performance is approximately the same as for the
     * straightforward "4-bit" implementation, and from there it only
     * gets faster...
     */
    for (cnt = 0; cnt < 16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
        Hshr4[cnt].hi = (Z.hi >> 4);
        Hshl4[cnt] = (u8)(Z.lo << 4);
    }

    do {
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo & 0xff;

            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
            Z.hi = (Z.hi >> 8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
        }

        nlo = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo & 0xf;

        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
#   endif

        if (is_endian.little) {
#   ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#   else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
#   endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp += 16, len -= 16);
}
#  endif
# else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" whose mission is to mitigate
 * cache-thrashing effects.  In other words, the idea is to hash data
 * while it is still in the L1 cache after the encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif
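
/*
 * With GHASH_CHUNK = 3*1024 bytes, the bulk loops in
 * CRYPTO_gcm128_[en|de]crypt alternate between enciphering
 * 3072/16 = 192 counter blocks and GHASH-ing those same 3KB, so the
 * hashed data is still L1-resident when the hash pass runs.
 */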

#else                           /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    long X;
    int i, j;
    const long *xi = (const long *)Xi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    V.hi = H[0];                /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j = 0; j < 16 / sizeof(long); ++j) {
        if (is_endian.little) {
            if (sizeof(long) == 8) {
# ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
# else
                const u8 *p = (const u8 *)(xi + j);
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
# endif
            } else {
                const u8 *p = (const u8 *)(xi + j);
                X = (long)GETU32(p);
            }
        } else
            X = xi[j];

        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
            Z.hi ^= V.hi & M;
            Z.lo ^= V.lo & M;

            REDUCE1BIT(V);
        }
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
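
/*
 * gcm_gmult_1bit is the schoolbook bit-serial multiplier: for each of
 * the 128 bits of Xi, the arithmetic shift producing M yields an
 * all-ones mask exactly when the current bit is set, so V (a running
 * multiple of H) is accumulated into Z without a data-dependent
 * branch; REDUCE1BIT then advances V to the next multiple of x.
 */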

# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif

#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_sparcv9cap_P[];
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif
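
/*
 * When GCM_FUNCREF_4BIT is in force, GCM_MUL and GHASH dispatch through
 * ctx->gmult and ctx->ghash; each public entry point below copies those
 * pointers into locals named gcm_gmult_p and gcm_ghash_p, which the
 * redefined macros above reference.
 */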

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    (*block) (ctx->H.c, ctx->H.c, key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }
#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif   TABLE_BITS==4
# if    defined(GHASH)
#  define CTX__GHASH(f) (ctx->ghash = (f))
# else
#  define CTX__GHASH(f) (ctx->ghash = NULL)
# endif
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            CTX__GHASH(gcm_ghash_avx);
        } else {
            gcm_init_clmul(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            CTX__GHASH(gcm_ghash_clmul);
        }
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if   defined(GHASH_ASM_X86)  /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        CTX__GHASH(gcm_ghash_4bit_mmx);
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        CTX__GHASH(gcm_ghash_4bit_x86);
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    CTX__GHASH(gcm_ghash_4bit);
#  endif
# elif  defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        CTX__GHASH(gcm_ghash_v8);
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        CTX__GHASH(gcm_ghash_neon);
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        gcm_init_vis3(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        CTX__GHASH(gcm_ghash_vis3);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        gcm_init_p8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        CTX__GHASH(gcm_ghash_p8);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# else
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
# undef CTX__GHASH
#endif
}
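
/*
 * A minimal usage sketch (AES_encrypt from <openssl/aes.h> has the
 * block128_f shape; error checking omitted):
 *
 *     GCM128_CONTEXT gcm;
 *     AES_KEY ks;
 *
 *     AES_set_encrypt_key(key, 128, &ks);
 *     CRYPTO_gcm128_init(&gcm, &ks, (block128_f)AES_encrypt);
 *     CRYPTO_gcm128_setiv(&gcm, iv, 12);
 *     CRYPTO_gcm128_aad(&gcm, aad, aad_len);
 *     CRYPTO_gcm128_encrypt(&gcm, plaintext, ciphertext, pt_len);
 *     CRYPTO_gcm128_tag(&gcm, tag, 16);
 *
 * CRYPTO_gcm128_tag() and CRYPTO_gcm128_finish() are declared in
 * <openssl/modes.h> alongside the functions in this file.
 */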

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (is_endian.little)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (is_endian.little)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
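
/*
 * Per the GCM specification, a 96-bit IV is used directly:
 * Y0 = IV || 0x00000001.  Any other length is first GHASH-ed together
 * with its bit length, Y0 = GHASH(IV zero-padded to a block boundary
 * || 0^64 || [len(IV)]_64), which is what the len0 XOR above computes.
 * In both cases EK0 = E(K, Y0) is saved for the final tag, and the
 * 32-bit big-endian counter in Yi[12..15] then starts at Y0's counter
 * value plus one.
 */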

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
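
/*
 * Return values above: -2 if message data has already been processed
 * (all AAD must precede the first CRYPTO_gcm128_encrypt/decrypt call),
 * -1 if the accumulated AAD length exceeds 2^61 bytes (the spec bounds
 * AAD at 2^64 bits) or the length counter overflows, and 0 on success.
 * ctx->ares carries a partial AAD block between calls.
 */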

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
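
/*
 * The 2^36 - 32 byte ceiling enforced above is the spec's plaintext
 * limit of 2^39 - 256 bits, i.e. 2^32 - 2 blocks: the 32-bit counter
 * must not wrap back onto the Y0 block reserved for the tag.
 * ctx->mres tracks keystream/GHASH bytes pending from a partial block
 * so that encryption can resume across calls.
 */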

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
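
/*
 * Decryption mirrors encryption with one ordering difference: GHASH is
 * taken over the ciphertext, so here the input is hashed (or staged in
 * ctx->Xn) before it is XOR-ed with the keystream, whereas the encrypt
 * path hashes its output.
 */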

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (is_endian.little)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (is_endian.little)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
1622
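/*
 * For reference, the ctr128_f contract the (*stream)() calls above rely
 * on: encrypt `blocks` 16-byte blocks in counter mode, starting from the
 * 32-bit big-endian counter held in ivec[12..15], without writing the
 * counter back (the caller refreshes Yi itself after every call).  A
 * portable sketch, assuming AES as the underlying block cipher; it is
 * illustrative only, as real builds plug an optimized assembly kernel in
 * here instead.
 */
#include <openssl/aes.h>

static void ref_ctr32(const unsigned char *in, unsigned char *out,
                      size_t blocks, const void *key,
                      const unsigned char ivec[16])
{
    unsigned char ctrblk[16], ks[16];
    unsigned int c, i;

    memcpy(ctrblk, ivec, 16);           /* local copy; ivec is not modified */
    c = GETU32(ctrblk + 12);            /* low 32 bits of the counter block */
    while (blocks--) {
        AES_encrypt(ctrblk, ks, (const AES_KEY *)key);
        for (i = 0; i < 16; ++i)
            out[i] = in[i] ^ ks[i];     /* XOR keystream into the data */
        in += 16;
        out += 16;
        ++c;                            /* wraps modulo 2^32 by design */
        PUTU32(ctrblk + 12, c);
    }
}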
1623
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1624
                                const unsigned char *in, unsigned char *out,
1625
                                size_t len, ctr128_f stream)
1626
0
{
1627
#if defined(OPENSSL_SMALL_FOOTPRINT)
1628
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1629
#else
1630
0
    const union {
1631
0
        long one;
1632
0
        char little;
1633
0
    } is_endian = { 1 };
1634
0
    unsigned int n, ctr, mres;
1635
0
    size_t i;
1636
0
    u64 mlen = ctx->len.u[1];
1637
0
    void *key = ctx->key;
1638
0
# ifdef GCM_FUNCREF_4BIT
1639
0
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1640
0
#  ifdef GHASH
1641
0
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1642
0
                         const u8 *inp, size_t len) = ctx->ghash;
1643
0
#  endif
1644
0
# endif
1645
1646
0
    mlen += len;
1647
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1648
0
        return -1;
1649
0
    ctx->len.u[1] = mlen;
1650
1651
0
    mres = ctx->mres;
1652
1653
0
    if (ctx->ares) {
1654
        /* First call to decrypt finalizes GHASH(AAD) */
1655
0
# if defined(GHASH)
1656
0
        if (len == 0) {
1657
0
            GCM_MUL(ctx);
1658
0
            ctx->ares = 0;
1659
0
            return 0;
1660
0
        }
1661
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1662
0
        ctx->Xi.u[0] = 0;
1663
0
        ctx->Xi.u[1] = 0;
1664
0
        mres = sizeof(ctx->Xi);
1665
# else
1666
        GCM_MUL(ctx);
1667
# endif
1668
0
        ctx->ares = 0;
1669
0
    }
1670
1671
0
    if (is_endian.little)
1672
# ifdef BSWAP4
1673
        ctr = BSWAP4(ctx->Yi.d[3]);
1674
# else
1675
0
        ctr = GETU32(ctx->Yi.c + 12);
1676
0
# endif
1677
0
    else
1678
0
        ctr = ctx->Yi.d[3];
1679
1680
0
    n = mres % 16;
1681
0
    if (n) {
1682
0
# if defined(GHASH)
1683
0
        while (n && len) {
1684
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1685
0
            --len;
1686
0
            n = (n + 1) % 16;
1687
0
        }
1688
0
        if (n == 0) {
1689
0
            GHASH(ctx, ctx->Xn, mres);
1690
0
            mres = 0;
1691
0
        } else {
1692
0
            ctx->mres = mres;
1693
0
            return 0;
1694
0
        }
1695
# else
1696
        while (n && len) {
1697
            u8 c = *(in++);
1698
            *(out++) = c ^ ctx->EKi.c[n];
1699
            ctx->Xi.c[n] ^= c;
1700
            --len;
1701
            n = (n + 1) % 16;
1702
        }
1703
        if (n == 0) {
1704
            GCM_MUL(ctx);
1705
            mres = 0;
1706
        } else {
1707
            ctx->mres = n;
1708
            return 0;
1709
        }
1710
# endif
1711
0
    }
1712
0
# if defined(GHASH)
1713
0
    if (len >= 16 && mres) {
1714
0
        GHASH(ctx, ctx->Xn, mres);
1715
0
        mres = 0;
1716
0
    }
1717
0
#  if defined(GHASH_CHUNK)
1718
0
    while (len >= GHASH_CHUNK) {
1719
0
        GHASH(ctx, in, GHASH_CHUNK);
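        /*
         * Note the order relative to the encrypt path: the ciphertext is
         * hashed before the keystream is applied, so in-place operation
         * (in == out) still hashes the data before it is overwritten.
         */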
1720
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1721
0
        ctr += GHASH_CHUNK / 16;
1722
0
        if (is_endian.little)
1723
#   ifdef BSWAP4
1724
            ctx->Yi.d[3] = BSWAP4(ctr);
1725
#   else
1726
0
            PUTU32(ctx->Yi.c + 12, ctr);
1727
0
#   endif
1728
0
        else
1729
0
            ctx->Yi.d[3] = ctr;
1730
0
        out += GHASH_CHUNK;
1731
0
        in += GHASH_CHUNK;
1732
0
        len -= GHASH_CHUNK;
1733
0
    }
1734
0
#  endif
1735
0
# endif
1736
0
    if ((i = (len & (size_t)-16))) {
1737
0
        size_t j = i / 16;
1738
1739
0
# if defined(GHASH)
1740
0
        GHASH(ctx, in, i);
1741
# else
1742
        while (j--) {
1743
            size_t k;
1744
            for (k = 0; k < 16; ++k)
1745
                ctx->Xi.c[k] ^= in[k];
1746
            GCM_MUL(ctx);
1747
            in += 16;
1748
        }
1749
        j = i / 16;
1750
        in -= i;
1751
# endif
1752
0
        (*stream) (in, out, j, key, ctx->Yi.c);
1753
0
        ctr += (unsigned int)j;
1754
0
        if (is_endian.little)
1755
# ifdef BSWAP4
1756
            ctx->Yi.d[3] = BSWAP4(ctr);
1757
# else
1758
0
            PUTU32(ctx->Yi.c + 12, ctr);
1759
0
# endif
1760
0
        else
1761
0
            ctx->Yi.d[3] = ctr;
1762
0
        out += i;
1763
0
        in += i;
1764
0
        len -= i;
1765
0
    }
1766
0
    if (len) {
1767
0
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1768
0
        ++ctr;
1769
0
        if (is_endian.little)
1770
# ifdef BSWAP4
1771
            ctx->Yi.d[3] = BSWAP4(ctr);
1772
# else
1773
0
            PUTU32(ctx->Yi.c + 12, ctr);
1774
0
# endif
1775
0
        else
1776
0
            ctx->Yi.d[3] = ctr;
1777
0
        while (len--) {
1778
0
# if defined(GHASH)
1779
0
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1780
# else
1781
            u8 c = in[n];
1782
            ctx->Xi.c[mres++] ^= c;
1783
            out[n] = c ^ ctx->EKi.c[n];
1784
# endif
1785
0
            ++n;
1786
0
        }
1787
0
    }
1788
1789
0
    ctx->mres = mres;
1790
0
    return 0;
1791
0
#endif
1792
0
}
1793
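/*
 * A minimal sketch of the verify-then-release pattern the decrypt path
 * above is meant to support.  The context is assumed to be keyed with
 * its IV already set, `stream` is whichever ctr128_f kernel the caller
 * selected, and gcm_open() itself is a hypothetical helper, not part of
 * this file's API.
 */
int gcm_open(GCM128_CONTEXT *gcm,
             const unsigned char *aad, size_t aadlen,
             const unsigned char *ct, unsigned char *pt, size_t len,
             const unsigned char tag[16], ctr128_f stream)
{
    if (CRYPTO_gcm128_aad(gcm, aad, aadlen))
        return -1;
    if (CRYPTO_gcm128_decrypt_ctr32(gcm, ct, pt, len, stream))
        return -1;                      /* message length limit exceeded */
    if (CRYPTO_gcm128_finish(gcm, tag, 16)) {
        /* Tag mismatch: wipe the candidate plaintext before reporting. */
        OPENSSL_cleanse(pt, len);
        return -1;
    }
    return 0;
}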
1794
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1795
                         size_t len)
1796
0
{
1797
0
    const union {
1798
0
        long one;
1799
0
        char little;
1800
0
    } is_endian = { 1 };
1801
0
    u64 alen = ctx->len.u[0] << 3;
1802
0
    u64 clen = ctx->len.u[1] << 3;
1803
0
#ifdef GCM_FUNCREF_4BIT
1804
0
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1805
0
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1806
0
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1807
0
                         const u8 *inp, size_t len) = ctx->ghash;
1808
0
# endif
1809
0
#endif
1810
1811
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1812
0
    u128 bitlen;
1813
0
    unsigned int mres = ctx->mres;
1814
1815
0
    if (mres) {
1816
0
        unsigned blocks = (mres + 15) & -16;
1817
1818
0
        memset(ctx->Xn + mres, 0, blocks - mres);
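        /*
         * The memset above zero-pads the pending bytes to a 16-byte
         * boundary, as GHASH consumes whole 128-bit blocks.
         */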
1819
0
        mres = blocks;
1820
0
        if (mres == sizeof(ctx->Xn)) {
1821
0
            GHASH(ctx, ctx->Xn, mres);
1822
0
            mres = 0;
1823
0
        }
1824
0
    } else if (ctx->ares) {
1825
0
        GCM_MUL(ctx);
1826
0
    }
1827
#else
1828
    if (ctx->mres || ctx->ares)
1829
        GCM_MUL(ctx);
1830
#endif
1831
1832
0
    if (is_endian.little) {
1833
#ifdef BSWAP8
1834
        alen = BSWAP8(alen);
1835
        clen = BSWAP8(clen);
1836
#else
1837
0
        u8 *p = ctx->len.c;
1838
1839
0
        ctx->len.u[0] = alen;
1840
0
        ctx->len.u[1] = clen;
1841
1842
0
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1843
0
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1844
0
#endif
1845
0
    }
1846
1847
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1848
0
    bitlen.hi = alen;
1849
0
    bitlen.lo = clen;
1850
0
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1851
0
    mres += sizeof(bitlen);
1852
0
    GHASH(ctx, ctx->Xn, mres);
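    /*
     * The final GHASH input is len(A) || len(C), each as a 64-bit bit
     * count, appended after the zero-padded data per the GCM spec.
     */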
1853
#else
1854
    ctx->Xi.u[0] ^= alen;
1855
    ctx->Xi.u[1] ^= clen;
1856
    GCM_MUL(ctx);
1857
#endif
1858
1859
0
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1860
0
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
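    /*
     * XORing in EK0 = E_K(Y0) turns the GHASH result into the final
     * authentication tag; CRYPTO_memcmp below then compares it against
     * the caller's tag in constant time.
     */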
1861
1862
0
    if (tag && len <= sizeof(ctx->Xi))
1863
0
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1864
0
    else
1865
0
        return -1;
1866
0
}
1867
1868
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1869
0
{
1870
0
    CRYPTO_gcm128_finish(ctx, NULL, 0);
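    /*
     * finish() with a NULL tag computes Xi and reports -1; only the
     * resulting tag bytes are wanted here, so the return value is
     * deliberately ignored.
     */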
1871
0
    memcpy(tag, ctx->Xi.c,
1872
0
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1873
0
}
1874
1875
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1876
0
{
1877
0
    GCM128_CONTEXT *ret;
1878
1879
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1880
0
        CRYPTO_gcm128_init(ret, key, block);
1881
1882
0
    return ret;
1883
0
}
1884
1885
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1886
0
{
1887
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1888
0
}
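/*
 * End-to-end sketch of the heap-based lifecycle defined above, paired
 * with the setiv/aad/encrypt entry points from earlier in this file.
 * AES-128 stands in for the block cipher and gcm_seal_example() is a
 * hypothetical name; error handling is deliberately minimal.
 */
#include <openssl/aes.h>

int gcm_seal_example(const unsigned char key[16],
                     const unsigned char *iv, size_t ivlen,
                     const unsigned char *aad, size_t aadlen,
                     const unsigned char *pt, unsigned char *ct, size_t len,
                     unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT *gcm;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return -1;
    if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
        return -1;
    CRYPTO_gcm128_setiv(gcm, iv, ivlen);
    if (CRYPTO_gcm128_aad(gcm, aad, aadlen)
            || CRYPTO_gcm128_encrypt(gcm, pt, ct, len)) {
        CRYPTO_gcm128_release(gcm);
        return -1;
    }
    CRYPTO_gcm128_tag(gcm, tag, 16);    /* write the 16-byte tag */
    CRYPTO_gcm128_release(gcm);         /* zeroizes and frees the context */
    return 0;
}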