Coverage Report

Created: 2018-08-29 13:53

/src/openssl/crypto/modes/gcm128.c
/*
 * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
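
REDUCE1BIT is the single-bit step of GHASH's carry-less arithmetic: it shifts the 128-bit value right by one bit and, whenever a 1 falls off the low end, folds in the constant 0xE1 followed by fifteen zero bytes -- the GCM reduction polynomial x^128 + x^7 + x^2 + x + 1 in the specification's reflected bit order. A minimal standalone sketch of the 64-bit branch (u128_t and reduce1bit are illustrative stand-ins, not part of this file):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t hi, lo; } u128_t;

/* Same operation as the sizeof(size_t)==8 branch of REDUCE1BIT above. */
static void reduce1bit(u128_t *v)
{
    uint64_t t = 0xe100000000000000ULL & (0 - (v->lo & 1));

    v->lo = (v->hi << 63) | (v->lo >> 1);
    v->hi = (v->hi >> 1) ^ t;
}

int main(void)
{
    u128_t v = { 0, 1 };        /* lowest bit set: forces a reduction */

    reduce1bit(&v);
    /* prints e1000000000000000000000000000000 */
    printf("%016llx%016llx\n",
           (unsigned long long)v.hi, (unsigned long long)v.lo);
    return 0;
}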

/*-
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
 * whole spectrum of possible table driven implementations. Why? In
 * non-"Shoup's" case memory access pattern is segmented in such manner,
 * that it's trivial to see that cache timing information can reveal
 * fair portion of intermediate hash value. Given that ciphertext is
 * always available to attacker, it's possible for him to attempt to
 * deduce secret parameter H and if successful, tamper with messages
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
 * not as trivial, but there is no reason to believe that it's resistant
 * to cache-timing attack. And the thing about "8-bit" implementation is
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
 * key + 1KB shared. Well, on pros side it should be twice as fast as
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
 * was observed to run ~75% faster, closer to 100% for commercial
 * compilers... Yet "4-bit" procedure is preferred, because it's
 * believed to provide better security-performance balance and adequate
 * all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows large enough free
 *   results in VM working set trimming, meaning that consequent
 *   malloc would immediately incur working set expansion);
 * - larger table has larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from same
 *   thread in Hyper-Threading world);
 *
 * Value of 1 is not appropriate for performance reasons.
 */
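
For concreteness, the memory figures cited above can be checked with a few sizeof calculations; this sketch assumes a u128 made of two 64-bit words, matching the hi/lo pair used throughout this file:

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t hi, lo; } u128_t;   /* stand-in for u128 */

int main(void)
{
    /* per-key tables: 16 entries for "4-bit", 256 entries for "8-bit" */
    printf("4-bit Htable: %zu bytes per key\n", 16 * sizeof(u128_t));  /* 256  */
    printf("8-bit Htable: %zu bytes per key\n", 256 * sizeof(u128_t)); /* 4096 */
    /* shared rem_8bit[256] of size_t: 1KB on 32-bit, 2KB on 64-bit builds */
    printf("shared rem_8bit: %zu bytes\n", 256 * sizeof(size_t));
    return 0;
}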
#if     TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 256; i <<= 1) {
        u128 *Hi = Htable + i, H0 = *Hi;
        for (j = 1; j < i; ++j) {
            Hi[j].hi = H0.hi ^ Htable[j].hi;
            Hi[j].lo = H0.lo ^ Htable[j].lo;
        }
    }
}

static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi + 15;
    size_t rem, n = *xi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
    };

    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi == xi)
            break;

        n = *(--xi);

        rem = (size_t)Z.lo & 0xff;
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
        Z.hi = (Z.hi >> 8);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem] << 32;
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)

#elif   TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        const union {
            long one;
            char little;
        } is_endian = { 1 };

        if (is_endian.little)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
        } else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}

# ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#  ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#  else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
#  endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

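gcm_gmult_4bit performs a single Xi <- Xi * H multiplication in place. GHASH over a buffer is then the Horner-style fold Xi <- (Xi xor block) * H, one 16-byte block at a time; the sketch below expresses that fold in terms of this function and mirrors the table-free fallback loop in CRYPTO_gcm128_aad further down:

static void ghash_fold_sketch(u64 Xi[2], const u128 Htable[16],
                              const u8 *inp, size_t len)
{
    size_t i;

    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ((u8 *)Xi)[i] ^= inp[i];    /* Xi ^= next block */
        gcm_gmult_4bit(Xi, Htable);     /* Xi *= H in GF(2^128) */
        inp += 16;
        len -= 16;
    }
}
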
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };

#   if 1
    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
#   else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
    };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt = 0; cnt < 16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
        Hshr4[cnt].hi = (Z.hi >> 4);
        Hshl4[cnt] = (u8)(Z.lo << 4);
    }

    do {
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo & 0xff;

            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
            Z.hi = (Z.hi >> 8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
        }

        nlo = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo & 0xf;

        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
#   endif

        if (is_endian.little) {
#   ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#   else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
#   endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp += 16, len -= 16);
}
#  endif
# else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
 * effect. In other words idea is to hash data while it's still in L1 cache
 * after encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif

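In concrete terms the stride is 3 * 1024 = 3072 bytes, i.e. 3072 / 16 = 192 blocks encrypted per pass before one GHASH sweep over them -- comfortably inside a typical 32KB L1 data cache, which is what "hash data while it's still in L1 cache" relies on.
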
#else                           /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    long X;
    int i, j;
    const long *xi = (const long *)Xi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    V.hi = H[0];                /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j = 0; j < 16 / sizeof(long); ++j) {
        if (is_endian.little) {
            if (sizeof(long) == 8) {
# ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
# else
                const u8 *p = (const u8 *)(xi + j);
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
# endif
            } else {
                const u8 *p = (const u8 *)(xi + j);
                X = (long)GETU32(p);
            }
        } else
            X = xi[j];

        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
            Z.hi ^= V.hi & M;
            Z.lo ^= V.lo & M;

            REDUCE1BIT(V);
        }
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif

#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[];

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_sparcv9cap_P[];
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif

714
0
{
715
0
    const union {
716
0
        long one;
717
0
        char little;
718
0
    } is_endian = { 1 };
719
0
720
0
    memset(ctx, 0, sizeof(*ctx));
721
0
    ctx->block = block;
722
0
    ctx->key = key;
723
0
724
0
    (*block) (ctx->H.c, ctx->H.c, key);
725
0
726
0
    if (is_endian.little) {
727
0
        /* H is stored in host byte order */
728
0
#ifdef BSWAP8
729
0
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
730
0
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
731
#else
732
        u8 *p = ctx->H.c;
733
        u64 hi, lo;
734
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
735
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
736
        ctx->H.u[0] = hi;
737
        ctx->H.u[1] = lo;
738
#endif
739
    }
740
#if     TABLE_BITS==8
741
    gcm_init_8bit(ctx->Htable, ctx->H.u);
742
#elif   TABLE_BITS==4
743
# if    defined(GHASH)
744
0
#  define CTX__GHASH(f) (ctx->ghash = (f))
745
# else
746
#  define CTX__GHASH(f) (ctx->ghash = NULL)
747
# endif
748
# if    defined(GHASH_ASM_X86_OR_64)
749
0
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
750
0
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
751
0
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
752
0
            gcm_init_avx(ctx->Htable, ctx->H.u);
753
0
            ctx->gmult = gcm_gmult_avx;
754
0
            CTX__GHASH(gcm_ghash_avx);
755
0
        } else {
756
0
            gcm_init_clmul(ctx->Htable, ctx->H.u);
757
0
            ctx->gmult = gcm_gmult_clmul;
758
0
            CTX__GHASH(gcm_ghash_clmul);
759
0
        }
760
0
        return;
761
0
    }
762
0
#  endif
763
0
    gcm_init_4bit(ctx->Htable, ctx->H.u);
764
#  if   defined(GHASH_ASM_X86)  /* x86 only */
765
#   if  defined(OPENSSL_IA32_SSE2)
766
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
767
#   else
768
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
769
#   endif
770
        ctx->gmult = gcm_gmult_4bit_mmx;
771
        CTX__GHASH(gcm_ghash_4bit_mmx);
772
    } else {
773
        ctx->gmult = gcm_gmult_4bit_x86;
774
        CTX__GHASH(gcm_ghash_4bit_x86);
775
    }
776
#  else
777
    ctx->gmult = gcm_gmult_4bit;
778
0
    CTX__GHASH(gcm_ghash_4bit);
779
0
#  endif
780
# elif  defined(GHASH_ASM_ARM)
781
#  ifdef PMULL_CAPABLE
782
    if (PMULL_CAPABLE) {
783
        gcm_init_v8(ctx->Htable, ctx->H.u);
784
        ctx->gmult = gcm_gmult_v8;
785
        CTX__GHASH(gcm_ghash_v8);
786
    } else
787
#  endif
788
#  ifdef NEON_CAPABLE
789
    if (NEON_CAPABLE) {
790
        gcm_init_neon(ctx->Htable, ctx->H.u);
791
        ctx->gmult = gcm_gmult_neon;
792
        CTX__GHASH(gcm_ghash_neon);
793
    } else
794
#  endif
795
    {
796
        gcm_init_4bit(ctx->Htable, ctx->H.u);
797
        ctx->gmult = gcm_gmult_4bit;
798
        CTX__GHASH(gcm_ghash_4bit);
799
    }
800
# elif  defined(GHASH_ASM_SPARC)
801
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
802
        gcm_init_vis3(ctx->Htable, ctx->H.u);
803
        ctx->gmult = gcm_gmult_vis3;
804
        CTX__GHASH(gcm_ghash_vis3);
805
    } else {
806
        gcm_init_4bit(ctx->Htable, ctx->H.u);
807
        ctx->gmult = gcm_gmult_4bit;
808
        CTX__GHASH(gcm_ghash_4bit);
809
    }
810
# elif  defined(GHASH_ASM_PPC)
811
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
812
        gcm_init_p8(ctx->Htable, ctx->H.u);
813
        ctx->gmult = gcm_gmult_p8;
814
        CTX__GHASH(gcm_ghash_p8);
815
    } else {
816
        gcm_init_4bit(ctx->Htable, ctx->H.u);
817
        ctx->gmult = gcm_gmult_4bit;
818
        CTX__GHASH(gcm_ghash_4bit);
819
    }
820
# else
821
    gcm_init_4bit(ctx->Htable, ctx->H.u);
822
# endif
823
# undef CTX__GHASH
824
0
#endif
825
0
}
826
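
Taken together with the routines that follow, the intended call sequence is init -> setiv -> aad -> encrypt/decrypt -> tag. A minimal sketch of sealing with AES-128 through the public <openssl/modes.h> interface (outside this directory GCM128_CONTEXT is opaque, hence CRYPTO_gcm128_new; error handling trimmed):

#include <openssl/aes.h>
#include <openssl/modes.h>

int gcm_seal_sketch(const unsigned char key[16], const unsigned char iv[12],
                    const unsigned char *aad, size_t aadlen,
                    const unsigned char *pt, unsigned char *ct, size_t len,
                    unsigned char tag[16])
{
    AES_KEY ks;
    GCM128_CONTEXT *gcm;

    if (AES_set_encrypt_key(key, 128, &ks) != 0)
        return -1;
    gcm = CRYPTO_gcm128_new(&ks, (block128_f)AES_encrypt); /* derives H */
    if (gcm == NULL)
        return -1;
    CRYPTO_gcm128_setiv(gcm, iv, 12);               /* 96-bit IV fast path */
    if (CRYPTO_gcm128_aad(gcm, aad, aadlen) != 0    /* AAD before data */
        || CRYPTO_gcm128_encrypt(gcm, pt, ct, len) != 0) {
        CRYPTO_gcm128_release(gcm);
        return -1;
    }
    CRYPTO_gcm128_tag(gcm, tag, 16);                /* GHASH xor E(K, Y0) */
    CRYPTO_gcm128_release(gcm);
    return 0;
}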

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (is_endian.little)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (is_endian.little)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
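
In NIST SP 800-38D terms, the branch above computes the pre-counter block J0: for a 96-bit IV, J0 = IV || 0^31 || 1; for any other length, J0 = GHASH_H(IV zero-padded to a block boundary || 0^64 || [len(IV) in bits]_64), which is exactly the borrowed-Xi computation, including the final GCM_MUL after len0 is XORed in. EK0 = E(K, J0) is saved for the tag, and the counter in Yi is then advanced so that data encryption starts at J0 + 1.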

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
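
Two return conventions are worth noting here: -2 means AAD arrived after message data (ctx->len.u[1] is already non-zero), so all AAD must be supplied before the first encrypt/decrypt call, and -1 means the accumulated AAD length exceeded 2^61 bytes, i.e. the 2^64-bit capacity of the length block's AAD field. A trailing partial block is parked in ctx->ares and folded in by the first data call.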

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
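
The length check at the top of this function is NIST SP 800-38D's plaintext bound made concrete: at most 2^32 - 2 counter blocks per invocation, and (2^32 - 2) * 16 bytes = 2^36 - 32 bytes, which is exactly the ((U64(1) << 36) - 32) test.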

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
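
The decrypt path is the mirror image of encrypt with one ordering difference: GHASH must absorb ciphertext, so each bulk chunk is hashed from the input buffer before it is decrypted (GHASH(ctx, in, ...) above), whereas the encrypt path hashes the output buffer after producing it (GHASH(ctx, out - j, j)).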

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (is_endian.little)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
        if (len >= 16 && mres) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (is_endian.little)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
1623
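
The stream argument taken by CRYPTO_gcm128_encrypt_ctr32() and its decrypt
twin below is a ctr128_f: it must process whole 16-byte blocks and treat only
the last 32 bits of the IV as a big-endian counter; the callers reload
Yi.d[3] themselves after each bulk call, so the callback may clobber its own
copy of the counter. A minimal portable sketch of such a callback, assuming
the key is an AES_KEY schedule (ctr32_demo_stream is a hypothetical
illustration, not part of this file; real builds plug in platform routines
such as the AES-NI one):

#include <string.h>
#include <openssl/aes.h>

/* Hypothetical demonstration callback, not part of gcm128.c. */
static void ctr32_demo_stream(const unsigned char *in, unsigned char *out,
                              size_t blocks, const void *key,
                              const unsigned char ivec[16])
{
    unsigned char iv[16], ks[16];
    unsigned int ctr;
    size_t n;

    memcpy(iv, ivec, 16);
    /* only the last four IV bytes form the (big-endian) counter */
    ctr = ((unsigned int)iv[12] << 24) | ((unsigned int)iv[13] << 16)
          | ((unsigned int)iv[14] << 8) | iv[15];
    while (blocks--) {
        AES_encrypt(iv, ks, (const AES_KEY *)key); /* assumes an AES key */
        for (n = 0; n < 16; ++n)
            out[n] = in[n] ^ ks[n];
        in += 16;
        out += 16;
        ++ctr;                  /* 32-bit wrap-around is the required behaviour */
        iv[12] = (unsigned char)(ctr >> 24);
        iv[13] = (unsigned char)(ctr >> 16);
        iv[14] = (unsigned char)(ctr >> 8);
        iv[15] = (unsigned char)ctr;
    }
}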

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (is_endian.little)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (is_endian.little)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
# else
            u8 c = in[n];
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
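
Putting the receive side together: a minimal sketch of authenticated
decryption under stated assumptions, namely a 128-bit AES key (GCM runs the
forward cipher in both directions, hence the encrypt schedule), the
hypothetical ctr32_demo_stream above, and visibility of the internal
modes_lcl.h so that GCM128_CONTEXT can live on the stack (the public
declarations are in <openssl/modes.h>; external code would use
CRYPTO_gcm128_new() instead). gcm_demo_open is illustrative, not part of
this file:

static int gcm_demo_open(const unsigned char key16[16],
                         const unsigned char *iv, size_t ivlen,
                         const unsigned char *aad, size_t aadlen,
                         const unsigned char *ct, size_t ctlen,
                         const unsigned char tag[16], unsigned char *pt)
{
    AES_KEY ks;
    GCM128_CONTEXT gcm;

    AES_set_encrypt_key(key16, 128, &ks);
    CRYPTO_gcm128_init(&gcm, &ks, (block128_f)AES_encrypt);
    CRYPTO_gcm128_setiv(&gcm, iv, ivlen);
    if (CRYPTO_gcm128_aad(&gcm, aad, aadlen) != 0)
        return -1;              /* AAD length overflow */
    if (CRYPTO_gcm128_decrypt_ctr32(&gcm, ct, pt, ctlen,
                                    ctr32_demo_stream) != 0)
        return -1;              /* message length overflow */
    /* 0 iff the tag verifies; the comparison inside is constant-time */
    return CRYPTO_gcm128_finish(&gcm, tag, 16);
}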

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        unsigned blocks = (mres + 15) & -16;

        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    if (is_endian.little) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c,
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}
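
On the sending side the tag is extracted rather than verified:
CRYPTO_gcm128_tag() runs the same finalization with a NULL tag (the internal
comparison fails harmlessly) and copies Xi out. A short sketch continuing the
hypothetical helpers above; note that receivers should verify through
CRYPTO_gcm128_finish(), whose CRYPTO_memcmp() runs in constant time, since an
ad-hoc memcmp() on an extracted tag would reopen a timing side channel:

/* Hypothetical illustration; gcm is assumed already taken through
 * init/setiv/aad exactly as in the decrypt sketch above. */
static void gcm_demo_seal_tag(GCM128_CONTEXT *gcm,
                              const unsigned char *pt, unsigned char *ct,
                              size_t ptlen, unsigned char tag[16])
{
    CRYPTO_gcm128_encrypt_ctr32(gcm, pt, ct, ptlen, ctr32_demo_stream);
    CRYPTO_gcm128_tag(gcm, tag, 16); /* finalize and copy Xi out */
}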

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}
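
The heap variants pair allocation with initialization and with a scrubbing
release: OPENSSL_clear_free() zeroes the whole context, and with it H, Htable
and the cached key pointer, before freeing. A minimal lifecycle sketch under
the same AES assumptions as the earlier fragments (gcm_demo_lifecycle is
hypothetical):

static void gcm_demo_lifecycle(const unsigned char key16[16])
{
    AES_KEY ks;
    GCM128_CONTEXT *gcm;

    AES_set_encrypt_key(key16, 128, &ks);
    if ((gcm = CRYPTO_gcm128_new(&ks, (block128_f)AES_encrypt)) != NULL) {
        /* ... setiv/aad/encrypt/tag as in the earlier sketches ... */
        CRYPTO_gcm128_release(gcm); /* zeroes the context, then frees it */
    }
}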