Coverage Report

Created: 2025-07-01 06:23

/src/irssi/subprojects/openssl-1.1.1l/crypto/modes/gcm128.c

Summary: every instrumented line in this file has an execution count of 0, i.e. none of gcm128.c was exercised in this run. The annotated source follows.
/*
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <openssl/crypto.h>
#include "modes_local.h"
#include <string.h>

#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef size_t size_t_aX __attribute((__aligned__(1)));
#else
typedef size_t size_t_aX;
#endif

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
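
For reference (not part of the source): in GCM's bit-reflected representation of GF(2^128), REDUCE1BIT performs one multiplication by x. The element is shifted right one bit, and if the bit shifted out was set, the reflected reduction polynomial is folded back in:

    V \cdot x = \begin{cases} (V \gg 1) \oplus R & \text{if } \mathrm{lsb}(V) = 1 \\ V \gg 1 & \text{otherwise} \end{cases}, \qquad R = \mathtt{0xE1} \,\|\, 0^{120}

The constant U64(0xe100000000000000) in the macro is the top 64 bits of R.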

/*-
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
 * whole spectrum of possible table driven implementations. Why? In
 * non-"Shoup's" case memory access pattern is segmented in such manner,
 * that it's trivial to see that cache timing information can reveal
 * fair portion of intermediate hash value. Given that ciphertext is
 * always available to attacker, it's possible for him to attempt to
 * deduce secret parameter H and if successful, tamper with messages
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
 * not as trivial, but there is no reason to believe that it's resistant
 * to cache-timing attack. And the thing about "8-bit" implementation is
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
 * key + 1KB shared. Well, on pros side it should be twice as fast as
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
 * was observed to run ~75% faster, closer to 100% for commercial
 * compilers... Yet "4-bit" procedure is preferred, because it's
 * believed to provide better security-performance balance and adequate
 * all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows large enough free
 *   results in VM working set trimming, meaning that consequent
 *   malloc would immediately incur working set expansion);
 * - larger table has larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from same
 *   thread in Hyper-Threading world);
 *
 * Value of 1 is not appropriate for performance reasons.
 */
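
For orientation (not part of the source): every gcm_gmult_*/gcm_ghash_* variant below evaluates the same GHASH recurrence from the GCM specification; they differ only in how multiplication by the hash key H is tabulated. Over GF(2^128), with 128-bit input blocks B_i:

    X_0 = 0, \qquad X_i = (X_{i-1} \oplus B_i) \cdot H
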
#if     TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 256; i <<= 1) {
        u128 *Hi = Htable + i, H0 = *Hi;
        for (j = 1; j < i; ++j) {
            Hi[j].hi = H0.hi ^ Htable[j].hi;
            Hi[j].lo = H0.lo ^ Htable[j].lo;
        }
    }
}

static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi + 15;
    size_t rem, n = *xi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
    };

    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi == xi)
            break;

        n = *(--xi);

        rem = (size_t)Z.lo & 0xff;
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
        Z.hi = (Z.hi >> 8);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem] << 32;
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)

#elif   TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        const union {
            long one;
            char little;
        } is_endian = { 1 };

        if (is_endian.little)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
        } else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}
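
Not part of the source: the table built above satisfies, in the reflected bit order used by GCM (so Htable[8] = H, Htable[4] = H·x, Htable[2] = H·x^2, Htable[1] = H·x^3), the XOR-linearity that lets one 4-bit lookup replace four shift-and-add steps:

    \mathrm{Htable}[2^{\,3-k}] = H \cdot x^{k} \ (k = 0..3), \qquad \mathrm{Htable}[m \oplus n] = \mathrm{Htable}[m] \oplus \mathrm{Htable}[n]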

# ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#  ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#  else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
#  endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

#  if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };

#   if 1
    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
#   else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
    };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt = 0; cnt < 16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
        Hshr4[cnt].hi = (Z.hi >> 4);
        Hshl4[cnt] = (u8)(Z.lo << 4);
    }

    do {
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo & 0xff;

            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
            Z.hi = (Z.hi >> 8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
        }

        nlo = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo & 0xf;

        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
#   endif

        if (is_endian.little) {
#   ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#   else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
#   endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp += 16, len -= 16);
}
#  endif
# else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
 * effect. In other words idea is to hash data while it's still in L1 cache
 * after encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif

#else                           /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    long X;
    int i, j;
    const long *xi = (const long *)Xi;
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    V.hi = H[0];                /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j = 0; j < 16 / sizeof(long); ++j) {
        if (is_endian.little) {
            if (sizeof(long) == 8) {
# ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
# else
                const u8 *p = (const u8 *)(xi + j);
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
# endif
            } else {
                const u8 *p = (const u8 *)(xi + j);
                X = (long)GETU32(p);
            }
        } else
            X = xi[j];

        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
            Z.hi ^= V.hi & M;
            Z.lo ^= V.lo & M;

            REDUCE1BIT(V);
        }
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif

#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[];

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_sparcv9cap_P[];
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    (*block) (ctx->H.c, ctx->H.c, key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }
#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif   TABLE_BITS==4
# if    defined(GHASH)
#  define CTX__GHASH(f) (ctx->ghash = (f))
# else
#  define CTX__GHASH(f) (ctx->ghash = NULL)
# endif
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            CTX__GHASH(gcm_ghash_avx);
        } else {
            gcm_init_clmul(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            CTX__GHASH(gcm_ghash_clmul);
        }
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if   defined(GHASH_ASM_X86)  /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        CTX__GHASH(gcm_ghash_4bit_mmx);
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        CTX__GHASH(gcm_ghash_4bit_x86);
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    CTX__GHASH(gcm_ghash_4bit);
#  endif
# elif  defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        CTX__GHASH(gcm_ghash_v8);
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        CTX__GHASH(gcm_ghash_neon);
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        gcm_init_vis3(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        CTX__GHASH(gcm_ghash_vis3);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        gcm_init_p8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        CTX__GHASH(gcm_ghash_p8);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# else
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
# undef CTX__GHASH
#endif
}

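Not part of gcm128.c: a minimal sketch of driving this low-level API directly, assuming AES from the legacy <openssl/aes.h> interface as the block128_f (applications normally go through EVP's AEAD interface instead; gcm_seal_sketch and its error handling are illustrative only):

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* Hypothetical one-shot AES-128-GCM "seal" on top of CRYPTO_gcm128_*. */
    int gcm_seal_sketch(const unsigned char key[16],
                        const unsigned char iv[12],
                        const unsigned char *aad, size_t aad_len,
                        const unsigned char *pt, size_t pt_len,
                        unsigned char *ct, unsigned char tag[16])
    {
        AES_KEY aes;
        GCM128_CONTEXT *gcm;
        int rc = -1;

        if (AES_set_encrypt_key(key, 128, &aes) != 0)
            return -1;
        /* Any 16-byte block encrypt fits block128_f; AES_encrypt matches. */
        gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt);
        if (gcm == NULL)
            return -1;
        CRYPTO_gcm128_setiv(gcm, iv, 12);        /* 12-byte IV: fast path */
        if (CRYPTO_gcm128_aad(gcm, aad, aad_len) == 0 && /* AAD before data */
            CRYPTO_gcm128_encrypt(gcm, pt, ct, pt_len) == 0) {
            CRYPTO_gcm128_tag(gcm, tag, 16);     /* final authentication tag */
            rc = 0;
        }
        CRYPTO_gcm128_release(gcm);
        return rc;
    }
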
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (is_endian.little)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (is_endian.little)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}

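Not part of the source: the two IV cases above implement the J_0 derivation of SP 800-38D. With |IV| in bits and s = 128*ceil(|IV|/128) - |IV|:

    J_0 = \begin{cases} IV \,\|\, 0^{31} \,\|\, 1 & |IV| = 96 \\ \mathrm{GHASH}_H\!\left(IV \,\|\, 0^{s+64} \,\|\, [\,|IV|\,]_{64}\right) & \text{otherwise} \end{cases}

The block cipher output E_K(J_0) is saved as EK0 for the final tag, and the counter in the last 32 bits of Yi is advanced once before any data is processed.
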
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}

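Not part of the source: the bound checked above, and the matching check in the encrypt/decrypt paths below, are the SP 800-38D input limits converted from bits to bytes:

    \text{AAD} \le 2^{61} \text{ bytes} \approx 2^{64} \text{ bits}, \qquad \text{plaintext} \le 2^{39} - 256 \text{ bits} = 2^{36} - 32 \text{ bytes}
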
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

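Not part of the source: the counter handling above is GCM's inc32 convention. Only the last 32 bits of the pre-counter block are incremented (big-endian, hence the BSWAP4/PUTU32 variants), and each keystream block is

    Y_0 = J_0, \qquad Y_i = \mathrm{inc}_{32}(Y_{i-1}), \qquad C_i = P_i \oplus E_K(Y_i)

which is why a single GCM invocation is capped well below 2^32 blocks of data.
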
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

1465
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1466
                                const unsigned char *in, unsigned char *out,
1467
                                size_t len, ctr128_f stream)
1468
0
{
1469
#if defined(OPENSSL_SMALL_FOOTPRINT)
1470
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1471
#else
1472
0
    const union {
1473
0
        long one;
1474
0
        char little;
1475
0
    } is_endian = { 1 };
1476
0
    unsigned int n, ctr, mres;
1477
0
    size_t i;
1478
0
    u64 mlen = ctx->len.u[1];
1479
0
    void *key = ctx->key;
1480
# ifdef GCM_FUNCREF_4BIT
1481
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1482
#  ifdef GHASH
1483
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1484
                         const u8 *inp, size_t len) = ctx->ghash;
1485
#  endif
1486
# endif
1487
1488
0
    mlen += len;
1489
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1490
0
        return -1;
1491
0
    ctx->len.u[1] = mlen;
1492
1493
0
    mres = ctx->mres;
1494
1495
0
    if (ctx->ares) {
1496
        /* First call to encrypt finalizes GHASH(AAD) */
1497
0
#if defined(GHASH)
1498
0
        if (len == 0) {
1499
0
            GCM_MUL(ctx);
1500
0
            ctx->ares = 0;
1501
0
            return 0;
1502
0
        }
1503
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1504
0
        ctx->Xi.u[0] = 0;
1505
0
        ctx->Xi.u[1] = 0;
1506
0
        mres = sizeof(ctx->Xi);
1507
#else
1508
        GCM_MUL(ctx);
1509
#endif
1510
0
        ctx->ares = 0;
1511
0
    }
1512
1513
0
    if (is_endian.little)
1514
# ifdef BSWAP4
1515
        ctr = BSWAP4(ctx->Yi.d[3]);
1516
# else
1517
0
        ctr = GETU32(ctx->Yi.c + 12);
1518
0
# endif
1519
0
    else
1520
0
        ctr = ctx->Yi.d[3];
1521
1522
0
    n = mres % 16;
1523
0
    if (n) {
1524
0
# if defined(GHASH)
1525
0
        while (n && len) {
1526
0
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1527
0
            --len;
1528
0
            n = (n + 1) % 16;
1529
0
        }
1530
0
        if (n == 0) {
1531
0
            GHASH(ctx, ctx->Xn, mres);
1532
0
            mres = 0;
1533
0
        } else {
1534
0
            ctx->mres = mres;
1535
0
            return 0;
1536
0
        }
1537
# else
1538
        while (n && len) {
1539
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1540
            --len;
1541
            n = (n + 1) % 16;
1542
        }
1543
        if (n == 0) {
1544
            GCM_MUL(ctx);
1545
            mres = 0;
1546
        } else {
1547
            ctx->mres = n;
1548
            return 0;
1549
        }
1550
# endif
1551
0
    }
1552
0
# if defined(GHASH)
1553
0
        if (len >= 16 && mres) {
1554
0
            GHASH(ctx, ctx->Xn, mres);
1555
0
            mres = 0;
1556
0
        }
1557
0
#  if defined(GHASH_CHUNK)
1558
0
    while (len >= GHASH_CHUNK) {
1559
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1560
0
        ctr += GHASH_CHUNK / 16;
1561
0
        if (is_endian.little)
1562
#   ifdef BSWAP4
1563
            ctx->Yi.d[3] = BSWAP4(ctr);
1564
#   else
1565
0
            PUTU32(ctx->Yi.c + 12, ctr);
1566
0
#   endif
1567
0
        else
1568
0
            ctx->Yi.d[3] = ctr;
1569
0
        GHASH(ctx, out, GHASH_CHUNK);
1570
0
        out += GHASH_CHUNK;
1571
0
        in += GHASH_CHUNK;
1572
0
        len -= GHASH_CHUNK;
1573
0
    }
1574
0
#  endif
1575
0
# endif
1576
0
    if ((i = (len & (size_t)-16))) {
1577
0
        size_t j = i / 16;
1578
1579
0
        (*stream) (in, out, j, key, ctx->Yi.c);
1580
0
        ctr += (unsigned int)j;
1581
0
        if (is_endian.little)
1582
# ifdef BSWAP4
1583
            ctx->Yi.d[3] = BSWAP4(ctr);
1584
# else
1585
0
            PUTU32(ctx->Yi.c + 12, ctr);
1586
0
# endif
1587
0
        else
1588
0
            ctx->Yi.d[3] = ctr;
1589
0
        in += i;
1590
0
        len -= i;
1591
0
# if defined(GHASH)
1592
0
        GHASH(ctx, out, i);
1593
0
        out += i;
1594
# else
1595
        while (j--) {
1596
            for (i = 0; i < 16; ++i)
1597
                ctx->Xi.c[i] ^= out[i];
1598
            GCM_MUL(ctx);
1599
            out += 16;
1600
        }
1601
# endif
1602
0
    }
1603
0
    if (len) {
1604
0
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1605
0
        ++ctr;
1606
0
        if (is_endian.little)
1607
# ifdef BSWAP4
1608
            ctx->Yi.d[3] = BSWAP4(ctr);
1609
# else
1610
0
            PUTU32(ctx->Yi.c + 12, ctr);
1611
0
# endif
1612
0
        else
1613
0
            ctx->Yi.d[3] = ctr;
1614
0
        while (len--) {
1615
0
# if defined(GHASH)
1616
0
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1617
# else
1618
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1619
# endif
1620
0
            ++n;
1621
0
        }
1622
0
    }
1623
1624
0
    ctx->mres = mres;
1625
0
    return 0;
1626
0
#endif
1627
0
}
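
The stream argument above must honour a strict contract: encrypt exactly `blocks` full 16-byte blocks in counter mode, advancing only the low 32 bits of the counter big-endian between blocks, and leave ivec itself untouched (the caller writes the advanced counter back into ctx->Yi, as the BSWAP4/PUTU32 branches above show). A minimal sketch of such a ctr128_f follows; toy_ctr_key and toy_ctr32 are hypothetical names, and real callers pass an optimized routine such as aesni_ctr32_encrypt_blocks instead.

#include <string.h>
#include <openssl/modes.h>

/* Hypothetical wrapper: GCM passes ctx->key through to the callback
 * unchanged, so the opaque pointer can carry whatever the callback needs. */
struct toy_ctr_key {
    block128_f block;           /* raw single-block cipher */
    const void *ks;             /* its key schedule */
};

static void toy_ctr32(const unsigned char *in, unsigned char *out,
                      size_t blocks, const void *key,
                      const unsigned char ivec[16])
{
    const struct toy_ctr_key *tk = key;
    unsigned char ctrblk[16], keystream[16];
    unsigned int ctr;
    size_t i;

    memcpy(ctrblk, ivec, 16);
    ctr = ((unsigned int)ctrblk[12] << 24) | ((unsigned int)ctrblk[13] << 16)
        | ((unsigned int)ctrblk[14] << 8) | ctrblk[15];
    while (blocks--) {
        (*tk->block) (ctrblk, keystream, tk->ks);
        for (i = 0; i < 16; i++)
            out[i] = in[i] ^ keystream[i];
        in += 16;
        out += 16;
        ++ctr;                  /* only the low 32 bits may advance */
        ctrblk[12] = (unsigned char)(ctr >> 24);
        ctrblk[13] = (unsigned char)(ctr >> 16);
        ctrblk[14] = (unsigned char)(ctr >> 8);
        ctrblk[15] = (unsigned char)ctr;
    }
}
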
1628
1629
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1630
                                const unsigned char *in, unsigned char *out,
1631
                                size_t len, ctr128_f stream)
1632
0
{
1633
#if defined(OPENSSL_SMALL_FOOTPRINT)
1634
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1635
#else
1636
0
    const union {
1637
0
        long one;
1638
0
        char little;
1639
0
    } is_endian = { 1 };
1640
0
    unsigned int n, ctr, mres;
1641
0
    size_t i;
1642
0
    u64 mlen = ctx->len.u[1];
1643
0
    void *key = ctx->key;
1644
# ifdef GCM_FUNCREF_4BIT
1645
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1646
#  ifdef GHASH
1647
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1648
                         const u8 *inp, size_t len) = ctx->ghash;
1649
#  endif
1650
# endif
1651
1652
0
    mlen += len;
1653
0
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1654
0
        return -1;
1655
0
    ctx->len.u[1] = mlen;
1656
1657
0
    mres = ctx->mres;
1658
1659
0
    if (ctx->ares) {
1660
        /* First call to decrypt finalizes GHASH(AAD) */
1661
0
# if defined(GHASH)
1662
0
        if (len == 0) {
1663
0
            GCM_MUL(ctx);
1664
0
            ctx->ares = 0;
1665
0
            return 0;
1666
0
        }
1667
0
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1668
0
        ctx->Xi.u[0] = 0;
1669
0
        ctx->Xi.u[1] = 0;
1670
0
        mres = sizeof(ctx->Xi);
1671
# else
1672
        GCM_MUL(ctx);
1673
# endif
1674
0
        ctx->ares = 0;
1675
0
    }
1676
1677
0
    if (is_endian.little)
1678
# ifdef BSWAP4
1679
        ctr = BSWAP4(ctx->Yi.d[3]);
1680
# else
1681
0
        ctr = GETU32(ctx->Yi.c + 12);
1682
0
# endif
1683
0
    else
1684
0
        ctr = ctx->Yi.d[3];
1685
1686
0
    n = mres % 16;
1687
0
    if (n) {
1688
0
# if defined(GHASH)
1689
0
        while (n && len) {
1690
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1691
0
            --len;
1692
0
            n = (n + 1) % 16;
1693
0
        }
1694
0
        if (n == 0) {
1695
0
            GHASH(ctx, ctx->Xn, mres);
1696
0
            mres = 0;
1697
0
        } else {
1698
0
            ctx->mres = mres;
1699
0
            return 0;
1700
0
        }
1701
# else
1702
        while (n && len) {
1703
            u8 c = *(in++);
1704
            *(out++) = c ^ ctx->EKi.c[n];
1705
            ctx->Xi.c[n] ^= c;
1706
            --len;
1707
            n = (n + 1) % 16;
1708
        }
1709
        if (n == 0) {
1710
            GCM_MUL(ctx);
1711
            mres = 0;
1712
        } else {
1713
            ctx->mres = n;
1714
            return 0;
1715
        }
1716
# endif
1717
0
    }
1718
0
# if defined(GHASH)
1719
0
    if (len >= 16 && mres) {
1720
0
        GHASH(ctx, ctx->Xn, mres);
1721
0
        mres = 0;
1722
0
    }
1723
0
#  if defined(GHASH_CHUNK)
1724
0
    while (len >= GHASH_CHUNK) {
1725
0
        GHASH(ctx, in, GHASH_CHUNK);
1726
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1727
0
        ctr += GHASH_CHUNK / 16;
1728
0
        if (is_endian.little)
1729
#   ifdef BSWAP4
1730
            ctx->Yi.d[3] = BSWAP4(ctr);
1731
#   else
1732
0
            PUTU32(ctx->Yi.c + 12, ctr);
1733
0
#   endif
1734
0
        else
1735
0
            ctx->Yi.d[3] = ctr;
1736
0
        out += GHASH_CHUNK;
1737
0
        in += GHASH_CHUNK;
1738
0
        len -= GHASH_CHUNK;
1739
0
    }
1740
0
#  endif
1741
0
# endif
1742
0
    if ((i = (len & (size_t)-16))) {
1743
0
        size_t j = i / 16;
1744
1745
0
# if defined(GHASH)
1746
0
        GHASH(ctx, in, i);
1747
# else
1748
        while (j--) {
1749
            size_t k;
1750
            for (k = 0; k < 16; ++k)
1751
                ctx->Xi.c[k] ^= in[k];
1752
            GCM_MUL(ctx);
1753
            in += 16;
1754
        }
1755
        j = i / 16;
1756
        in -= i;
1757
# endif
1758
0
        (*stream) (in, out, j, key, ctx->Yi.c);
1759
0
        ctr += (unsigned int)j;
1760
0
        if (is_endian.little)
1761
# ifdef BSWAP4
1762
            ctx->Yi.d[3] = BSWAP4(ctr);
1763
# else
1764
0
            PUTU32(ctx->Yi.c + 12, ctr);
1765
0
# endif
1766
0
        else
1767
0
            ctx->Yi.d[3] = ctr;
1768
0
        out += i;
1769
0
        in += i;
1770
0
        len -= i;
1771
0
    }
1772
0
    if (len) {
1773
0
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1774
0
        ++ctr;
1775
0
        if (is_endian.little)
1776
# ifdef BSWAP4
1777
            ctx->Yi.d[3] = BSWAP4(ctr);
1778
# else
1779
0
            PUTU32(ctx->Yi.c + 12, ctr);
1780
0
# endif
1781
0
        else
1782
0
            ctx->Yi.d[3] = ctr;
1783
0
        while (len--) {
1784
0
# if defined(GHASH)
1785
0
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1786
# else
1787
            u8 c = in[n];
1788
            ctx->Xi.c[mres++] ^= c;
1789
            out[n] = c ^ ctx->EKi.c[n];
1790
# endif
1791
0
            ++n;
1792
0
        }
1793
0
    }
1794
1795
0
    ctx->mres = mres;
1796
0
    return 0;
1797
0
#endif
1798
0
}
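
Note the ordering difference from the encrypt side: the bulk path GHASHes the ciphertext before calling the stream routine, because GCM authenticates ciphertext. A minimal caller sketch follows; gcm_open and its parameters are hypothetical names, but the CRYPTO_gcm128_* calls and their return conventions are as defined in this file and openssl/modes.h.

#include <openssl/modes.h>

static int gcm_open(GCM128_CONTEXT *ctx,
                    const unsigned char *iv, size_t ivlen,
                    const unsigned char *aad, size_t aadlen,
                    const unsigned char *ct, unsigned char *pt, size_t len,
                    const unsigned char tag[16], ctr128_f stream)
{
    CRYPTO_gcm128_setiv(ctx, iv, ivlen);
    if (aadlen && CRYPTO_gcm128_aad(ctx, aad, aadlen))
        return -1;              /* AAD after data, or AAD length overflow */
    if (CRYPTO_gcm128_decrypt_ctr32(ctx, ct, pt, len, stream))
        return -1;              /* message exceeds the 2^36-32 byte limit */
    /* 0 iff the tag verifies; on failure the plaintext must be discarded */
    return CRYPTO_gcm128_finish(ctx, tag, 16);
}
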
1799
1800
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1801
                         size_t len)
1802
0
{
1803
0
    const union {
1804
0
        long one;
1805
0
        char little;
1806
0
    } is_endian = { 1 };
1807
0
    u64 alen = ctx->len.u[0] << 3;
1808
0
    u64 clen = ctx->len.u[1] << 3;
1809
#ifdef GCM_FUNCREF_4BIT
1810
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1811
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1812
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1813
                         const u8 *inp, size_t len) = ctx->ghash;
1814
# endif
1815
#endif
1816
1817
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1818
0
    u128 bitlen;
1819
0
    unsigned int mres = ctx->mres;
1820
1821
0
    if (mres) {
1822
0
        unsigned blocks = (mres + 15) & -16;
1823
1824
0
        memset(ctx->Xn + mres, 0, blocks - mres);
1825
0
        mres = blocks;
1826
0
        if (mres == sizeof(ctx->Xn)) {
1827
0
            GHASH(ctx, ctx->Xn, mres);
1828
0
            mres = 0;
1829
0
        }
1830
0
    } else if (ctx->ares) {
1831
0
        GCM_MUL(ctx);
1832
0
    }
1833
#else
1834
    if (ctx->mres || ctx->ares)
1835
        GCM_MUL(ctx);
1836
#endif
1837
1838
0
    if (is_endian.little) {
1839
#ifdef BSWAP8
1840
        alen = BSWAP8(alen);
1841
        clen = BSWAP8(clen);
1842
#else
1843
0
        u8 *p = ctx->len.c;
1844
1845
0
        ctx->len.u[0] = alen;
1846
0
        ctx->len.u[1] = clen;
1847
1848
0
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1849
0
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1850
0
#endif
1851
0
    }
1852
1853
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1854
0
    bitlen.hi = alen;
1855
0
    bitlen.lo = clen;
1856
0
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1857
0
    mres += sizeof(bitlen);
1858
0
    GHASH(ctx, ctx->Xn, mres);
1859
#else
1860
    ctx->Xi.u[0] ^= alen;
1861
    ctx->Xi.u[1] ^= clen;
1862
    GCM_MUL(ctx);
1863
#endif
1864
1865
0
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1866
0
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1867
1868
0
    if (tag && len <= sizeof(ctx->Xi))
1869
0
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1870
0
    else
1871
0
        return -1;
1872
0
}
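
The alen/clen handling above constructs GCM's final GHASH block: the AAD length and the ciphertext length, in bits, each as a 64-bit big-endian integer, concatenated into one 16-byte block; the branches merely get that encoding right on little-endian machines with and without BSWAP8. A standalone sketch of the same packing (gcm_len_block is a hypothetical name):

#include <stdint.h>

static void gcm_len_block(unsigned char blk[16],
                          uint64_t aad_bytes, uint64_t ct_bytes)
{
    uint64_t alen = aad_bytes << 3;     /* byte counts -> bit counts */
    uint64_t clen = ct_bytes << 3;
    int i;

    for (i = 0; i < 8; i++) {
        blk[7 - i]  = (unsigned char)(alen >> (8 * i)); /* len(A), big-endian */
        blk[15 - i] = (unsigned char)(clen >> (8 * i)); /* len(C), big-endian */
    }
}

The final CRYPTO_memcmp keeps the tag comparison constant-time; callers should treat any non-zero return as authentication failure, including the -1 returned for a missing or oversized tag argument.
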
1873
1874
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1875
0
{
1876
0
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1877
0
    memcpy(tag, ctx->Xi.c,
1878
0
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1879
0
}
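
On the encrypt side the tag is extracted rather than verified: the NULL/0 finish call above deliberately takes the failing compare path after the tag has been computed into ctx->Xi, and the memcpy truncates to at most 16 bytes. Typical use, assuming encryption has already consumed the whole message:

unsigned char tag[16];

CRYPTO_gcm128_tag(ctx, tag, sizeof(tag));   /* pass len < 16 for truncated tags */
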
1880
1881
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1882
0
{
1883
0
    GCM128_CONTEXT *ret;
1884
1885
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1886
0
        CRYPTO_gcm128_init(ret, key, block);
1887
1888
0
    return ret;
1889
0
}
1890
1891
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1892
0
{
1893
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1894
0
}
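
The two heap helpers that close the file pair naturally with the primitives above. Below is a minimal end-to-end sketch of the encrypt side; gcm_seal is a hypothetical helper, and the cast of AES_encrypt to block128_f mirrors how OpenSSL's own tests drive this API.

#include <openssl/aes.h>
#include <openssl/modes.h>

static int gcm_seal(const AES_KEY *aes,
                    const unsigned char *iv, size_t ivlen,
                    const unsigned char *aad, size_t aadlen,
                    const unsigned char *pt, unsigned char *ct, size_t len,
                    unsigned char tag[16])
{
    GCM128_CONTEXT *gcm = CRYPTO_gcm128_new((void *)aes,
                                            (block128_f)AES_encrypt);

    if (gcm == NULL)
        return -1;
    CRYPTO_gcm128_setiv(gcm, iv, ivlen);
    if ((aadlen && CRYPTO_gcm128_aad(gcm, aad, aadlen))
        || CRYPTO_gcm128_encrypt(gcm, pt, ct, len)) {
        CRYPTO_gcm128_release(gcm);
        return -1;
    }
    CRYPTO_gcm128_tag(gcm, tag, 16);
    CRYPTO_gcm128_release(gcm);     /* OPENSSL_clear_free zeroizes key material */
    return 0;
}
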