Coverage Report

Created: 2025-11-16 06:40

/src/openssl30/crypto/modes/gcm128.c
Line | Count | Source
1
/*
2
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <string.h>
11
#include <openssl/crypto.h>
12
#include "internal/cryptlib.h"
13
#include "internal/endian.h"
14
#include "crypto/modes.h"
15
16
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17
typedef size_t size_t_aX __attribute((__aligned__(1)));
18
#else
19
typedef size_t size_t_aX;
20
#endif
21
22
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23
/* redefine, because alignment is ensured */
24
# undef  GETU32
25
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
26
# undef  PUTU32
27
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
28
#endif
29
30
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
31
0
#define REDUCE1BIT(V)   do { \
32
0
        if (sizeof(size_t)==8) { \
33
0
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
34
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
35
0
                V.hi  = (V.hi>>1 )^T; \
36
0
        } \
37
0
        else { \
38
0
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
39
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
40
0
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
41
0
        } \
42
0
} while(0)
43
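
The REDUCE1BIT macro above performs one multiply-by-x step in GF(2^128) under GCM's bit-reflected representation: a right shift of the 128-bit value multiplies by x, and the bit shifted out of position x^127 is folded back in as x^7 + x^2 + x + 1, encoded as 0xE1 in the top byte. A minimal standalone sketch of the 64-bit branch (illustrative names, not this file's API):

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

typedef struct { uint64_t hi, lo; } u128_t;

/* Multiply v by x in GF(2^128), bit-reflected GCM representation:
 * shift right one bit; if the bit shifted out of lo was set, fold
 * x^128 = x^7 + x^2 + x + 1 back in as 0xE1 in the top byte. */
static void reduce1bit(u128_t *v)
{
    uint64_t t = 0xe100000000000000ULL & (0 - (v->lo & 1));
    v->lo = (v->hi << 63) | (v->lo >> 1);
    v->hi = (v->hi >> 1) ^ t;
}

int main(void)
{
    u128_t v = { 0, 1 };  /* coefficient of x^127 set: forces the fold */
    reduce1bit(&v);
    printf("%016" PRIx64 " %016" PRIx64 "\n", v.hi, v.lo);
    return 0;
}
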
44
/*-
45
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
46
 * should never be set to 8: 8 is effectively reserved for testing purposes.
47
 * TABLE_BITS>1 selects the lookup-table-driven implementations referred to
48
 * as "Shoup's" in the GCM specification. In other words, OpenSSL does not
49
 * cover the whole spectrum of possible table-driven implementations. Why?
50
 * In the non-"Shoup's" case the memory access pattern is segmented in such
51
 * a manner that it is trivial to see that cache-timing information can
52
 * reveal a fair portion of the intermediate hash value. Given that the
53
 * ciphertext is always available to the attacker, it is possible to
54
 * attempt to deduce the secret parameter H and, if successful, tamper
55
 * with messages [which is nothing but trivial in CTR mode]. In "Shoup's"
56
 * case it is not as trivial, but there is no reason to believe that it is
57
 * resistant to cache-timing attacks. The thing about the "8-bit"
58
 * implementation is that it consumes 16 (sixteen) times more memory, 4KB
59
 * per individual key + 1KB shared. On the plus side it should be twice as
60
 * fast as the "4-bit" version; for gcc-generated x86[_64] code the "8-bit"
61
 * version was observed to run ~75% faster, closer to 100% for commercial
62
 * compilers... Yet the "4-bit" procedure is preferred, because it is
63
 * believed to provide a better security-performance balance and adequate
64
 * all-round performance. "All-round" refers to things like:
65
 *
66
 * - shorter setup time effectively improves overall timing for
67
 *   handling short messages;
68
 * - larger table allocation can become unbearable because of VM
69
 *   subsystem penalties (for example, on Windows a large enough free
70
 *   results in VM working-set trimming, meaning that a subsequent
71
 *   malloc would immediately incur working-set expansion);
72
 * - a larger table has a larger cache footprint, which can affect the
73
 *   performance of other code paths (not necessarily even from the same
74
 *   thread in a Hyper-Threading world);
75
 *
76
 * A value of 1 is not appropriate, for performance reasons.
77
 */
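
For concreteness, the memory figures quoted above follow directly from the entry type: each Htable entry is a u128, i.e. 16 bytes. A one-line arithmetic check (a sketch):

#include <stdio.h>

int main(void)
{
    printf("4-bit: Htable[16]  = %4d bytes per key\n", 16 * 16);
    printf("8-bit: Htable[256] = %4d bytes per key\n", 256 * 16);
    /* plus the shared rem_8bit[256] of size_t entries: 1KB with a
     * 32-bit size_t (2KB with a 64-bit size_t) */
    return 0;
}
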
78
#if     TABLE_BITS==8
79
80
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
81
{
82
    int i, j;
83
    u128 V;
84
85
    Htable[0].hi = 0;
86
    Htable[0].lo = 0;
87
    V.hi = H[0];
88
    V.lo = H[1];
89
90
    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
91
        REDUCE1BIT(V);
92
        Htable[i] = V;
93
    }
94
95
    for (i = 2; i < 256; i <<= 1) {
96
        u128 *Hi = Htable + i, H0 = *Hi;
97
        for (j = 1; j < i; ++j) {
98
            Hi[j].hi = H0.hi ^ Htable[j].hi;
99
            Hi[j].lo = H0.lo ^ Htable[j].lo;
100
        }
101
    }
102
}
103
104
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
105
{
106
    u128 Z = { 0, 0 };
107
    const u8 *xi = (const u8 *)Xi + 15;
108
    size_t rem, n = *xi;
109
    DECLARE_IS_ENDIAN;
110
    static const size_t rem_8bit[256] = {
111
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
112
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
113
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
114
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
115
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
116
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
117
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
118
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
119
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
120
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
121
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
122
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
123
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
124
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
125
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
126
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
127
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
128
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
129
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
130
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
131
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
132
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
133
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
134
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
135
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
136
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
137
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
138
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
139
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
140
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
141
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
142
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
143
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
144
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
145
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
146
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
147
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
148
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
149
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
150
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
151
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
152
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
153
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
154
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
155
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
156
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
157
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
158
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
159
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
160
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
161
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
162
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
163
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
164
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
165
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
166
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
167
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
168
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
169
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
170
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
171
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
172
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
173
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
174
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
175
    };
176
177
    while (1) {
178
        Z.hi ^= Htable[n].hi;
179
        Z.lo ^= Htable[n].lo;
180
181
        if ((u8 *)Xi == xi)
182
            break;
183
184
        n = *(--xi);
185
186
        rem = (size_t)Z.lo & 0xff;
187
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
188
        Z.hi = (Z.hi >> 8);
189
        if (sizeof(size_t) == 8)
190
            Z.hi ^= rem_8bit[rem];
191
        else
192
            Z.hi ^= (u64)rem_8bit[rem] << 32;
193
    }
194
195
    if (IS_LITTLE_ENDIAN) {
196
# ifdef BSWAP8
197
        Xi[0] = BSWAP8(Z.hi);
198
        Xi[1] = BSWAP8(Z.lo);
199
# else
200
        u8 *p = (u8 *)Xi;
201
        u32 v;
202
        v = (u32)(Z.hi >> 32);
203
        PUTU32(p, v);
204
        v = (u32)(Z.hi);
205
        PUTU32(p + 4, v);
206
        v = (u32)(Z.lo >> 32);
207
        PUTU32(p + 8, v);
208
        v = (u32)(Z.lo);
209
        PUTU32(p + 12, v);
210
# endif
211
    } else {
212
        Xi[0] = Z.hi;
213
        Xi[1] = Z.lo;
214
    }
215
}
216
217
# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
218
219
#elif   TABLE_BITS==4
220
221
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
222
0
{
223
0
    u128 V;
224
# if defined(OPENSSL_SMALL_FOOTPRINT)
225
    int i;
226
# endif
227
228
0
    Htable[0].hi = 0;
229
0
    Htable[0].lo = 0;
230
0
    V.hi = H[0];
231
0
    V.lo = H[1];
232
233
# if defined(OPENSSL_SMALL_FOOTPRINT)
234
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
235
        REDUCE1BIT(V);
236
        Htable[i] = V;
237
    }
238
239
    for (i = 2; i < 16; i <<= 1) {
240
        u128 *Hi = Htable + i;
241
        int j;
242
        for (V = *Hi, j = 1; j < i; ++j) {
243
            Hi[j].hi = V.hi ^ Htable[j].hi;
244
            Hi[j].lo = V.lo ^ Htable[j].lo;
245
        }
246
    }
247
# else
248
0
    Htable[8] = V;
249
0
    REDUCE1BIT(V);
250
0
    Htable[4] = V;
251
0
    REDUCE1BIT(V);
252
0
    Htable[2] = V;
253
0
    REDUCE1BIT(V);
254
0
    Htable[1] = V;
255
0
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
256
0
    V = Htable[4];
257
0
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
258
0
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
259
0
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
260
0
    V = Htable[8];
261
0
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
262
0
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
263
0
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
264
0
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
265
0
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
266
0
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
267
0
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
268
0
# endif
269
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
270
    /*
271
     * ARM assembler expects specific dword order in Htable.
272
     */
273
    {
274
        int j;
275
        DECLARE_IS_ENDIAN;
276
277
        if (IS_LITTLE_ENDIAN)
278
            for (j = 0; j < 16; ++j) {
279
                V = Htable[j];
280
                Htable[j].hi = V.lo;
281
                Htable[j].lo = V.hi;
282
        } else
283
            for (j = 0; j < 16; ++j) {
284
                V = Htable[j];
285
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
286
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
287
            }
288
    }
289
# endif
290
0
}
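
Both the small-footprint loop and the unrolled branch above build the same table. A hedged summary of its invariants, writing V·x for one REDUCE1BIT step in the bit-reflected field arithmetic:

Htable[0] = 0,  Htable[8] = H,  Htable[4] = H \cdot x,  Htable[2] = H \cdot x^2,  Htable[1] = H \cdot x^3
Htable[i \mid j] = Htable[i] \oplus Htable[j]   for i, j with disjoint bits

so that a 4-bit nibble n = 8b_3 + 4b_2 + 2b_1 + b_0 looks up Htable[n] = H \cdot (b_3 + b_2 x + b_1 x^2 + b_0 x^3).
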
291
292
# ifndef GHASH_ASM
293
static const size_t rem_4bit[16] = {
294
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
295
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
296
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
297
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
298
};
299
300
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
301
{
302
    u128 Z;
303
    int cnt = 15;
304
    size_t rem, nlo, nhi;
305
    DECLARE_IS_ENDIAN;
306
307
    nlo = ((const u8 *)Xi)[15];
308
    nhi = nlo >> 4;
309
    nlo &= 0xf;
310
311
    Z.hi = Htable[nlo].hi;
312
    Z.lo = Htable[nlo].lo;
313
314
    while (1) {
315
        rem = (size_t)Z.lo & 0xf;
316
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
317
        Z.hi = (Z.hi >> 4);
318
        if (sizeof(size_t) == 8)
319
            Z.hi ^= rem_4bit[rem];
320
        else
321
            Z.hi ^= (u64)rem_4bit[rem] << 32;
322
323
        Z.hi ^= Htable[nhi].hi;
324
        Z.lo ^= Htable[nhi].lo;
325
326
        if (--cnt < 0)
327
            break;
328
329
        nlo = ((const u8 *)Xi)[cnt];
330
        nhi = nlo >> 4;
331
        nlo &= 0xf;
332
333
        rem = (size_t)Z.lo & 0xf;
334
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
335
        Z.hi = (Z.hi >> 4);
336
        if (sizeof(size_t) == 8)
337
            Z.hi ^= rem_4bit[rem];
338
        else
339
            Z.hi ^= (u64)rem_4bit[rem] << 32;
340
341
        Z.hi ^= Htable[nlo].hi;
342
        Z.lo ^= Htable[nlo].lo;
343
    }
344
345
    if (IS_LITTLE_ENDIAN) {
346
#  ifdef BSWAP8
347
        Xi[0] = BSWAP8(Z.hi);
348
        Xi[1] = BSWAP8(Z.lo);
349
#  else
350
        u8 *p = (u8 *)Xi;
351
        u32 v;
352
        v = (u32)(Z.hi >> 32);
353
        PUTU32(p, v);
354
        v = (u32)(Z.hi);
355
        PUTU32(p + 4, v);
356
        v = (u32)(Z.lo >> 32);
357
        PUTU32(p + 8, v);
358
        v = (u32)(Z.lo);
359
        PUTU32(p + 12, v);
360
#  endif
361
    } else {
362
        Xi[0] = Z.hi;
363
        Xi[1] = Z.lo;
364
    }
365
}
366
367
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
368
/*
369
 * Streamed gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
370
 * details... Compiler-generated code doesn't seem to give any
371
 * performance improvement, at least not on x86[_64]. It's here
372
 * mostly as a reference and a placeholder for possible future
373
 * non-trivial optimization[s]...
374
 */
375
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
376
                           const u8 *inp, size_t len)
377
{
378
    u128 Z;
379
    int cnt;
380
    size_t rem, nlo, nhi;
381
    DECLARE_IS_ENDIAN;
382
383
#   if 1
384
    do {
385
        cnt = 15;
386
        nlo = ((const u8 *)Xi)[15];
387
        nlo ^= inp[15];
388
        nhi = nlo >> 4;
389
        nlo &= 0xf;
390
391
        Z.hi = Htable[nlo].hi;
392
        Z.lo = Htable[nlo].lo;
393
394
        while (1) {
395
            rem = (size_t)Z.lo & 0xf;
396
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
397
            Z.hi = (Z.hi >> 4);
398
            if (sizeof(size_t) == 8)
399
                Z.hi ^= rem_4bit[rem];
400
            else
401
                Z.hi ^= (u64)rem_4bit[rem] << 32;
402
403
            Z.hi ^= Htable[nhi].hi;
404
            Z.lo ^= Htable[nhi].lo;
405
406
            if (--cnt < 0)
407
                break;
408
409
            nlo = ((const u8 *)Xi)[cnt];
410
            nlo ^= inp[cnt];
411
            nhi = nlo >> 4;
412
            nlo &= 0xf;
413
414
            rem = (size_t)Z.lo & 0xf;
415
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
416
            Z.hi = (Z.hi >> 4);
417
            if (sizeof(size_t) == 8)
418
                Z.hi ^= rem_4bit[rem];
419
            else
420
                Z.hi ^= (u64)rem_4bit[rem] << 32;
421
422
            Z.hi ^= Htable[nlo].hi;
423
            Z.lo ^= Htable[nlo].lo;
424
        }
425
#   else
426
    /*
427
 * Extra 256+16 bytes per key plus 512 bytes of shared tables
428
     * [should] give ~50% improvement... One could have PACK()-ed
429
     * the rem_8bit even here, but the priority is to minimize
430
     * cache footprint...
431
     */
432
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
433
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
434
    static const unsigned short rem_8bit[256] = {
435
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
436
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
437
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
438
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
439
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
440
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
441
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
442
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
443
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
444
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
445
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
446
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
447
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
448
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
449
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
450
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
451
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
452
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
453
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
454
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
455
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
456
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
457
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
458
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
459
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
460
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
461
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
462
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
463
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
464
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
465
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
466
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
467
    };
468
    /*
469
 * This pre-processing phase slows the procedure down by approximately
470
 * the same time as it makes each loop spin faster. In other words,
471
 * single-block performance is approximately the same as in the straightforward
472
 * "4-bit" implementation, and beyond that it only gets faster...
473
     */
474
    for (cnt = 0; cnt < 16; ++cnt) {
475
        Z.hi = Htable[cnt].hi;
476
        Z.lo = Htable[cnt].lo;
477
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
478
        Hshr4[cnt].hi = (Z.hi >> 4);
479
        Hshl4[cnt] = (u8)(Z.lo << 4);
480
    }
481
482
    do {
483
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
484
            nlo = ((const u8 *)Xi)[cnt];
485
            nlo ^= inp[cnt];
486
            nhi = nlo >> 4;
487
            nlo &= 0xf;
488
489
            Z.hi ^= Htable[nlo].hi;
490
            Z.lo ^= Htable[nlo].lo;
491
492
            rem = (size_t)Z.lo & 0xff;
493
494
            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
495
            Z.hi = (Z.hi >> 8);
496
497
            Z.hi ^= Hshr4[nhi].hi;
498
            Z.lo ^= Hshr4[nhi].lo;
499
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
500
        }
501
502
        nlo = ((const u8 *)Xi)[0];
503
        nlo ^= inp[0];
504
        nhi = nlo >> 4;
505
        nlo &= 0xf;
506
507
        Z.hi ^= Htable[nlo].hi;
508
        Z.lo ^= Htable[nlo].lo;
509
510
        rem = (size_t)Z.lo & 0xf;
511
512
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
513
        Z.hi = (Z.hi >> 4);
514
515
        Z.hi ^= Htable[nhi].hi;
516
        Z.lo ^= Htable[nhi].lo;
517
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
518
#   endif
519
520
        if (IS_LITTLE_ENDIAN) {
521
#   ifdef BSWAP8
522
            Xi[0] = BSWAP8(Z.hi);
523
            Xi[1] = BSWAP8(Z.lo);
524
#   else
525
            u8 *p = (u8 *)Xi;
526
            u32 v;
527
            v = (u32)(Z.hi >> 32);
528
            PUTU32(p, v);
529
            v = (u32)(Z.hi);
530
            PUTU32(p + 4, v);
531
            v = (u32)(Z.lo >> 32);
532
            PUTU32(p + 8, v);
533
            v = (u32)(Z.lo);
534
            PUTU32(p + 12, v);
535
#   endif
536
        } else {
537
            Xi[0] = Z.hi;
538
            Xi[1] = Z.lo;
539
        }
540
    } while (inp += 16, len -= 16);
541
}
542
#  endif
543
# else
544
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
545
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
546
                    size_t len);
547
# endif
548
549
# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
550
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
551
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
552
/*
553
 * GHASH_CHUNK is a "stride parameter" meant to mitigate the cache-thrashing
554
 * effect. In other words, the idea is to hash data while it is still in the
555
 * L1 cache after the encryption pass...
556
 */
557
4.54M
#  define GHASH_CHUNK       (3*1024)
558
# endif
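
The control flow that GHASH_CHUNK induces in the encrypt/decrypt routines further down can be summarized by this toy skeleton (a sketch with hypothetical stand-in routines, not this file's API):

#include <stddef.h>

#define CHUNK (3 * 1024)

/* Hypothetical stand-ins for the CTR pass and the GHASH pass. */
static void ctr_encrypt(unsigned char *out, const unsigned char *in,
                        size_t n) { while (n--) *out++ = *in++; }
static void ghash_update(const unsigned char *in, size_t n) { (void)in; (void)n; }

/* Encrypt-then-hash in 3KB strides, so the ciphertext is hashed while
 * it is still resident in the L1 cache. */
static void encrypt_then_hash(unsigned char *out, const unsigned char *in,
                              size_t len)
{
    while (len >= CHUNK) {
        ctr_encrypt(out, in, CHUNK);
        ghash_update(out, CHUNK);   /* out is still hot in L1 */
        in += CHUNK; out += CHUNK; len -= CHUNK;
    }
    if (len) {
        ctr_encrypt(out, in, len);
        ghash_update(out, len);
    }
}

int main(void)
{
    unsigned char pt[8192] = {0}, ct[8192];
    encrypt_then_hash(ct, pt, sizeof(pt));
    return 0;
}
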
559
560
#else                           /* TABLE_BITS */
561
562
static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
563
{
564
    u128 V, Z = { 0, 0 };
565
    long X;
566
    int i, j;
567
    const long *xi = (const long *)Xi;
568
    DECLARE_IS_ENDIAN;
569
570
    V.hi = H[0];                /* H is in host byte order, no byte swapping */
571
    V.lo = H[1];
572
573
    for (j = 0; j < 16 / sizeof(long); ++j) {
574
        if (IS_LITTLE_ENDIAN) {
575
            if (sizeof(long) == 8) {
576
# ifdef BSWAP8
577
                X = (long)(BSWAP8(xi[j]));
578
# else
579
                const u8 *p = (const u8 *)(xi + j);
580
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
581
# endif
582
            } else {
583
                const u8 *p = (const u8 *)(xi + j);
584
                X = (long)GETU32(p);
585
            }
586
        } else
587
            X = xi[j];
588
589
        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
590
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
591
            Z.hi ^= V.hi & M;
592
            Z.lo ^= V.lo & M;
593
594
            REDUCE1BIT(V);
595
        }
596
    }
597
598
    if (IS_LITTLE_ENDIAN) {
599
# ifdef BSWAP8
600
        Xi[0] = BSWAP8(Z.hi);
601
        Xi[1] = BSWAP8(Z.lo);
602
# else
603
        u8 *p = (u8 *)Xi;
604
        u32 v;
605
        v = (u32)(Z.hi >> 32);
606
        PUTU32(p, v);
607
        v = (u32)(Z.hi);
608
        PUTU32(p + 4, v);
609
        v = (u32)(Z.lo >> 32);
610
        PUTU32(p + 8, v);
611
        v = (u32)(Z.lo);
612
        PUTU32(p + 12, v);
613
# endif
614
    } else {
615
        Xi[0] = Z.hi;
616
        Xi[1] = Z.lo;
617
    }
618
}
619
620
# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
621
622
#endif
623
624
#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
625
# if    !defined(I386_ONLY) && \
626
        (defined(__i386)        || defined(__i386__)    || \
627
         defined(__x86_64)      || defined(__x86_64__)  || \
628
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
629
#  define GHASH_ASM_X86_OR_64
630
#  define GCM_FUNCREF_4BIT
631
632
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
633
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
634
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
635
                     size_t len);
636
637
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
638
#   define gcm_init_avx   gcm_init_clmul
639
#   define gcm_gmult_avx  gcm_gmult_clmul
640
#   define gcm_ghash_avx  gcm_ghash_clmul
641
#  else
642
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
643
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
644
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
645
                   size_t len);
646
#  endif
647
648
#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
649
#   define GHASH_ASM_X86
650
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
651
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
652
                        size_t len);
653
654
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
655
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
656
                        size_t len);
657
#  endif
658
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
659
#  include "arm_arch.h"
660
#  if __ARM_MAX_ARCH__>=7
661
#   define GHASH_ASM_ARM
662
#   define GCM_FUNCREF_4BIT
663
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
664
#   if defined(__arm__) || defined(__arm)
665
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
666
#   endif
667
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
668
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
669
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
670
                    size_t len);
671
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
672
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
673
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
674
                  size_t len);
675
#  endif
676
# elif defined(__sparc__) || defined(__sparc)
677
#  include "crypto/sparc_arch.h"
678
#  define GHASH_ASM_SPARC
679
#  define GCM_FUNCREF_4BIT
680
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
681
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
682
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
683
                    size_t len);
684
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
685
#  include "crypto/ppc_arch.h"
686
#  define GHASH_ASM_PPC
687
#  define GCM_FUNCREF_4BIT
688
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
689
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
690
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
691
                  size_t len);
692
# endif
693
#endif
694
695
#ifdef GCM_FUNCREF_4BIT
696
# undef  GCM_MUL
697
533k
# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
698
# ifdef GHASH
699
#  undef  GHASH
700
6.80M
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
701
# endif
702
#endif
703
704
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
705
1.15k
{
706
1.15k
    DECLARE_IS_ENDIAN;
707
708
1.15k
    memset(ctx, 0, sizeof(*ctx));
709
1.15k
    ctx->block = block;
710
1.15k
    ctx->key = key;
711
712
1.15k
    (*block) (ctx->H.c, ctx->H.c, key);
713
714
1.15k
    if (IS_LITTLE_ENDIAN) {
715
        /* H is stored in host byte order */
716
#ifdef BSWAP8
717
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
718
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
719
#else
720
1.15k
        u8 *p = ctx->H.c;
721
1.15k
        u64 hi, lo;
722
1.15k
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
723
1.15k
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
724
1.15k
        ctx->H.u[0] = hi;
725
1.15k
        ctx->H.u[1] = lo;
726
1.15k
#endif
727
1.15k
    }
728
#if     TABLE_BITS==8
729
    gcm_init_8bit(ctx->Htable, ctx->H.u);
730
#elif   TABLE_BITS==4
731
# if    defined(GHASH)
732
1.15k
#  define CTX__GHASH(f) (ctx->ghash = (f))
733
# else
734
#  define CTX__GHASH(f) (ctx->ghash = NULL)
735
# endif
736
1.15k
# if    defined(GHASH_ASM_X86_OR_64)
737
1.15k
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
738
1.15k
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
739
1.15k
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
740
1.15k
            gcm_init_avx(ctx->Htable, ctx->H.u);
741
1.15k
            ctx->gmult = gcm_gmult_avx;
742
1.15k
            CTX__GHASH(gcm_ghash_avx);
743
1.15k
        } else {
744
0
            gcm_init_clmul(ctx->Htable, ctx->H.u);
745
0
            ctx->gmult = gcm_gmult_clmul;
746
0
            CTX__GHASH(gcm_ghash_clmul);
747
0
        }
748
1.15k
        return;
749
1.15k
    }
750
0
#  endif
751
0
    gcm_init_4bit(ctx->Htable, ctx->H.u);
752
#  if   defined(GHASH_ASM_X86)  /* x86 only */
753
#   if  defined(OPENSSL_IA32_SSE2)
754
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
755
#   else
756
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
757
#   endif
758
        ctx->gmult = gcm_gmult_4bit_mmx;
759
        CTX__GHASH(gcm_ghash_4bit_mmx);
760
    } else {
761
        ctx->gmult = gcm_gmult_4bit_x86;
762
        CTX__GHASH(gcm_ghash_4bit_x86);
763
    }
764
#  else
765
0
    ctx->gmult = gcm_gmult_4bit;
766
0
    CTX__GHASH(gcm_ghash_4bit);
767
0
#  endif
768
# elif  defined(GHASH_ASM_ARM)
769
#  ifdef PMULL_CAPABLE
770
    if (PMULL_CAPABLE) {
771
        gcm_init_v8(ctx->Htable, ctx->H.u);
772
        ctx->gmult = gcm_gmult_v8;
773
        CTX__GHASH(gcm_ghash_v8);
774
    } else
775
#  endif
776
#  ifdef NEON_CAPABLE
777
    if (NEON_CAPABLE) {
778
        gcm_init_neon(ctx->Htable, ctx->H.u);
779
        ctx->gmult = gcm_gmult_neon;
780
        CTX__GHASH(gcm_ghash_neon);
781
    } else
782
#  endif
783
    {
784
        gcm_init_4bit(ctx->Htable, ctx->H.u);
785
        ctx->gmult = gcm_gmult_4bit;
786
        CTX__GHASH(gcm_ghash_4bit);
787
    }
788
# elif  defined(GHASH_ASM_SPARC)
789
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
790
        gcm_init_vis3(ctx->Htable, ctx->H.u);
791
        ctx->gmult = gcm_gmult_vis3;
792
        CTX__GHASH(gcm_ghash_vis3);
793
    } else {
794
        gcm_init_4bit(ctx->Htable, ctx->H.u);
795
        ctx->gmult = gcm_gmult_4bit;
796
        CTX__GHASH(gcm_ghash_4bit);
797
    }
798
# elif  defined(GHASH_ASM_PPC)
799
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
800
        gcm_init_p8(ctx->Htable, ctx->H.u);
801
        ctx->gmult = gcm_gmult_p8;
802
        CTX__GHASH(gcm_ghash_p8);
803
    } else {
804
        gcm_init_4bit(ctx->Htable, ctx->H.u);
805
        ctx->gmult = gcm_gmult_4bit;
806
        CTX__GHASH(gcm_ghash_4bit);
807
    }
808
# else
809
    gcm_init_4bit(ctx->Htable, ctx->H.u);
810
# endif
811
0
# undef CTX__GHASH
812
0
#endif
813
0
}
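
A minimal usage sketch of the API in this file. The block callback here is an identity placeholder standing in for a real block cipher, error handling is elided, and CRYPTO_gcm128_tag is assumed from OpenSSL's modes header alongside the functions shown in this report:

#include <string.h>
#include <openssl/modes.h>

/* Hypothetical block128_f: identity "cipher", for illustration only;
 * a real caller would pass e.g. (block128_f)AES_encrypt with an AES_KEY. */
static void toy_block(const unsigned char in[16], unsigned char out[16],
                      const void *key)
{
    (void)key;
    memcpy(out, in, 16);
}

static int toy_gcm_seal(unsigned char *ct, unsigned char tag[16],
                        const unsigned char *pt, size_t ptlen,
                        const unsigned char *aad, size_t aadlen,
                        const unsigned char iv[12])
{
    GCM128_CONTEXT ctx;

    CRYPTO_gcm128_init(&ctx, NULL, toy_block);  /* key is handed to the callback */
    CRYPTO_gcm128_setiv(&ctx, iv, 12);          /* 96-bit IV fast path */
    if (CRYPTO_gcm128_aad(&ctx, aad, aadlen) != 0)
        return -1;
    if (CRYPTO_gcm128_encrypt(&ctx, pt, ct, ptlen) != 0)
        return -1;
    CRYPTO_gcm128_tag(&ctx, tag, 16);           /* emit the 16-byte tag */
    return 0;
}
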
814
815
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
816
                         size_t len)
817
3.11M
{
818
3.11M
    DECLARE_IS_ENDIAN;
819
3.11M
    unsigned int ctr;
820
3.11M
#ifdef GCM_FUNCREF_4BIT
821
3.11M
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
822
3.11M
#endif
823
824
3.11M
    ctx->len.u[0] = 0;          /* AAD length */
825
3.11M
    ctx->len.u[1] = 0;          /* message length */
826
3.11M
    ctx->ares = 0;
827
3.11M
    ctx->mres = 0;
828
829
3.11M
    if (len == 12) {
830
3.11M
        memcpy(ctx->Yi.c, iv, 12);
831
3.11M
        ctx->Yi.c[12] = 0;
832
3.11M
        ctx->Yi.c[13] = 0;
833
3.11M
        ctx->Yi.c[14] = 0;
834
3.11M
        ctx->Yi.c[15] = 1;
835
3.11M
        ctr = 1;
836
3.11M
    } else {
837
0
        size_t i;
838
0
        u64 len0 = len;
839
840
        /* Borrow ctx->Xi to calculate initial Yi */
841
0
        ctx->Xi.u[0] = 0;
842
0
        ctx->Xi.u[1] = 0;
843
844
0
        while (len >= 16) {
845
0
            for (i = 0; i < 16; ++i)
846
0
                ctx->Xi.c[i] ^= iv[i];
847
0
            GCM_MUL(ctx);
848
0
            iv += 16;
849
0
            len -= 16;
850
0
        }
851
0
        if (len) {
852
0
            for (i = 0; i < len; ++i)
853
0
                ctx->Xi.c[i] ^= iv[i];
854
0
            GCM_MUL(ctx);
855
0
        }
856
0
        len0 <<= 3;
857
0
        if (IS_LITTLE_ENDIAN) {
858
#ifdef BSWAP8
859
            ctx->Xi.u[1] ^= BSWAP8(len0);
860
#else
861
0
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
862
0
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
863
0
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
864
0
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
865
0
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
866
0
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
867
0
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
868
0
            ctx->Xi.c[15] ^= (u8)(len0);
869
0
#endif
870
0
        } else {
871
0
            ctx->Xi.u[1] ^= len0;
872
0
        }
873
874
0
        GCM_MUL(ctx);
875
876
0
        if (IS_LITTLE_ENDIAN)
877
#ifdef BSWAP4
878
            ctr = BSWAP4(ctx->Xi.d[3]);
879
#else
880
0
            ctr = GETU32(ctx->Xi.c + 12);
881
0
#endif
882
0
        else
883
0
            ctr = ctx->Xi.d[3];
884
885
        /* Copy borrowed Xi to Yi */
886
0
        ctx->Yi.u[0] = ctx->Xi.u[0];
887
0
        ctx->Yi.u[1] = ctx->Xi.u[1];
888
0
    }
889
890
3.11M
    ctx->Xi.u[0] = 0;
891
3.11M
    ctx->Xi.u[1] = 0;
892
893
3.11M
    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
894
3.11M
    ++ctr;
895
3.11M
    if (IS_LITTLE_ENDIAN)
896
#ifdef BSWAP4
897
        ctx->Yi.d[3] = BSWAP4(ctr);
898
#else
899
3.11M
        PUTU32(ctx->Yi.c + 12, ctr);
900
0
#endif
901
0
    else
902
0
        ctx->Yi.d[3] = ctr;
903
3.11M
}
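
The two branches above implement the pre-counter block J_0 derivation from NIST SP 800-38D (s pads the IV to a full block; len0 <<= 3 is the 64-bit IV bit length):

J_0 = IV \| 0^{31} \| 1                                     if |IV| = 96 bits
J_0 = GHASH_H(IV \| 0^s \| 0^{64} \| [|IV|]_{64})           otherwise
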
904
905
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
906
                      size_t len)
907
3.42M
{
908
3.42M
    size_t i;
909
3.42M
    unsigned int n;
910
3.42M
    u64 alen = ctx->len.u[0];
911
3.42M
#ifdef GCM_FUNCREF_4BIT
912
3.42M
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
913
3.42M
# ifdef GHASH
914
3.42M
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
915
3.42M
                         const u8 *inp, size_t len) = ctx->ghash;
916
3.42M
# endif
917
3.42M
#endif
918
919
3.42M
    if (ctx->len.u[1])
920
0
        return -2;
921
922
3.42M
    alen += len;
923
3.42M
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
924
0
        return -1;
925
3.42M
    ctx->len.u[0] = alen;
926
927
3.42M
    n = ctx->ares;
928
3.42M
    if (n) {
929
1.04M
        while (n && len) {
930
771k
            ctx->Xi.c[n] ^= *(aad++);
931
771k
            --len;
932
771k
            n = (n + 1) % 16;
933
771k
        }
934
275k
        if (n == 0)
935
26.1k
            GCM_MUL(ctx);
936
249k
        else {
937
249k
            ctx->ares = n;
938
249k
            return 0;
939
249k
        }
940
275k
    }
941
3.17M
#ifdef GHASH
942
3.17M
    if ((i = (len & (size_t)-16))) {
943
1.28M
        GHASH(ctx, aad, i);
944
1.28M
        aad += i;
945
1.28M
        len -= i;
946
1.28M
    }
947
#else
948
    while (len >= 16) {
949
        for (i = 0; i < 16; ++i)
950
            ctx->Xi.c[i] ^= aad[i];
951
        GCM_MUL(ctx);
952
        aad += 16;
953
        len -= 16;
954
    }
955
#endif
956
3.17M
    if (len) {
957
2.71M
        n = (unsigned int)len;
958
31.4M
        for (i = 0; i < len; ++i)
959
28.6M
            ctx->Xi.c[i] ^= aad[i];
960
2.71M
    }
961
962
3.17M
    ctx->ares = n;
963
3.17M
    return 0;
964
3.42M
}
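
Whichever path is taken above (byte-wise buffering via ares, or bulk GHASH), the AAD phase computes the same recurrence over 16-byte blocks A_1 .. A_m, the last one zero-padded:

X_0 = 0
X_i = (X_{i-1} \oplus A_i) \cdot H        (multiplication in GF(2^128))
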
965
966
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
967
                          const unsigned char *in, unsigned char *out,
968
                          size_t len)
969
1.19M
{
970
1.19M
    DECLARE_IS_ENDIAN;
971
1.19M
    unsigned int n, ctr, mres;
972
1.19M
    size_t i;
973
1.19M
    u64 mlen = ctx->len.u[1];
974
1.19M
    block128_f block = ctx->block;
975
1.19M
    void *key = ctx->key;
976
1.19M
#ifdef GCM_FUNCREF_4BIT
977
1.19M
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
978
1.19M
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
979
1.19M
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
980
1.19M
                         const u8 *inp, size_t len) = ctx->ghash;
981
1.19M
# endif
982
1.19M
#endif
983
984
1.19M
    mlen += len;
985
1.19M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
986
0
        return -1;
987
1.19M
    ctx->len.u[1] = mlen;
988
989
1.19M
    mres = ctx->mres;
990
991
1.19M
    if (ctx->ares) {
992
        /* First call to encrypt finalizes GHASH(AAD) */
993
11.1k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
994
11.1k
        if (len == 0) {
995
10.4k
            GCM_MUL(ctx);
996
10.4k
            ctx->ares = 0;
997
10.4k
            return 0;
998
10.4k
        }
999
705
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1000
705
        ctx->Xi.u[0] = 0;
1001
705
        ctx->Xi.u[1] = 0;
1002
705
        mres = sizeof(ctx->Xi);
1003
#else
1004
        GCM_MUL(ctx);
1005
#endif
1006
705
        ctx->ares = 0;
1007
705
    }
1008
1009
1.18M
    if (IS_LITTLE_ENDIAN)
1010
#ifdef BSWAP4
1011
        ctr = BSWAP4(ctx->Yi.d[3]);
1012
#else
1013
1.18M
        ctr = GETU32(ctx->Yi.c + 12);
1014
0
#endif
1015
0
    else
1016
0
        ctr = ctx->Yi.d[3];
1017
1018
1.18M
    n = mres % 16;
1019
1.18M
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1020
1.18M
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1021
1.18M
        do {
1022
1.18M
            if (n) {
1023
1.16M
# if defined(GHASH)
1024
17.0M
                while (n && len) {
1025
15.9M
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1026
15.9M
                    --len;
1027
15.9M
                    n = (n + 1) % 16;
1028
15.9M
                }
1029
1.16M
                if (n == 0) {
1030
1.16M
                    GHASH(ctx, ctx->Xn, mres);
1031
1.16M
                    mres = 0;
1032
1.16M
                } else {
1033
0
                    ctx->mres = mres;
1034
0
                    return 0;
1035
0
                }
1036
# else
1037
                while (n && len) {
1038
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1039
                    --len;
1040
                    n = (n + 1) % 16;
1041
                }
1042
                if (n == 0) {
1043
                    GCM_MUL(ctx);
1044
                    mres = 0;
1045
                } else {
1046
                    ctx->mres = n;
1047
                    return 0;
1048
                }
1049
# endif
1050
1.16M
            }
1051
1.18M
# if defined(STRICT_ALIGNMENT)
1052
1.18M
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1053
1.17M
                break;
1054
11.7k
# endif
1055
11.7k
# if defined(GHASH)
1056
11.7k
            if (len >= 16 && mres) {
1057
428
                GHASH(ctx, ctx->Xn, mres);
1058
428
                mres = 0;
1059
428
            }
1060
11.7k
#  if defined(GHASH_CHUNK)
1061
11.7k
            while (len >= GHASH_CHUNK) {
1062
0
                size_t j = GHASH_CHUNK;
1063
1064
0
                while (j) {
1065
0
                    size_t_aX *out_t = (size_t_aX *)out;
1066
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
1067
1068
0
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1069
0
                    ++ctr;
1070
0
                    if (IS_LITTLE_ENDIAN)
1071
#   ifdef BSWAP4
1072
                        ctx->Yi.d[3] = BSWAP4(ctr);
1073
#   else
1074
0
                        PUTU32(ctx->Yi.c + 12, ctr);
1075
0
#   endif
1076
0
                    else
1077
0
                        ctx->Yi.d[3] = ctr;
1078
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1079
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1080
0
                    out += 16;
1081
0
                    in += 16;
1082
0
                    j -= 16;
1083
0
                }
1084
0
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1085
0
                len -= GHASH_CHUNK;
1086
0
            }
1087
11.7k
#  endif
1088
11.7k
            if ((i = (len & (size_t)-16))) {
1089
428
                size_t j = i;
1090
1091
856
                while (len >= 16) {
1092
428
                    size_t_aX *out_t = (size_t_aX *)out;
1093
428
                    const size_t_aX *in_t = (const size_t_aX *)in;
1094
1095
428
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1096
428
                    ++ctr;
1097
428
                    if (IS_LITTLE_ENDIAN)
1098
#  ifdef BSWAP4
1099
                        ctx->Yi.d[3] = BSWAP4(ctr);
1100
#  else
1101
428
                        PUTU32(ctx->Yi.c + 12, ctr);
1102
0
#  endif
1103
0
                    else
1104
0
                        ctx->Yi.d[3] = ctr;
1105
1.28k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1106
856
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1107
428
                    out += 16;
1108
428
                    in += 16;
1109
428
                    len -= 16;
1110
428
                }
1111
428
                GHASH(ctx, out - j, j);
1112
428
            }
1113
# else
1114
            while (len >= 16) {
1115
                size_t *out_t = (size_t *)out;
1116
                const size_t *in_t = (const size_t *)in;
1117
1118
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1119
                ++ctr;
1120
                if (IS_LITTLE_ENDIAN)
1121
#  ifdef BSWAP4
1122
                    ctx->Yi.d[3] = BSWAP4(ctr);
1123
#  else
1124
                    PUTU32(ctx->Yi.c + 12, ctr);
1125
#  endif
1126
                else
1127
                    ctx->Yi.d[3] = ctr;
1128
                for (i = 0; i < 16 / sizeof(size_t); ++i)
1129
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1130
                GCM_MUL(ctx);
1131
                out += 16;
1132
                in += 16;
1133
                len -= 16;
1134
            }
1135
# endif
1136
11.7k
            if (len) {
1137
694
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1138
694
                ++ctr;
1139
694
                if (IS_LITTLE_ENDIAN)
1140
# ifdef BSWAP4
1141
                    ctx->Yi.d[3] = BSWAP4(ctr);
1142
# else
1143
694
                    PUTU32(ctx->Yi.c + 12, ctr);
1144
0
# endif
1145
0
                else
1146
0
                    ctx->Yi.d[3] = ctr;
1147
694
# if defined(GHASH)
1148
2.56k
                while (len--) {
1149
1.87k
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1150
1.87k
                    ++n;
1151
1.87k
                }
1152
# else
1153
                while (len--) {
1154
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1155
                    ++n;
1156
                }
1157
                mres = n;
1158
# endif
1159
694
            }
1160
1161
11.7k
            ctx->mres = mres;
1162
11.7k
            return 0;
1163
1.18M
        } while (0);
1164
1.18M
    }
1165
1.17M
#endif
1166
1.17M
    for (i = 0; i < len; ++i) {
1167
0
        if (n == 0) {
1168
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1169
0
            ++ctr;
1170
0
            if (IS_LITTLE_ENDIAN)
1171
#ifdef BSWAP4
1172
                ctx->Yi.d[3] = BSWAP4(ctr);
1173
#else
1174
0
                PUTU32(ctx->Yi.c + 12, ctr);
1175
0
#endif
1176
0
            else
1177
0
                ctx->Yi.d[3] = ctr;
1178
0
        }
1179
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1180
0
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
1181
0
        n = (n + 1) % 16;
1182
0
        if (mres == sizeof(ctx->Xn)) {
1183
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1184
0
            mres = 0;
1185
0
        }
1186
#else
1187
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1188
        mres = n = (n + 1) % 16;
1189
        if (n == 0)
1190
            GCM_MUL(ctx);
1191
#endif
1192
0
    }
1193
1194
1.17M
    ctx->mres = mres;
1195
1.17M
    return 0;
1196
1.18M
}
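
All of the specialised paths above (chunked, aligned, byte-wise) implement the same per-block step; in the SP 800-38D notation, with inc_{32} incrementing the low 32 counter bits:

CB_1 = inc_{32}(J_0),    CB_{i+1} = inc_{32}(CB_i)
C_i  = P_i \oplus E_K(CB_i)
X_i  = (X_{i-1} \oplus C_i) \cdot H
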
1197
1198
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1199
                          const unsigned char *in, unsigned char *out,
1200
                          size_t len)
1201
613k
{
1202
613k
    DECLARE_IS_ENDIAN;
1203
613k
    unsigned int n, ctr, mres;
1204
613k
    size_t i;
1205
613k
    u64 mlen = ctx->len.u[1];
1206
613k
    block128_f block = ctx->block;
1207
613k
    void *key = ctx->key;
1208
613k
#ifdef GCM_FUNCREF_4BIT
1209
613k
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1210
613k
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1211
613k
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1212
613k
                         const u8 *inp, size_t len) = ctx->ghash;
1213
613k
# endif
1214
613k
#endif
1215
1216
613k
    mlen += len;
1217
613k
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1218
0
        return -1;
1219
613k
    ctx->len.u[1] = mlen;
1220
1221
613k
    mres = ctx->mres;
1222
1223
613k
    if (ctx->ares) {
1224
        /* First call to decrypt finalizes GHASH(AAD) */
1225
227k
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1226
227k
        if (len == 0) {
1227
192k
            GCM_MUL(ctx);
1228
192k
            ctx->ares = 0;
1229
192k
            return 0;
1230
192k
        }
1231
34.2k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1232
34.2k
        ctx->Xi.u[0] = 0;
1233
34.2k
        ctx->Xi.u[1] = 0;
1234
34.2k
        mres = sizeof(ctx->Xi);
1235
#else
1236
        GCM_MUL(ctx);
1237
#endif
1238
34.2k
        ctx->ares = 0;
1239
34.2k
    }
1240
1241
421k
    if (IS_LITTLE_ENDIAN)
1242
#ifdef BSWAP4
1243
        ctr = BSWAP4(ctx->Yi.d[3]);
1244
#else
1245
421k
        ctr = GETU32(ctx->Yi.c + 12);
1246
0
#endif
1247
0
    else
1248
0
        ctr = ctx->Yi.d[3];
1249
1250
421k
    n = mres % 16;
1251
421k
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1252
421k
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1253
421k
        do {
1254
421k
            if (n) {
1255
0
# if defined(GHASH)
1256
0
                while (n && len) {
1257
0
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1258
0
                    --len;
1259
0
                    n = (n + 1) % 16;
1260
0
                }
1261
0
                if (n == 0) {
1262
0
                    GHASH(ctx, ctx->Xn, mres);
1263
0
                    mres = 0;
1264
0
                } else {
1265
0
                    ctx->mres = mres;
1266
0
                    return 0;
1267
0
                }
1268
# else
1269
                while (n && len) {
1270
                    u8 c = *(in++);
1271
                    *(out++) = c ^ ctx->EKi.c[n];
1272
                    ctx->Xi.c[n] ^= c;
1273
                    --len;
1274
                    n = (n + 1) % 16;
1275
                }
1276
                if (n == 0) {
1277
                    GCM_MUL(ctx);
1278
                    mres = 0;
1279
                } else {
1280
                    ctx->mres = n;
1281
                    return 0;
1282
                }
1283
# endif
1284
0
            }
1285
421k
# if defined(STRICT_ALIGNMENT)
1286
421k
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1287
776
                break;
1288
420k
# endif
1289
420k
# if defined(GHASH)
1290
420k
            if (len >= 16 && mres) {
1291
9.18k
                GHASH(ctx, ctx->Xn, mres);
1292
9.18k
                mres = 0;
1293
9.18k
            }
1294
420k
#  if defined(GHASH_CHUNK)
1295
423k
            while (len >= GHASH_CHUNK) {
1296
3.57k
                size_t j = GHASH_CHUNK;
1297
1298
3.57k
                GHASH(ctx, in, GHASH_CHUNK);
1299
689k
                while (j) {
1300
686k
                    size_t_aX *out_t = (size_t_aX *)out;
1301
686k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1302
1303
686k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1304
686k
                    ++ctr;
1305
686k
                    if (IS_LITTLE_ENDIAN)
1306
#   ifdef BSWAP4
1307
                        ctx->Yi.d[3] = BSWAP4(ctr);
1308
#   else
1309
686k
                        PUTU32(ctx->Yi.c + 12, ctr);
1310
0
#   endif
1311
0
                    else
1312
0
                        ctx->Yi.d[3] = ctr;
1313
2.05M
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1314
1.37M
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1315
686k
                    out += 16;
1316
686k
                    in += 16;
1317
686k
                    j -= 16;
1318
686k
                }
1319
3.57k
                len -= GHASH_CHUNK;
1320
3.57k
            }
1321
420k
#  endif
1322
420k
            if ((i = (len & (size_t)-16))) {
1323
9.14k
                GHASH(ctx, in, i);
1324
140k
                while (len >= 16) {
1325
131k
                    size_t_aX *out_t = (size_t_aX *)out;
1326
131k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1327
1328
131k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1329
131k
                    ++ctr;
1330
131k
                    if (IS_LITTLE_ENDIAN)
1331
#  ifdef BSWAP4
1332
                        ctx->Yi.d[3] = BSWAP4(ctr);
1333
#  else
1334
131k
                        PUTU32(ctx->Yi.c + 12, ctr);
1335
0
#  endif
1336
0
                    else
1337
0
                        ctx->Yi.d[3] = ctr;
1338
394k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1339
262k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1340
131k
                    out += 16;
1341
131k
                    in += 16;
1342
131k
                    len -= 16;
1343
131k
                }
1344
9.14k
            }
1345
# else
1346
            while (len >= 16) {
1347
                size_t *out_t = (size_t *)out;
1348
                const size_t *in_t = (const size_t *)in;
1349
1350
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1351
                ++ctr;
1352
                if (IS_LITTLE_ENDIAN)
1353
#  ifdef BSWAP4
1354
                    ctx->Yi.d[3] = BSWAP4(ctr);
1355
#  else
1356
                    PUTU32(ctx->Yi.c + 12, ctr);
1357
#  endif
1358
                else
1359
                    ctx->Yi.d[3] = ctr;
1360
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1361
                    size_t c = in_t[i];
1362
                    out_t[i] = c ^ ctx->EKi.t[i];
1363
                    ctx->Xi.t[i] ^= c;
1364
                }
1365
                GCM_MUL(ctx);
1366
                out += 16;
1367
                in += 16;
1368
                len -= 16;
1369
            }
1370
# endif
1371
420k
            if (len) {
1372
31.8k
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1373
31.8k
                ++ctr;
1374
31.8k
                if (IS_LITTLE_ENDIAN)
1375
# ifdef BSWAP4
1376
                    ctx->Yi.d[3] = BSWAP4(ctr);
1377
# else
1378
31.8k
                    PUTU32(ctx->Yi.c + 12, ctr);
1379
0
# endif
1380
0
                else
1381
0
                    ctx->Yi.d[3] = ctr;
1382
31.8k
# if defined(GHASH)
1383
263k
                while (len--) {
1384
231k
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1385
231k
                    ++n;
1386
231k
                }
1387
# else
1388
                while (len--) {
1389
                    u8 c = in[n];
1390
                    ctx->Xi.c[n] ^= c;
1391
                    out[n] = c ^ ctx->EKi.c[n];
1392
                    ++n;
1393
                }
1394
                mres = n;
1395
# endif
1396
31.8k
            }
1397
1398
420k
            ctx->mres = mres;
1399
420k
            return 0;
1400
421k
        } while (0);
1401
421k
    }
1402
776
#endif
1403
776
    for (i = 0; i < len; ++i) {
1404
0
        u8 c;
1405
0
        if (n == 0) {
1406
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1407
0
            ++ctr;
1408
0
            if (IS_LITTLE_ENDIAN)
1409
#ifdef BSWAP4
1410
                ctx->Yi.d[3] = BSWAP4(ctr);
1411
#else
1412
0
                PUTU32(ctx->Yi.c + 12, ctr);
1413
0
#endif
1414
0
            else
1415
0
                ctx->Yi.d[3] = ctr;
1416
0
        }
1417
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1418
0
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1419
0
        n = (n + 1) % 16;
1420
0
        if (mres == sizeof(ctx->Xn)) {
1421
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1422
0
            mres = 0;
1423
0
        }
1424
#else
1425
        c = in[i];
1426
        out[i] = c ^ ctx->EKi.c[n];
1427
        ctx->Xi.c[n] ^= c;
1428
        mres = n = (n + 1) % 16;
1429
        if (n == 0)
1430
            GCM_MUL(ctx);
1431
#endif
1432
0
    }
1433
1434
776
    ctx->mres = mres;
1435
776
    return 0;
1436
421k
}
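
Decryption mirrors the encrypt path with one structural difference worth noting: GHASH must absorb the ciphertext, which on this side is the input. Hence the bulk paths call GHASH(ctx, in, ...) before the XOR, and the byte-wise paths save c = in[i] before out may alias it; the tag recurrence is unchanged:

P_i = C_i \oplus E_K(CB_i)
X_i = (X_{i-1} \oplus C_i) \cdot H
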
1437
1438
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1439
                                const unsigned char *in, unsigned char *out,
1440
                                size_t len, ctr128_f stream)
1441
3.15M
{
1442
#if defined(OPENSSL_SMALL_FOOTPRINT)
1443
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1444
#else
1445
3.15M
    DECLARE_IS_ENDIAN;
1446
3.15M
    unsigned int n, ctr, mres;
1447
3.15M
    size_t i;
1448
3.15M
    u64 mlen = ctx->len.u[1];
1449
3.15M
    void *key = ctx->key;
1450
3.15M
# ifdef GCM_FUNCREF_4BIT
1451
3.15M
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1452
3.15M
#  ifdef GHASH
1453
3.15M
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1454
3.15M
                         const u8 *inp, size_t len) = ctx->ghash;
1455
3.15M
#  endif
1456
3.15M
# endif
1457
1458
3.15M
    mlen += len;
1459
3.15M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1460
0
        return -1;
1461
3.15M
    ctx->len.u[1] = mlen;
1462
1463
3.15M
    mres = ctx->mres;
1464
1465
3.15M
    if (ctx->ares) {
1466
        /* First call to encrypt finalizes GHASH(AAD) */
1467
1.60M
#if defined(GHASH)
1468
1.60M
        if (len == 0) {
1469
0
            GCM_MUL(ctx);
1470
0
            ctx->ares = 0;
1471
0
            return 0;
1472
0
        }
1473
1.60M
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1474
1.60M
        ctx->Xi.u[0] = 0;
1475
1.60M
        ctx->Xi.u[1] = 0;
1476
1.60M
        mres = sizeof(ctx->Xi);
1477
#else
1478
        GCM_MUL(ctx);
1479
#endif
1480
1.60M
        ctx->ares = 0;
1481
1.60M
    }
1482
1483
3.15M
    if (IS_LITTLE_ENDIAN)
1484
# ifdef BSWAP4
1485
        ctr = BSWAP4(ctx->Yi.d[3]);
1486
# else
1487
3.15M
        ctr = GETU32(ctx->Yi.c + 12);
1488
0
# endif
1489
0
    else
1490
0
        ctr = ctx->Yi.d[3];
1491
1492
3.15M
    n = mres % 16;
1493
3.15M
    if (n) {
1494
333k
# if defined(GHASH)
1495
2.05M
        while (n && len) {
1496
1.71M
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1497
1.71M
            --len;
1498
1.71M
            n = (n + 1) % 16;
1499
1.71M
        }
1500
333k
        if (n == 0) {
1501
159k
            GHASH(ctx, ctx->Xn, mres);
1502
159k
            mres = 0;
1503
173k
        } else {
1504
173k
            ctx->mres = mres;
1505
173k
            return 0;
1506
173k
        }
1507
# else
1508
        while (n && len) {
1509
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1510
            --len;
1511
            n = (n + 1) % 16;
1512
        }
1513
        if (n == 0) {
1514
            GCM_MUL(ctx);
1515
            mres = 0;
1516
        } else {
1517
            ctx->mres = n;
1518
            return 0;
1519
        }
1520
# endif
1521
333k
    }
1522
2.98M
# if defined(GHASH)
1523
2.98M
        if (len >= 16 && mres) {
1524
87.9k
            GHASH(ctx, ctx->Xn, mres);
1525
87.9k
            mres = 0;
1526
87.9k
        }
1527
2.98M
#  if defined(GHASH_CHUNK)
1528
2.98M
    while (len >= GHASH_CHUNK) {
1529
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1530
0
        ctr += GHASH_CHUNK / 16;
1531
0
        if (IS_LITTLE_ENDIAN)
1532
#   ifdef BSWAP4
1533
            ctx->Yi.d[3] = BSWAP4(ctr);
1534
#   else
1535
0
            PUTU32(ctx->Yi.c + 12, ctr);
1536
0
#   endif
1537
0
        else
1538
0
            ctx->Yi.d[3] = ctr;
1539
0
        GHASH(ctx, out, GHASH_CHUNK);
1540
0
        out += GHASH_CHUNK;
1541
0
        in += GHASH_CHUNK;
1542
0
        len -= GHASH_CHUNK;
1543
0
    }
1544
2.98M
#  endif
1545
2.98M
# endif
1546
2.98M
    if ((i = (len & (size_t)-16))) {
1547
453k
        size_t j = i / 16;
1548
1549
453k
        (*stream) (in, out, j, key, ctx->Yi.c);
1550
453k
        ctr += (unsigned int)j;
1551
453k
        if (IS_LITTLE_ENDIAN)
1552
# ifdef BSWAP4
1553
            ctx->Yi.d[3] = BSWAP4(ctr);
1554
# else
1555
453k
            PUTU32(ctx->Yi.c + 12, ctr);
1556
0
# endif
1557
0
        else
1558
0
            ctx->Yi.d[3] = ctr;
1559
453k
        in += i;
1560
453k
        len -= i;
1561
453k
# if defined(GHASH)
1562
453k
        GHASH(ctx, out, i);
1563
453k
        out += i;
1564
# else
1565
        while (j--) {
1566
            for (i = 0; i < 16; ++i)
1567
                ctx->Xi.c[i] ^= out[i];
1568
            GCM_MUL(ctx);
1569
            out += 16;
1570
        }
1571
# endif
1572
453k
    }
1573
2.98M
    if (len) {
1574
2.93M
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1575
2.93M
        ++ctr;
1576
2.93M
        if (IS_LITTLE_ENDIAN)
1577
# ifdef BSWAP4
1578
            ctx->Yi.d[3] = BSWAP4(ctr);
1579
# else
1580
2.93M
            PUTU32(ctx->Yi.c + 12, ctr);
1581
0
# endif
1582
0
        else
1583
0
            ctx->Yi.d[3] = ctr;
1584
11.7M
        while (len--) {
1585
8.84M
# if defined(GHASH)
1586
8.84M
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1587
# else
1588
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1589
# endif
1590
8.84M
            ++n;
1591
8.84M
        }
1592
2.93M
    }
1593
1594
2.98M
    ctx->mres = mres;
1595
2.98M
    return 0;
1596
3.15M
#endif
1597
3.15M
}
1598
1599
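Note: in the counts above, the failure branch of the mlen guard in CRYPTO_gcm128_encrypt_ctr32 records zero hits. The bound it enforces is the NIST SP 800-38D per-invocation plaintext limit of 2^39 - 256 bits, i.e. (U64(1) << 36) - 32 bytes; the second clause of the guard catches mlen wrapping past len on 64-bit builds. A standalone sketch (mine, not part of the instrumented source) verifying the bit/byte arithmetic:

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical standalone check, not from gcm128.c: SP 800-38D
     * caps one GCM invocation at 2^39 - 256 bits of plaintext, which
     * is exactly the (U64(1) << 36) - 32 byte constant tested (and,
     * per the counts above, never tripped) in the listing. */
    int main(void)
    {
        uint64_t max_bits  = (UINT64_C(1) << 39) - 256;
        uint64_t max_bytes = (UINT64_C(1) << 36) - 32;

        assert(max_bits == 8 * max_bytes);
        return 0;
    }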
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1600
                                const unsigned char *in, unsigned char *out,
1601
                                size_t len, ctr128_f stream)
1602
1.12M
{
1603
#if defined(OPENSSL_SMALL_FOOTPRINT)
1604
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1605
#else
1606
1.12M
    DECLARE_IS_ENDIAN;
1607
1.12M
    unsigned int n, ctr, mres;
1608
1.12M
    size_t i;
1609
1.12M
    u64 mlen = ctx->len.u[1];
1610
1.12M
    void *key = ctx->key;
1611
1.12M
# ifdef GCM_FUNCREF_4BIT
1612
1.12M
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1613
1.12M
#  ifdef GHASH
1614
1.12M
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1615
1.12M
                         const u8 *inp, size_t len) = ctx->ghash;
1616
1.12M
#  endif
1617
1.12M
# endif
1618
1619
1.12M
    mlen += len;
1620
1.12M
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1621
0
        return -1;
1622
1.12M
    ctx->len.u[1] = mlen;
1623
1624
1.12M
    mres = ctx->mres;
1625
1626
1.12M
    if (ctx->ares) {
1627
        /* First call to decrypt finalizes GHASH(AAD) */
1628
550k
# if defined(GHASH)
1629
550k
        if (len == 0) {
1630
11.6k
            GCM_MUL(ctx);
1631
11.6k
            ctx->ares = 0;
1632
11.6k
            return 0;
1633
11.6k
        }
1634
538k
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1635
538k
        ctx->Xi.u[0] = 0;
1636
538k
        ctx->Xi.u[1] = 0;
1637
538k
        mres = sizeof(ctx->Xi);
1638
# else
1639
        GCM_MUL(ctx);
1640
# endif
1641
538k
        ctx->ares = 0;
1642
538k
    }
1643
1644
1.11M
    if (IS_LITTLE_ENDIAN)
1645
# ifdef BSWAP4
1646
        ctr = BSWAP4(ctx->Yi.d[3]);
1647
# else
1648
1.11M
        ctr = GETU32(ctx->Yi.c + 12);
1649
0
# endif
1650
0
    else
1651
0
        ctr = ctx->Yi.d[3];
1652
1653
1.11M
    n = mres % 16;
1654
1.11M
    if (n) {
1655
0
# if defined(GHASH)
1656
0
        while (n && len) {
1657
0
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1658
0
            --len;
1659
0
            n = (n + 1) % 16;
1660
0
        }
1661
0
        if (n == 0) {
1662
0
            GHASH(ctx, ctx->Xn, mres);
1663
0
            mres = 0;
1664
0
        } else {
1665
0
            ctx->mres = mres;
1666
0
            return 0;
1667
0
        }
1668
# else
1669
        while (n && len) {
1670
            u8 c = *(in++);
1671
            *(out++) = c ^ ctx->EKi.c[n];
1672
            ctx->Xi.c[n] ^= c;
1673
            --len;
1674
            n = (n + 1) % 16;
1675
        }
1676
        if (n == 0) {
1677
            GCM_MUL(ctx);
1678
            mres = 0;
1679
        } else {
1680
            ctx->mres = n;
1681
            return 0;
1682
        }
1683
# endif
1684
0
    }
1685
1.11M
# if defined(GHASH)
1686
1.11M
    if (len >= 16 && mres) {
1687
0
        GHASH(ctx, ctx->Xn, mres);
1688
0
        mres = 0;
1689
0
    }
1690
1.11M
#  if defined(GHASH_CHUNK)
1691
1.11M
    while (len >= GHASH_CHUNK) {
1692
0
        GHASH(ctx, in, GHASH_CHUNK);
1693
0
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1694
0
        ctr += GHASH_CHUNK / 16;
1695
0
        if (IS_LITTLE_ENDIAN)
1696
#   ifdef BSWAP4
1697
            ctx->Yi.d[3] = BSWAP4(ctr);
1698
#   else
1699
0
            PUTU32(ctx->Yi.c + 12, ctr);
1700
0
#   endif
1701
0
        else
1702
0
            ctx->Yi.d[3] = ctr;
1703
0
        out += GHASH_CHUNK;
1704
0
        in += GHASH_CHUNK;
1705
0
        len -= GHASH_CHUNK;
1706
0
    }
1707
1.11M
#  endif
1708
1.11M
# endif
1709
1.11M
    if ((i = (len & (size_t)-16))) {
1710
523k
        size_t j = i / 16;
1711
1712
523k
# if defined(GHASH)
1713
523k
        GHASH(ctx, in, i);
1714
# else
1715
        while (j--) {
1716
            size_t k;
1717
            for (k = 0; k < 16; ++k)
1718
                ctx->Xi.c[k] ^= in[k];
1719
            GCM_MUL(ctx);
1720
            in += 16;
1721
        }
1722
        j = i / 16;
1723
        in -= i;
1724
# endif
1725
523k
        (*stream) (in, out, j, key, ctx->Yi.c);
1726
523k
        ctr += (unsigned int)j;
1727
523k
        if (IS_LITTLE_ENDIAN)
1728
# ifdef BSWAP4
1729
            ctx->Yi.d[3] = BSWAP4(ctr);
1730
# else
1731
523k
            PUTU32(ctx->Yi.c + 12, ctr);
1732
0
# endif
1733
0
        else
1734
0
            ctx->Yi.d[3] = ctr;
1735
523k
        out += i;
1736
523k
        in += i;
1737
523k
        len -= i;
1738
523k
    }
1739
1.11M
    if (len) {
1740
1.08M
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1741
1.08M
        ++ctr;
1742
1.08M
        if (IS_LITTLE_ENDIAN)
1743
# ifdef BSWAP4
1744
            ctx->Yi.d[3] = BSWAP4(ctr);
1745
# else
1746
1.08M
            PUTU32(ctx->Yi.c + 12, ctr);
1747
0
# endif
1748
0
        else
1749
0
            ctx->Yi.d[3] = ctr;
1750
4.98M
        while (len--) {
1751
3.89M
# if defined(GHASH)
1752
3.89M
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1753
# else
1754
            u8 c = in[n];
1755
            ctx->Xi.c[mres++] ^= c;
1756
            out[n] = c ^ ctx->EKi.c[n];
1757
# endif
1758
3.89M
            ++n;
1759
3.89M
        }
1760
1.08M
    }
1761
1762
1.11M
    ctx->mres = mres;
1763
1.11M
    return 0;
1764
1.11M
#endif
1765
1.11M
}
1766
1767
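Note: both ctr32 routines hand bulk work to the ctr128_f `stream` callback, and on the decrypt side the ciphertext is fed to GHASH before the keystream is applied. The sketch below illustrates the contract those loops assume: `stream` encrypts `blocks` consecutive counter blocks derived from `ivec` and XORs them into the input, advancing only the last 32 bits of the counter as a big-endian integer; the caller refreshes its own copy of Yi afterwards, so `ivec` is treated as read-only. The `demo_*` names and the key-bundling struct are illustrative, not OpenSSL API:

    #include <stddef.h>
    #include <string.h>

    typedef void (*block128_f)(const unsigned char in[16],
                               unsigned char out[16], const void *key);

    /* Illustrative context: bundles a block cipher with its key so
     * demo_ctr32 matches the ctr128_f signature exactly. */
    struct demo_ctr_key {
        block128_f block;
        const void *key;
    };

    static void demo_ctr32(const unsigned char *in, unsigned char *out,
                           size_t blocks, const void *key,
                           const unsigned char ivec[16])
    {
        const struct demo_ctr_key *k = key;
        unsigned char cb[16], ks[16];
        unsigned int ctr;
        size_t i;

        memcpy(cb, ivec, 16);
        ctr = ((unsigned int)cb[12] << 24) | ((unsigned int)cb[13] << 16)
            | ((unsigned int)cb[14] << 8) | cb[15];
        while (blocks--) {
            (*k->block)(cb, ks, k->key);      /* E_K(counter block) */
            for (i = 0; i < 16; ++i)
                out[i] = in[i] ^ ks[i];
            in += 16;
            out += 16;
            ++ctr;                            /* only low 32 bits advance */
            cb[12] = (unsigned char)(ctr >> 24);
            cb[13] = (unsigned char)(ctr >> 16);
            cb[14] = (unsigned char)(ctr >> 8);
            cb[15] = (unsigned char)ctr;
        }
    }

In OpenSSL builds with AES-NI, this slot is typically filled by an assembly routine such as aesni_ctr32_encrypt_blocks rather than a C loop like the one sketched here.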
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1768
                         size_t len)
1769
3.11M
{
1770
3.11M
    DECLARE_IS_ENDIAN;
1771
3.11M
    u64 alen = ctx->len.u[0] << 3;
1772
3.11M
    u64 clen = ctx->len.u[1] << 3;
1773
3.11M
#ifdef GCM_FUNCREF_4BIT
1774
3.11M
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1775
3.11M
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1776
3.11M
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1777
3.11M
                         const u8 *inp, size_t len) = ctx->ghash;
1778
3.11M
# endif
1779
3.11M
#endif
1780
1781
3.11M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1782
3.11M
    u128 bitlen;
1783
3.11M
    unsigned int mres = ctx->mres;
1784
1785
3.11M
    if (mres) {
1786
2.73M
        unsigned blocks = (mres + 15) & -16;
1787
1788
2.73M
        memset(ctx->Xn + mres, 0, blocks - mres);
1789
2.73M
        mres = blocks;
1790
2.73M
        if (mres == sizeof(ctx->Xn)) {
1791
0
            GHASH(ctx, ctx->Xn, mres);
1792
0
            mres = 0;
1793
0
        }
1794
2.73M
    } else if (ctx->ares) {
1795
292k
        GCM_MUL(ctx);
1796
292k
    }
1797
#else
1798
    if (ctx->mres || ctx->ares)
1799
        GCM_MUL(ctx);
1800
#endif
1801
1802
3.11M
    if (IS_LITTLE_ENDIAN) {
1803
#ifdef BSWAP8
1804
        alen = BSWAP8(alen);
1805
        clen = BSWAP8(clen);
1806
#else
1807
3.11M
        u8 *p = ctx->len.c;
1808
1809
3.11M
        ctx->len.u[0] = alen;
1810
3.11M
        ctx->len.u[1] = clen;
1811
1812
3.11M
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1813
3.11M
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1814
3.11M
#endif
1815
3.11M
    }
1816
1817
3.11M
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1818
3.11M
    bitlen.hi = alen;
1819
3.11M
    bitlen.lo = clen;
1820
3.11M
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1821
3.11M
    mres += sizeof(bitlen);
1822
3.11M
    GHASH(ctx, ctx->Xn, mres);
1823
#else
1824
    ctx->Xi.u[0] ^= alen;
1825
    ctx->Xi.u[1] ^= clen;
1826
    GCM_MUL(ctx);
1827
#endif
1828
1829
3.11M
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1830
3.11M
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1831
1832
3.11M
    if (tag && len <= sizeof(ctx->Xi))
1833
1.16M
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1834
1.94M
    else
1835
1.94M
        return -1;
1836
3.11M
}
1837
1838
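Note: CRYPTO_gcm128_finish pads any buffered partial block with zeros, folds in one final block holding len(AAD) || len(C) as two 64-bit big-endian bit counts (hence the `<< 3` on the byte counters and the GETU32 byte swap on little-endian targets without BSWAP8), XORs in EK0, and compares against the caller's tag with the constant-time CRYPTO_memcmp, returning 0 on a match and -1 when no usable tag is supplied. A standalone sketch of that length-block layout, with illustrative lengths of my choosing:

    #include <assert.h>
    #include <stdint.h>

    /* Standalone sketch, not from the instrumented source: the last
     * GHASH block is len(AAD) || len(C) as two 64-bit big-endian bit
     * counts, shown here for 20 AAD bytes and 40 ciphertext bytes. */
    int main(void)
    {
        uint64_t alen = UINT64_C(20) << 3;   /* 160 bits of AAD */
        uint64_t clen = UINT64_C(40) << 3;   /* 320 bits of ciphertext */
        unsigned char lenblk[16];
        int i;

        for (i = 0; i < 8; ++i) {
            lenblk[i]     = (unsigned char)(alen >> (56 - 8 * i));
            lenblk[8 + i] = (unsigned char)(clen >> (56 - 8 * i));
        }
        assert(lenblk[7] == 0xA0);                     /* 160 = 0xA0 */
        assert(lenblk[14] == 1 && lenblk[15] == 0x40); /* 320 = 0x140 */
        return 0;
    }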
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1839
1.94M
{
1840
1.94M
    CRYPTO_gcm128_finish(ctx, NULL, 0);
1841
1.94M
    memcpy(tag, ctx->Xi.c,
1842
1.94M
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1843
1.94M
}
1844
1845
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1846
0
{
1847
0
    GCM128_CONTEXT *ret;
1848
1849
0
    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1850
0
        CRYPTO_gcm128_init(ret, key, block);
1851
1852
0
    return ret;
1853
0
}
1854
1855
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1856
0
{
1857
0
    OPENSSL_clear_free(ctx, sizeof(*ctx));
1858
0
}
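Note: the remaining entry points are thin lifecycle wrappers: CRYPTO_gcm128_new allocates a context and runs CRYPTO_gcm128_init on it, and CRYPTO_gcm128_release scrubs it with OPENSSL_clear_free. A hedged end-to-end sketch of the calling sequence covered by this report, assuming AES-128 supplies the block128_f and that the internal crypto/modes.h declarations are visible (they are not part of the public headers in 3.0); demo_gcm_seal and its buffer sizes are illustrative:

    #include <stddef.h>
    #include <openssl/aes.h>
    #include "crypto/modes.h"

    /* Illustrative seal helper: init -> setiv -> aad -> encrypt -> tag. */
    static int demo_gcm_seal(const unsigned char key[16],
                             const unsigned char iv[12],
                             const unsigned char *aad, size_t aadlen,
                             const unsigned char *pt, unsigned char *ct,
                             size_t len, unsigned char tag[16])
    {
        AES_KEY ks;
        GCM128_CONTEXT *gcm;

        if (AES_set_encrypt_key(key, 128, &ks) != 0)
            return -1;
        /* The same cast OpenSSL uses internally to fit AES_encrypt
         * into the block128_f slot. */
        if ((gcm = CRYPTO_gcm128_new(&ks, (block128_f)AES_encrypt)) == NULL)
            return -1;
        CRYPTO_gcm128_setiv(gcm, iv, 12);        /* 96-bit IV fast path */
        if (CRYPTO_gcm128_aad(gcm, aad, aadlen) != 0 ||
            CRYPTO_gcm128_encrypt(gcm, pt, ct, len) != 0) {
            CRYPTO_gcm128_release(gcm);
            return -1;
        }
        CRYPTO_gcm128_tag(gcm, tag, 16);         /* finalize + copy tag */
        CRYPTO_gcm128_release(gcm);              /* OPENSSL_clear_free */
        return 0;
    }

A decrypting caller would use CRYPTO_gcm128_decrypt and then CRYPTO_gcm128_finish with the received tag, discarding the plaintext on any nonzero return.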