Coverage Report

Created: 2023-06-08 06:41

/src/openssl30/crypto/modes/gcm128.c

/*
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <string.h>
#include <openssl/crypto.h>
#include "internal/cryptlib.h"
#include "internal/endian.h"
#include "crypto/modes.h"

#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef size_t size_t_aX __attribute((__aligned__(1)));
#else
typedef size_t size_t_aX;
#endif

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
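
/*
 * Editor's note, an illustrative sketch (not part of the original
 * source): REDUCE1BIT is one right-shift-and-reduce step in GF(2^128)
 * with the GCM polynomial x^128 + x^7 + x^2 + x + 1. Shifting V right
 * one bit divides by x; if the bit shifted out was 1, the polynomial
 * folds back in, which in GCM's bit-reflected representation is the
 * constant 0xE1 in the top byte. The sizeof(size_t) branch only
 * changes how that constant is materialized on 32- vs 64-bit targets.
 */
#if 0                           /* standalone demonstration, not compiled */
static void reduce1bit_demo(u64 *hi, u64 *lo)
{
    u64 T = U64(0xe100000000000000) & (0 - (*lo & 1)); /* all-ones iff LSB set */
    *lo = (*hi << 63) | (*lo >> 1);                    /* 128-bit shift right */
    *hi = (*hi >> 1) ^ T;                              /* conditional fold */
}
#endif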

/*-
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
 * whole spectrum of possible table driven implementations. Why? In
 * non-"Shoup's" case memory access pattern is segmented in such manner,
 * that it's trivial to see that cache timing information can reveal
 * fair portion of intermediate hash value. Given that ciphertext is
 * always available to attacker, it's possible for him to attempt to
 * deduce secret parameter H and if successful, tamper with messages
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
 * not as trivial, but there is no reason to believe that it's resistant
 * to cache-timing attack. And the thing about "8-bit" implementation is
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
 * key + 1KB shared. Well, on pros side it should be twice as fast as
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
 * was observed to run ~75% faster, closer to 100% for commercial
 * compilers... Yet "4-bit" procedure is preferred, because it's
 * believed to provide better security-performance balance and adequate
 * all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows large enough free
 *   results in VM working set trimming, meaning that consequent
 *   malloc would immediately incur working set expansion);
 * - larger table has larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from same
 *   thread in Hyper-Threading world);
 *
 * Value of 1 is not appropriate for performance reasons.
 */
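
/*
 * Editor's note, a worked size check derived from the comment above:
 * with u128 being 16 bytes, TABLE_BITS==4 needs 16 * 16 = 256 bytes of
 * Htable per key, while TABLE_BITS==8 needs 256 * 16 = 4KB per key --
 * the "16 (sixteen) times more memory" quoted above -- plus the shared
 * rem_8bit lookup table.
 */
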
#if     TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 256; i <<= 1) {
        u128 *Hi = Htable + i, H0 = *Hi;
        for (j = 1; j < i; ++j) {
            Hi[j].hi = H0.hi ^ Htable[j].hi;
            Hi[j].lo = H0.lo ^ Htable[j].lo;
        }
    }
}

static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi + 15;
    size_t rem, n = *xi;
    DECLARE_IS_ENDIAN;
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
    };

    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi == xi)
            break;

        n = *(--xi);

        rem = (size_t)Z.lo & 0xff;
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
        Z.hi = (Z.hi >> 8);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem] << 32;
    }

    if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)

#elif   TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        DECLARE_IS_ENDIAN;

        if (IS_LITTLE_ENDIAN)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
        } else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}

# ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (IS_LITTLE_ENDIAN) {
#  ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#  else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
#  endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
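
/*
 * Editor's note on the loop structure above: Xi is consumed one nibble
 * at a time, high and low nibble of each byte separately, 32 table
 * lookups per block in total. Each step shifts Z right by 4 bits, and
 * rem_4bit[rem] repairs the four bits that fell off the low end --
 * each entry is that nibble multiplied through the reduction
 * polynomial, PACK()-ed into the top 16 bits of a size_t.
 */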

#  if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

#   if 1
    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
#   else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
    };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt = 0; cnt < 16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
        Hshr4[cnt].hi = (Z.hi >> 4);
        Hshl4[cnt] = (u8)(Z.lo << 4);
    }

    do {
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo & 0xff;

            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
            Z.hi = (Z.hi >> 8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
        }

        nlo = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo & 0xf;

        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
#   endif

        if (IS_LITTLE_ENDIAN) {
#   ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#   else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
#   endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp += 16, len -= 16);
}
#  endif
# else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
 * effect. In other words idea is to hash data while it's still in L1 cache
 * after encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif
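
/*
 * Editor's note, a worked number (assuming typical cache sizes, which
 * the original comment does not state): GHASH_CHUNK of 3*1024 bytes is
 * 192 blocks of 16 bytes. A chunk that size, plus Htable and stack,
 * fits comfortably in a 32KB L1 data cache, so hashing immediately
 * after the encryption pass finds the ciphertext still resident, which
 * is the stated intent of the stride.
 */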

#else                           /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    long X;
    int i, j;
    const long *xi = (const long *)Xi;
    DECLARE_IS_ENDIAN;

    V.hi = H[0];                /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j = 0; j < 16 / sizeof(long); ++j) {
        if (IS_LITTLE_ENDIAN) {
            if (sizeof(long) == 8) {
# ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
# else
                const u8 *p = (const u8 *)(xi + j);
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
# endif
            } else {
                const u8 *p = (const u8 *)(xi + j);
                X = (long)GETU32(p);
            }
        } else
            X = xi[j];

        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
            Z.hi ^= V.hi & M;
            Z.lo ^= V.lo & M;

            REDUCE1BIT(V);
        }
    }

    if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif

#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "crypto/sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "crypto/ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    (*block) (ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }
#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif   TABLE_BITS==4
# if    defined(GHASH)
#  define CTX__GHASH(f) (ctx->ghash = (f))
# else
#  define CTX__GHASH(f) (ctx->ghash = NULL)
# endif
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            CTX__GHASH(gcm_ghash_avx);
        } else {
            gcm_init_clmul(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            CTX__GHASH(gcm_ghash_clmul);
        }
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if   defined(GHASH_ASM_X86)  /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        CTX__GHASH(gcm_ghash_4bit_mmx);
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        CTX__GHASH(gcm_ghash_4bit_x86);
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    CTX__GHASH(gcm_ghash_4bit);
#  endif
# elif  defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        CTX__GHASH(gcm_ghash_v8);
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        CTX__GHASH(gcm_ghash_neon);
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        gcm_init_vis3(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        CTX__GHASH(gcm_ghash_vis3);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        gcm_init_p8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        CTX__GHASH(gcm_ghash_p8);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# else
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
# undef CTX__GHASH
#endif
}
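
/*
 * Editor's note, a usage sketch (hypothetical key material; AES is an
 * assumption -- any block cipher with the block128_f signature works.
 * The cast below mirrors the pattern used by OpenSSL's own GCM tests):
 */
#if 0                           /* illustration only, not compiled */
# include <openssl/aes.h>

static void gcm128_init_demo(void)
{
    static const unsigned char K[16] = { 0 };   /* hypothetical key */
    AES_KEY aes;
    GCM128_CONTEXT g;

    AES_set_encrypt_key(K, 128, &aes);
    CRYPTO_gcm128_init(&g, &aes, (block128_f)AES_encrypt);
}
#endif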

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
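
/*
 * Editor's note on the two IV paths above: for a 96-bit IV the
 * pre-counter block is simply J0 = IV || 0^31 || 1, which is what the
 * memcpy plus the 0,0,0,1 byte stores construct. For any other length,
 * J0 = GHASH(IV zero-padded to a block boundary || 64-bit bit-length
 * of the IV), computed here by borrowing ctx->Xi; the len0 <<= 3 is
 * the byte-to-bit conversion of that length field.
 */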

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
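
/*
 * Editor's note on the limit check above: NIST SP 800-38D caps AAD at
 * 2^64 - 1 bits, and U64(1) << 61 bytes is exactly 2^64 bits; the
 * "alen < len" clause additionally catches 64-bit wraparound of the
 * running total.
 */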

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
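
/*
 * Editor's note on the length check at the top of this function: GCM's
 * per-invocation plaintext bound is 2^39 - 256 bits, i.e.
 * (2^39 - 256) / 8 = 2^36 - 32 bytes, which is exactly the
 * (U64(1) << 36) - 32 test applied to mlen.
 */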

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
        if (len >= 16 && mres) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
1598
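The ctr32 fast path above delegates bulk counter-mode work to the supplied ctr128_f `stream` callback and keeps the GHASH bookkeeping itself. As a rough illustration of the contract that callback must honour -- encrypt a whole number of 16-byte blocks while treating the last 32 bits of the IV block as a big-endian counter, without modifying the caller's IV buffer -- here is a minimal portable sketch. `demo_block` is a hypothetical block128_f stand-in (real builds pass an assembler routine such as aesni_ctr32_encrypt_blocks), and GETU32/PUTU32 are the helpers already defined in this file.

/* Sketch only: a portable ctr128_f matching the contract assumed above.
 * demo_block is hypothetical and must be pointed at a real block128_f. */
static block128_f demo_block;

static void ctr32_sketch(const unsigned char *in, unsigned char *out,
                         size_t blocks, const void *key,
                         const unsigned char ivec[16])
{
    unsigned char ctrblk[16], ks[16];
    unsigned int ctr32;
    size_t i;

    memcpy(ctrblk, ivec, 16);            /* caller's Yi is left untouched */
    while (blocks--) {
        (*demo_block) (ctrblk, ks, key); /* one keystream block */
        for (i = 0; i < 16; i++)
            *(out++) = *(in++) ^ ks[i];
        ctr32 = GETU32(ctrblk + 12) + 1; /* bump big-endian 32-bit counter */
        PUTU32(ctrblk + 12, ctr32);
    }
}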
1599     int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1600                                     const unsigned char *in, unsigned char *out,
1601                                     size_t len, ctr128_f stream)
1602  0  {
1603     #if defined(OPENSSL_SMALL_FOOTPRINT)
1604         return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1605     #else
1606  0      DECLARE_IS_ENDIAN;
1607  0      unsigned int n, ctr, mres;
1608  0      size_t i;
1609  0      u64 mlen = ctx->len.u[1];
1610  0      void *key = ctx->key;
1611  0  # ifdef GCM_FUNCREF_4BIT
1612  0      void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1613  0  #  ifdef GHASH
1614  0      void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1615  0                           const u8 *inp, size_t len) = ctx->ghash;
1616  0  #  endif
1617  0  # endif
1618
1619  0      mlen += len;
1620  0      if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1621  0          return -1;
1622  0      ctx->len.u[1] = mlen;
1623
1624  0      mres = ctx->mres;
1625
1626  0      if (ctx->ares) {
1627             /* First call to decrypt finalizes GHASH(AAD) */
1628  0  # if defined(GHASH)
1629  0          if (len == 0) {
1630  0              GCM_MUL(ctx);
1631  0              ctx->ares = 0;
1632  0              return 0;
1633  0          }
1634  0          memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1635  0          ctx->Xi.u[0] = 0;
1636  0          ctx->Xi.u[1] = 0;
1637  0          mres = sizeof(ctx->Xi);
1638     # else
1639             GCM_MUL(ctx);
1640     # endif
1641  0          ctx->ares = 0;
1642  0      }
1643
1644  0      if (IS_LITTLE_ENDIAN)
1645     # ifdef BSWAP4
1646             ctr = BSWAP4(ctx->Yi.d[3]);
1647     # else
1648  0          ctr = GETU32(ctx->Yi.c + 12);
1649  0  # endif
1650  0      else
1651  0          ctr = ctx->Yi.d[3];
1652
1653  0      n = mres % 16;
1654  0      if (n) {
1655  0  # if defined(GHASH)
1656  0          while (n && len) {
1657  0              *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1658  0              --len;
1659  0              n = (n + 1) % 16;
1660  0          }
1661  0          if (n == 0) {
1662  0              GHASH(ctx, ctx->Xn, mres);
1663  0              mres = 0;
1664  0          } else {
1665  0              ctx->mres = mres;
1666  0              return 0;
1667  0          }
1668     # else
1669             while (n && len) {
1670                 u8 c = *(in++);
1671                 *(out++) = c ^ ctx->EKi.c[n];
1672                 ctx->Xi.c[n] ^= c;
1673                 --len;
1674                 n = (n + 1) % 16;
1675             }
1676             if (n == 0) {
1677                 GCM_MUL(ctx);
1678                 mres = 0;
1679             } else {
1680                 ctx->mres = n;
1681                 return 0;
1682             }
1683     # endif
1684  0      }
1685  0  # if defined(GHASH)
1686  0      if (len >= 16 && mres) {
1687  0          GHASH(ctx, ctx->Xn, mres);
1688  0          mres = 0;
1689  0      }
1690  0  #  if defined(GHASH_CHUNK)
1691  0      while (len >= GHASH_CHUNK) {
1692  0          GHASH(ctx, in, GHASH_CHUNK);
1693  0          (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1694  0          ctr += GHASH_CHUNK / 16;
1695  0          if (IS_LITTLE_ENDIAN)
1696     #   ifdef BSWAP4
1697                 ctx->Yi.d[3] = BSWAP4(ctr);
1698     #   else
1699  0              PUTU32(ctx->Yi.c + 12, ctr);
1700  0  #   endif
1701  0          else
1702  0              ctx->Yi.d[3] = ctr;
1703  0          out += GHASH_CHUNK;
1704  0          in += GHASH_CHUNK;
1705  0          len -= GHASH_CHUNK;
1706  0      }
1707  0  #  endif
1708  0  # endif
1709  0      if ((i = (len & (size_t)-16))) {
1710  0          size_t j = i / 16;
1711
1712  0  # if defined(GHASH)
1713  0          GHASH(ctx, in, i);
1714     # else
1715             while (j--) {
1716                 size_t k;
1717                 for (k = 0; k < 16; ++k)
1718                     ctx->Xi.c[k] ^= in[k];
1719                 GCM_MUL(ctx);
1720                 in += 16;
1721             }
1722             j = i / 16;
1723             in -= i;
1724     # endif
1725  0          (*stream) (in, out, j, key, ctx->Yi.c);
1726  0          ctr += (unsigned int)j;
1727  0          if (IS_LITTLE_ENDIAN)
1728     # ifdef BSWAP4
1729                 ctx->Yi.d[3] = BSWAP4(ctr);
1730     # else
1731  0              PUTU32(ctx->Yi.c + 12, ctr);
1732  0  # endif
1733  0          else
1734  0              ctx->Yi.d[3] = ctr;
1735  0          out += i;
1736  0          in += i;
1737  0          len -= i;
1738  0      }
1739  0      if (len) {
1740  0          (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1741  0          ++ctr;
1742  0          if (IS_LITTLE_ENDIAN)
1743     # ifdef BSWAP4
1744                 ctx->Yi.d[3] = BSWAP4(ctr);
1745     # else
1746  0              PUTU32(ctx->Yi.c + 12, ctr);
1747  0  # endif
1748  0          else
1749  0              ctx->Yi.d[3] = ctr;
1750  0          while (len--) {
1751  0  # if defined(GHASH)
1752  0              out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1753     # else
1754                 u8 c = in[n];
1755                 ctx->Xi.c[mres++] ^= c;
1756                 out[n] = c ^ ctx->EKi.c[n];
1757     # endif
1758  0              ++n;
1759  0          }
1760  0      }
1761
1762  0      ctx->mres = mres;
1763  0      return 0;
1764  0  #endif
1765  0  }
1766
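On the decrypt side the order is reversed relative to encryption: GHASH is taken over the incoming ciphertext before it is decrypted, which is why GHASH(ctx, in, ...) precedes the (*stream) call above. A hedged caller-side sketch of the resulting two-step flow; `stream_fn` stands for any ctr128_f implementation and the helper name is invented for illustration:

static int gcm_decrypt_sketch(GCM128_CONTEXT *ctx, ctr128_f stream_fn,
                              const unsigned char *ct, unsigned char *pt,
                              size_t len,
                              const unsigned char *tag, size_t taglen)
{
    /* streams ciphertext through GHASH and CTR decryption */
    if (CRYPTO_gcm128_decrypt_ctr32(ctx, ct, pt, len, stream_fn) != 0)
        return -1;                  /* 2^36-32 byte message limit exceeded */
    /* 0 iff the tag matches; the comparison is constant-time */
    return CRYPTO_gcm128_finish(ctx, tag, taglen);
}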
1767     int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1768                              size_t len)
1769  0  {
1770  0      DECLARE_IS_ENDIAN;
1771  0      u64 alen = ctx->len.u[0] << 3;
1772  0      u64 clen = ctx->len.u[1] << 3;
1773  0  #ifdef GCM_FUNCREF_4BIT
1774  0      void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1775  0  # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1776  0      void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1777  0                           const u8 *inp, size_t len) = ctx->ghash;
1778  0  # endif
1779  0  #endif
1780
1781  0  #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1782  0      u128 bitlen;
1783  0      unsigned int mres = ctx->mres;
1784
1785  0      if (mres) {
1786  0          unsigned blocks = (mres + 15) & -16;
1787
1788  0          memset(ctx->Xn + mres, 0, blocks - mres);
1789  0          mres = blocks;
1790  0          if (mres == sizeof(ctx->Xn)) {
1791  0              GHASH(ctx, ctx->Xn, mres);
1792  0              mres = 0;
1793  0          }
1794  0      } else if (ctx->ares) {
1795  0          GCM_MUL(ctx);
1796  0      }
1797     #else
1798         if (ctx->mres || ctx->ares)
1799             GCM_MUL(ctx);
1800     #endif
1801
1802  0      if (IS_LITTLE_ENDIAN) {
1803     #ifdef BSWAP8
1804             alen = BSWAP8(alen);
1805             clen = BSWAP8(clen);
1806     #else
1807  0          u8 *p = ctx->len.c;
1808
1809  0          ctx->len.u[0] = alen;
1810  0          ctx->len.u[1] = clen;
1811
1812  0          alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1813  0          clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1814  0  #endif
1815  0      }
1816
1817  0  #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1818  0      bitlen.hi = alen;
1819  0      bitlen.lo = clen;
1820  0      memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1821  0      mres += sizeof(bitlen);
1822  0      GHASH(ctx, ctx->Xn, mres);
1823     #else
1824         ctx->Xi.u[0] ^= alen;
1825         ctx->Xi.u[1] ^= clen;
1826         GCM_MUL(ctx);
1827     #endif
1828
1829  0      ctx->Xi.u[0] ^= ctx->EK0.u[0];
1830  0      ctx->Xi.u[1] ^= ctx->EK0.u[1];
1831
1832  0      if (tag && len <= sizeof(ctx->Xi))
1833  0          return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1834  0      else
1835  0          return -1;
1836  0  }
1837
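CRYPTO_gcm128_finish() completes the GHASH over the bit lengths of the AAD and ciphertext (the `<< 3` converts byte counts to bits, per the GCM specification), XORs in the encrypted initial counter block EK0, and compares the result against the supplied tag with CRYPTO_memcmp. Callers must treat only a return of 0 as success; -1 also covers a tag length above 16 bytes. A minimal caller-side check, assuming `received_tag` arrived with the ciphertext:

/* received_tag is the 16-byte tag that accompanied the ciphertext */
if (CRYPTO_gcm128_finish(ctx, received_tag, 16) != 0) {
    /* authentication failed: any plaintext already produced
     * must be discarded, not merely flagged */
}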
1838     void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1839  0  {
1840  0      CRYPTO_gcm128_finish(ctx, NULL, 0);
1841  0      memcpy(tag, ctx->Xi.c,
1842  0             len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1843  0  }
1844
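CRYPTO_gcm128_tag() is the encrypt-side counterpart: the CRYPTO_gcm128_finish(ctx, NULL, 0) call above deliberately fails the compare but still leaves the computed tag in ctx->Xi, from which up to 16 bytes are copied out. Typical use after the final encrypt call:

unsigned char tag[16];

/* full 16-byte tags are recommended; truncation weakens authentication */
CRYPTO_gcm128_tag(ctx, tag, sizeof(tag));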
1845     GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1846  0  {
1847  0      GCM128_CONTEXT *ret;
1848
1849  0      if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1850  0          CRYPTO_gcm128_init(ret, key, block);
1851
1852  0      return ret;
1853  0  }
1854
1855     void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1856  0  {
1857  0      OPENSSL_clear_free(ctx, sizeof(*ctx));
1858  0  }
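For context, a hedged end-to-end sketch of the heap-based lifecycle these last two functions provide, modelled on OpenSSL's own self-tests: AES_encrypt is cast to block128_f exactly as the in-tree tests do, and the internal crypto/modes.h header is assumed to be reachable (this is not public API in 3.0). Key, IV, and data values are placeholders.

#include <string.h>
#include <openssl/aes.h>
#include "crypto/modes.h"

static int gcm_roundtrip_sketch(void)
{
    AES_KEY aes;
    GCM128_CONTEXT *gcm;
    unsigned char key[16] = { 0 }, iv[12] = { 0 };
    unsigned char pt[32] = { 0 }, ct[32], out[32], tag[16];
    int ok;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return -1;
    if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
        return -1;

    CRYPTO_gcm128_setiv(gcm, iv, sizeof(iv));
    CRYPTO_gcm128_encrypt(gcm, pt, ct, sizeof(pt));
    CRYPTO_gcm128_tag(gcm, tag, sizeof(tag));   /* encrypt-side tag */

    CRYPTO_gcm128_setiv(gcm, iv, sizeof(iv));   /* reset for decrypt */
    CRYPTO_gcm128_decrypt(gcm, ct, out, sizeof(ct));
    ok = CRYPTO_gcm128_finish(gcm, tag, sizeof(tag)) == 0
         && memcmp(out, pt, sizeof(pt)) == 0;

    CRYPTO_gcm128_release(gcm);                 /* zeroizes and frees */
    return ok ? 0 : -1;
}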