Coverage Report

Created: 2023-06-08 06:41

/src/openssl30/crypto/modes/gcm128.c
Line
Count
Source
1
/*
2
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
#include <string.h>
11
#include <openssl/crypto.h>
12
#include "internal/cryptlib.h"
13
#include "internal/endian.h"
14
#include "crypto/modes.h"
15
16
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17
typedef size_t size_t_aX __attribute((__aligned__(1)));
18
#else
19
typedef size_t size_t_aX;
20
#endif
21
22
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23
/* redefine, because alignment is ensured */
24
# undef  GETU32
25
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
26
# undef  PUTU32
27
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
28
#endif
29
30
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
31
0
#define REDUCE1BIT(V)   do { \
32
0
        if (sizeof(size_t)==8) { \
33
0
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
34
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
35
0
                V.hi  = (V.hi>>1 )^T; \
36
0
        } \
37
0
        else { \
38
0
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
39
0
                V.lo  = (V.hi<<63)|(V.lo>>1); \
40
0
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
41
0
        } \
42
0
} while(0)
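
REDUCE1BIT performs one multiply-by-x step in GF(2^128) under GCM's reflected bit order: shift the 128-bit value right by one and, if a coefficient of x^127 dropped off, fold x^128 = x^7 + x^2 + x + 1 back in as the 0xe1 pattern in the top byte. A minimal standalone sketch of the same step (the type and function names below are ours, not this file's):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t hi, lo; } u128x;  /* hi = bytes 0-7, lo = bytes 8-15 */

/* V := V * x, a branchy equivalent of the branch-free REDUCE1BIT above. */
static void mul_by_x(u128x *V)
{
    uint64_t dropped = V->lo & 1;           /* coefficient of x^127 */

    V->lo = (V->hi << 63) | (V->lo >> 1);
    V->hi = (V->hi >> 1) ^ (dropped ? 0xe100000000000000ULL : 0);
}

int main(void)
{
    u128x V = { 0, 1 };                     /* lowest bit set: forces reduction */

    mul_by_x(&V);
    /* prints e1000000000000000000000000000000 */
    printf("%016llx%016llx\n", (unsigned long long)V.hi,
           (unsigned long long)V.lo);
    return 0;
}
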
43
44
/*-
45
 * Although the permitted values for TABLE_BITS are 8, 4 and 1, it
46
 * should never be set to 8; 8 is effectively reserved for testing.
47
 * TABLE_BITS>1 selects the lookup-table-driven implementations the
48
 * GCM specification calls "Shoup's"; OpenSSL does not cover the
49
 * whole spectrum of possible table-driven implementations. Why? In
50
 * the non-"Shoup's" case the memory access pattern is segmented in
51
 * such a manner that cache-timing information can reveal a fair
52
 * portion of the intermediate hash value. Since the ciphertext is
53
 * always available to an attacker, the attacker can attempt to
54
 * deduce the secret parameter H and, if successful, tamper with
55
 * messages [which is trivial in CTR mode]. In the "Shoup's" case
56
 * this is not as easy, but there is no reason to believe it is
57
 * resistant to cache-timing attacks either. The catch with the
58
 * "8-bit" implementation is that it consumes 16 (sixteen) times
59
 * more memory, 4KB per individual key + 1KB shared, though it
60
 * should be about twice as fast as the "4-bit" version: gcc-generated
61
 * x86[_64] "8-bit" code was observed to run ~75% faster, closer to
62
 * 100% for commercial compilers... Yet the "4-bit" procedure is
63
 * preferred, as it is believed to provide a better security and
64
 * performance balance and adequate all-round performance, meaning:
65
 *
66
 * - shorter setup time, which effectively improves overall timing
67
 *   when handling short messages;
68
 * - larger table allocations can become unbearable because of VM
69
 *   subsystem penalties (for example, on Windows a large enough
70
 *   allocation triggers VM working-set trimming, so a subsequent
71
 *   malloc immediately incurs working-set expansion);
72
 * - a larger table has a larger cache footprint, which can affect the
73
 *   performance of other code paths (not necessarily even in the same
74
 *   thread in a Hyper-Threading world).
75
 *
76
 * A value of 1 is not appropriate, for performance reasons.
77
 */
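
For concreteness, the footprints mentioned above follow directly from the table shapes: each u128 entry is 16 bytes, so the 256-entry "8-bit" Htable costs 4KB per key while the 16-entry "4-bit" one costs 256 bytes, a 16x difference. A trivial check:

#include <stdio.h>

int main(void)
{
    const unsigned entry = 16;                           /* sizeof(u128) */

    printf("8-bit Htable: %u bytes/key\n", 256 * entry); /* 4096 */
    printf("4-bit Htable: %u bytes/key\n", 16 * entry);  /* 256  */
    return 0;
}
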
78
#if     TABLE_BITS==8
79
80
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
81
{
82
    int i, j;
83
    u128 V;
84
85
    Htable[0].hi = 0;
86
    Htable[0].lo = 0;
87
    V.hi = H[0];
88
    V.lo = H[1];
89
90
    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
91
        REDUCE1BIT(V);
92
        Htable[i] = V;
93
    }
94
95
    for (i = 2; i < 256; i <<= 1) {
96
        u128 *Hi = Htable + i, H0 = *Hi;
97
        for (j = 1; j < i; ++j) {
98
            Hi[j].hi = H0.hi ^ Htable[j].hi;
99
            Hi[j].lo = H0.lo ^ Htable[j].lo;
100
        }
101
    }
102
}
103
104
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
105
{
106
    u128 Z = { 0, 0 };
107
    const u8 *xi = (const u8 *)Xi + 15;
108
    size_t rem, n = *xi;
109
    DECLARE_IS_ENDIAN;
110
    static const size_t rem_8bit[256] = {
111
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
112
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
113
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
114
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
115
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
116
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
117
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
118
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
119
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
120
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
121
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
122
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
123
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
124
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
125
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
126
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
127
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
128
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
129
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
130
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
131
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
132
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
133
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
134
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
135
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
136
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
137
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
138
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
139
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
140
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
141
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
142
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
143
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
144
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
145
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
146
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
147
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
148
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
149
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
150
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
151
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
152
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
153
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
154
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
155
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
156
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
157
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
158
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
159
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
160
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
161
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
162
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
163
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
164
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
165
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
166
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
167
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
168
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
169
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
170
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
171
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
172
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
173
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
174
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
175
    };
176
177
    while (1) {
178
        Z.hi ^= Htable[n].hi;
179
        Z.lo ^= Htable[n].lo;
180
181
        if ((u8 *)Xi == xi)
182
            break;
183
184
        n = *(--xi);
185
186
        rem = (size_t)Z.lo & 0xff;
187
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
188
        Z.hi = (Z.hi >> 8);
189
        if (sizeof(size_t) == 8)
190
            Z.hi ^= rem_8bit[rem];
191
        else
192
            Z.hi ^= (u64)rem_8bit[rem] << 32;
193
    }
194
195
    if (IS_LITTLE_ENDIAN) {
196
# ifdef BSWAP8
197
        Xi[0] = BSWAP8(Z.hi);
198
        Xi[1] = BSWAP8(Z.lo);
199
# else
200
        u8 *p = (u8 *)Xi;
201
        u32 v;
202
        v = (u32)(Z.hi >> 32);
203
        PUTU32(p, v);
204
        v = (u32)(Z.hi);
205
        PUTU32(p + 4, v);
206
        v = (u32)(Z.lo >> 32);
207
        PUTU32(p + 8, v);
208
        v = (u32)(Z.lo);
209
        PUTU32(p + 12, v);
210
# endif
211
    } else {
212
        Xi[0] = Z.hi;
213
        Xi[1] = Z.lo;
214
    }
215
}
216
217
# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
218
219
#elif   TABLE_BITS==4
220
221
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
222
0
{
223
0
    u128 V;
224
# if defined(OPENSSL_SMALL_FOOTPRINT)
225
    int i;
226
# endif
227
228
0
    Htable[0].hi = 0;
229
0
    Htable[0].lo = 0;
230
0
    V.hi = H[0];
231
0
    V.lo = H[1];
232
233
# if defined(OPENSSL_SMALL_FOOTPRINT)
234
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
235
        REDUCE1BIT(V);
236
        Htable[i] = V;
237
    }
238
239
    for (i = 2; i < 16; i <<= 1) {
240
        u128 *Hi = Htable + i;
241
        int j;
242
        for (V = *Hi, j = 1; j < i; ++j) {
243
            Hi[j].hi = V.hi ^ Htable[j].hi;
244
            Hi[j].lo = V.lo ^ Htable[j].lo;
245
        }
246
    }
247
# else
248
0
    Htable[8] = V;
249
0
    REDUCE1BIT(V);
250
0
    Htable[4] = V;
251
0
    REDUCE1BIT(V);
252
0
    Htable[2] = V;
253
0
    REDUCE1BIT(V);
254
0
    Htable[1] = V;
255
0
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
256
0
    V = Htable[4];
257
0
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
258
0
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
259
0
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
260
0
    V = Htable[8];
261
0
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
262
0
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
263
0
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
264
0
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
265
0
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
266
0
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
267
0
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
268
0
# endif
269
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
270
    /*
271
     * The ARM assembler code expects a specific dword order in Htable.
272
     */
273
    {
274
        int j;
275
        DECLARE_IS_ENDIAN;
276
277
        if (IS_LITTLE_ENDIAN)
278
            for (j = 0; j < 16; ++j) {
279
                V = Htable[j];
280
                Htable[j].hi = V.lo;
281
                Htable[j].lo = V.hi;
282
        } else
283
            for (j = 0; j < 16; ++j) {
284
                V = Htable[j];
285
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
286
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
287
            }
288
    }
289
# endif
290
0
}
291
292
# ifndef GHASH_ASM
293
static const size_t rem_4bit[16] = {
294
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
295
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
296
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
297
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
298
};
299
300
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
301
{
302
    u128 Z;
303
    int cnt = 15;
304
    size_t rem, nlo, nhi;
305
    DECLARE_IS_ENDIAN;
306
307
    nlo = ((const u8 *)Xi)[15];
308
    nhi = nlo >> 4;
309
    nlo &= 0xf;
310
311
    Z.hi = Htable[nlo].hi;
312
    Z.lo = Htable[nlo].lo;
313
314
    while (1) {
315
        rem = (size_t)Z.lo & 0xf;
316
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
317
        Z.hi = (Z.hi >> 4);
318
        if (sizeof(size_t) == 8)
319
            Z.hi ^= rem_4bit[rem];
320
        else
321
            Z.hi ^= (u64)rem_4bit[rem] << 32;
322
323
        Z.hi ^= Htable[nhi].hi;
324
        Z.lo ^= Htable[nhi].lo;
325
326
        if (--cnt < 0)
327
            break;
328
329
        nlo = ((const u8 *)Xi)[cnt];
330
        nhi = nlo >> 4;
331
        nlo &= 0xf;
332
333
        rem = (size_t)Z.lo & 0xf;
334
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
335
        Z.hi = (Z.hi >> 4);
336
        if (sizeof(size_t) == 8)
337
            Z.hi ^= rem_4bit[rem];
338
        else
339
            Z.hi ^= (u64)rem_4bit[rem] << 32;
340
341
        Z.hi ^= Htable[nlo].hi;
342
        Z.lo ^= Htable[nlo].lo;
343
    }
344
345
    if (IS_LITTLE_ENDIAN) {
346
#  ifdef BSWAP8
347
        Xi[0] = BSWAP8(Z.hi);
348
        Xi[1] = BSWAP8(Z.lo);
349
#  else
350
        u8 *p = (u8 *)Xi;
351
        u32 v;
352
        v = (u32)(Z.hi >> 32);
353
        PUTU32(p, v);
354
        v = (u32)(Z.hi);
355
        PUTU32(p + 4, v);
356
        v = (u32)(Z.lo >> 32);
357
        PUTU32(p + 8, v);
358
        v = (u32)(Z.lo);
359
        PUTU32(p + 12, v);
360
#  endif
361
    } else {
362
        Xi[0] = Z.hi;
363
        Xi[1] = Z.lo;
364
    }
365
}
366
367
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
368
/*
369
 * Streamed version of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt
370
 * for details... Compiler-generated code doesn't seem to give any
371
 * performance improvement, at least not on x86[_64]. It's here
372
 * mostly as a reference and a placeholder for possible future
373
 * non-trivial optimization[s]...
374
 */
375
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
376
                           const u8 *inp, size_t len)
377
{
378
    u128 Z;
379
    int cnt;
380
    size_t rem, nlo, nhi;
381
    DECLARE_IS_ENDIAN;
382
383
#   if 1
384
    do {
385
        cnt = 15;
386
        nlo = ((const u8 *)Xi)[15];
387
        nlo ^= inp[15];
388
        nhi = nlo >> 4;
389
        nlo &= 0xf;
390
391
        Z.hi = Htable[nlo].hi;
392
        Z.lo = Htable[nlo].lo;
393
394
        while (1) {
395
            rem = (size_t)Z.lo & 0xf;
396
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
397
            Z.hi = (Z.hi >> 4);
398
            if (sizeof(size_t) == 8)
399
                Z.hi ^= rem_4bit[rem];
400
            else
401
                Z.hi ^= (u64)rem_4bit[rem] << 32;
402
403
            Z.hi ^= Htable[nhi].hi;
404
            Z.lo ^= Htable[nhi].lo;
405
406
            if (--cnt < 0)
407
                break;
408
409
            nlo = ((const u8 *)Xi)[cnt];
410
            nlo ^= inp[cnt];
411
            nhi = nlo >> 4;
412
            nlo &= 0xf;
413
414
            rem = (size_t)Z.lo & 0xf;
415
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
416
            Z.hi = (Z.hi >> 4);
417
            if (sizeof(size_t) == 8)
418
                Z.hi ^= rem_4bit[rem];
419
            else
420
                Z.hi ^= (u64)rem_4bit[rem] << 32;
421
422
            Z.hi ^= Htable[nlo].hi;
423
            Z.lo ^= Htable[nlo].lo;
424
        }
425
#   else
426
    /*
427
     * An extra 256+16 bytes per key plus 512 bytes of shared tables
428
     * [should] give a ~50% improvement... One could have PACK()-ed
429
     * rem_8bit even here, but the priority is to minimize the
430
     * cache footprint...
431
     */
432
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
433
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
434
    static const unsigned short rem_8bit[256] = {
435
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
436
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
437
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
438
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
439
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
440
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
441
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
442
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
443
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
444
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
445
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
446
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
447
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
448
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
449
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
450
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
451
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
452
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
453
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
454
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
455
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
456
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
457
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
458
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
459
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
460
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
461
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
462
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
463
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
464
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
465
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
466
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
467
    };
468
    /*
469
     * This pre-processing phase slows the procedure down by roughly as
470
     * much time as it makes each loop iteration faster. In other words,
471
     * single-block performance is about the same as the straightforward
472
     * "4-bit" implementation, and from there it only gets faster...
473
     */
474
    for (cnt = 0; cnt < 16; ++cnt) {
475
        Z.hi = Htable[cnt].hi;
476
        Z.lo = Htable[cnt].lo;
477
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
478
        Hshr4[cnt].hi = (Z.hi >> 4);
479
        Hshl4[cnt] = (u8)(Z.lo << 4);
480
    }
481
482
    do {
483
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
484
            nlo = ((const u8 *)Xi)[cnt];
485
            nlo ^= inp[cnt];
486
            nhi = nlo >> 4;
487
            nlo &= 0xf;
488
489
            Z.hi ^= Htable[nlo].hi;
490
            Z.lo ^= Htable[nlo].lo;
491
492
            rem = (size_t)Z.lo & 0xff;
493
494
            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
495
            Z.hi = (Z.hi >> 8);
496
497
            Z.hi ^= Hshr4[nhi].hi;
498
            Z.lo ^= Hshr4[nhi].lo;
499
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
500
        }
501
502
        nlo = ((const u8 *)Xi)[0];
503
        nlo ^= inp[0];
504
        nhi = nlo >> 4;
505
        nlo &= 0xf;
506
507
        Z.hi ^= Htable[nlo].hi;
508
        Z.lo ^= Htable[nlo].lo;
509
510
        rem = (size_t)Z.lo & 0xf;
511
512
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
513
        Z.hi = (Z.hi >> 4);
514
515
        Z.hi ^= Htable[nhi].hi;
516
        Z.lo ^= Htable[nhi].lo;
517
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
518
#   endif
519
520
        if (IS_LITTLE_ENDIAN) {
521
#   ifdef BSWAP8
522
            Xi[0] = BSWAP8(Z.hi);
523
            Xi[1] = BSWAP8(Z.lo);
524
#   else
525
            u8 *p = (u8 *)Xi;
526
            u32 v;
527
            v = (u32)(Z.hi >> 32);
528
            PUTU32(p, v);
529
            v = (u32)(Z.hi);
530
            PUTU32(p + 4, v);
531
            v = (u32)(Z.lo >> 32);
532
            PUTU32(p + 8, v);
533
            v = (u32)(Z.lo);
534
            PUTU32(p + 12, v);
535
#   endif
536
        } else {
537
            Xi[0] = Z.hi;
538
            Xi[1] = Z.lo;
539
        }
540
    } while (inp += 16, len -= 16);
541
}
542
#  endif
543
# else
544
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
545
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
546
                    size_t len);
547
# endif
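
The 4-bit routines above are Shoup-style table lookups; when validating them it helps to have a by-the-book reference. The sketch below is the schoolbook bitwise method from NIST SP 800-38D (gf128_mul and ghash_ref are our names; it is deliberately slow and is not how OpenSSL computes GHASH):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Schoolbook GF(2^128) multiply in GCM bit order (SP 800-38D, Alg. 1). */
static void gf128_mul(uint8_t Z[16], const uint8_t X[16], const uint8_t Y[16])
{
    uint8_t V[16], lsb;
    int i, j;

    memcpy(V, Y, 16);
    memset(Z, 0, 16);
    for (i = 0; i < 128; i++) {
        if (X[i / 8] & (0x80 >> (i % 8)))   /* coefficient i of X set? */
            for (j = 0; j < 16; j++)
                Z[j] ^= V[j];
        lsb = V[15] & 1;                    /* V := V * x, cf. REDUCE1BIT */
        for (j = 15; j > 0; j--)
            V[j] = (uint8_t)((V[j] >> 1) | (V[j - 1] << 7));
        V[0] >>= 1;
        if (lsb)
            V[0] ^= 0xe1;
    }
}

/* Y := GHASH over whole 16-byte blocks, the contract of gcm_ghash_4bit. */
static void ghash_ref(uint8_t Y[16], const uint8_t H[16],
                      const uint8_t *inp, size_t len)
{
    uint8_t T[16];
    size_t i;

    for (; len >= 16; inp += 16, len -= 16) {
        for (i = 0; i < 16; i++)
            Y[i] ^= inp[i];
        gf128_mul(T, Y, H);
        memcpy(Y, T, 16);
    }
}

int main(void)
{
    uint8_t one[16] = { 0x80 }, H[16], Z[16];
    int i;

    for (i = 0; i < 16; i++)
        H[i] = (uint8_t)(i + 1);
    gf128_mul(Z, one, H);                   /* 1 * H must equal H */
    printf("identity: %s\n", memcmp(Z, H, 16) == 0 ? "ok" : "BROKEN");
    (void)ghash_ref;
    return 0;
}
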
548
549
# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
550
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
551
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
552
/*
553
 * GHASH_CHUNK is a "stride parameter" intended to mitigate the cache-
554
 * thrashing effect; in other words, the idea is to hash data while it is
555
 * still in the L1 cache after the encryption pass...
556
 */
557
465
#  define GHASH_CHUNK       (3*1024)
558
# endif
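
The point of the 3KB stride is cache locality rather than arithmetic: encrypt a chunk, then GHASH that same chunk while it is still resident in L1, instead of making one full pass of each over the whole message. A toy illustration with stand-in helpers (ctr_encrypt and ghash_update are placeholders, not this file's API):

#include <stddef.h>
#include <stdint.h>

static uint8_t digest;

/* Stand-ins for the CTR pass and the GHASH pass. */
static void ctr_encrypt(uint8_t *buf, size_t len)
{
    while (len--)
        *buf++ ^= 0xAA;                     /* pretend keystream */
}

static void ghash_update(const uint8_t *buf, size_t len)
{
    while (len--)
        digest ^= *buf++;                   /* pretend hash */
}

static void encrypt_then_hash(uint8_t *buf, size_t len)
{
    const size_t chunk = 3 * 1024;          /* mirrors GHASH_CHUNK */

    while (len >= chunk) {
        ctr_encrypt(buf, chunk);            /* output is hot in L1... */
        ghash_update(buf, chunk);           /* ...hash it before eviction */
        buf += chunk;
        len -= chunk;
    }
    if (len) {
        ctr_encrypt(buf, len);
        ghash_update(buf, len);
    }
}

int main(void)
{
    static uint8_t msg[8192];

    encrypt_then_hash(msg, sizeof(msg));
    return (int)digest;
}
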
559
560
#else                           /* TABLE_BITS */
561
562
static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
563
{
564
    u128 V, Z = { 0, 0 };
565
    long X;
566
    int i, j;
567
    const long *xi = (const long *)Xi;
568
    DECLARE_IS_ENDIAN;
569
570
    V.hi = H[0];                /* H is in host byte order, no byte swapping */
571
    V.lo = H[1];
572
573
    for (j = 0; j < 16 / sizeof(long); ++j) {
574
        if (IS_LITTLE_ENDIAN) {
575
            if (sizeof(long) == 8) {
576
# ifdef BSWAP8
577
                X = (long)(BSWAP8(xi[j]));
578
# else
579
                const u8 *p = (const u8 *)(xi + j);
580
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
581
# endif
582
            } else {
583
                const u8 *p = (const u8 *)(xi + j);
584
                X = (long)GETU32(p);
585
            }
586
        } else
587
            X = xi[j];
588
589
        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
590
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
591
            Z.hi ^= V.hi & M;
592
            Z.lo ^= V.lo & M;
593
594
            REDUCE1BIT(V);
595
        }
596
    }
597
598
    if (IS_LITTLE_ENDIAN) {
599
# ifdef BSWAP8
600
        Xi[0] = BSWAP8(Z.hi);
601
        Xi[1] = BSWAP8(Z.lo);
602
# else
603
        u8 *p = (u8 *)Xi;
604
        u32 v;
605
        v = (u32)(Z.hi >> 32);
606
        PUTU32(p, v);
607
        v = (u32)(Z.hi);
608
        PUTU32(p + 4, v);
609
        v = (u32)(Z.lo >> 32);
610
        PUTU32(p + 8, v);
611
        v = (u32)(Z.lo);
612
        PUTU32(p + 12, v);
613
# endif
614
    } else {
615
        Xi[0] = Z.hi;
616
        Xi[1] = Z.lo;
617
    }
618
}
619
620
# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
621
622
#endif
623
624
#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
625
# if    !defined(I386_ONLY) && \
626
        (defined(__i386)        || defined(__i386__)    || \
627
         defined(__x86_64)      || defined(__x86_64__)  || \
628
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
629
#  define GHASH_ASM_X86_OR_64
630
#  define GCM_FUNCREF_4BIT
631
632
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
633
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
634
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
635
                     size_t len);
636
637
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
638
#   define gcm_init_avx   gcm_init_clmul
639
#   define gcm_gmult_avx  gcm_gmult_clmul
640
#   define gcm_ghash_avx  gcm_ghash_clmul
641
#  else
642
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
643
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
644
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
645
                   size_t len);
646
#  endif
647
648
#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
649
#   define GHASH_ASM_X86
650
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
651
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
652
                        size_t len);
653
654
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
655
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
656
                        size_t len);
657
#  endif
658
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
659
#  include "arm_arch.h"
660
#  if __ARM_MAX_ARCH__>=7
661
#   define GHASH_ASM_ARM
662
#   define GCM_FUNCREF_4BIT
663
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
664
#   if defined(__arm__) || defined(__arm)
665
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
666
#   endif
667
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
668
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
669
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
670
                    size_t len);
671
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
672
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
673
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
674
                  size_t len);
675
#  endif
676
# elif defined(__sparc__) || defined(__sparc)
677
#  include "crypto/sparc_arch.h"
678
#  define GHASH_ASM_SPARC
679
#  define GCM_FUNCREF_4BIT
680
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
681
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
682
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
683
                    size_t len);
684
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
685
#  include "crypto/ppc_arch.h"
686
#  define GHASH_ASM_PPC
687
#  define GCM_FUNCREF_4BIT
688
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
689
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
690
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
691
                  size_t len);
692
# endif
693
#endif
694
695
#ifdef GCM_FUNCREF_4BIT
696
# undef  GCM_MUL
697
20
# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
698
# ifdef GHASH
699
#  undef  GHASH
700
878
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
701
# endif
702
#endif
703
704
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
705
307
{
706
307
    DECLARE_IS_ENDIAN;
707
708
307
    memset(ctx, 0, sizeof(*ctx));
709
307
    ctx->block = block;
710
307
    ctx->key = key;
711
712
307
    (*block) (ctx->H.c, ctx->H.c, key);
713
714
307
    if (IS_LITTLE_ENDIAN) {
715
        /* H is stored in host byte order */
716
#ifdef BSWAP8
717
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
718
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
719
#else
720
307
        u8 *p = ctx->H.c;
721
307
        u64 hi, lo;
722
307
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
723
307
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
724
307
        ctx->H.u[0] = hi;
725
307
        ctx->H.u[1] = lo;
726
307
#endif
727
307
    }
728
#if     TABLE_BITS==8
729
    gcm_init_8bit(ctx->Htable, ctx->H.u);
730
#elif   TABLE_BITS==4
731
307
# if    defined(GHASH)
732
307
#  define CTX__GHASH(f) (ctx->ghash = (f))
733
# else
734
#  define CTX__GHASH(f) (ctx->ghash = NULL)
735
# endif
736
307
# if    defined(GHASH_ASM_X86_OR_64)
737
307
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
738
307
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
739
307
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
740
307
            gcm_init_avx(ctx->Htable, ctx->H.u);
741
307
            ctx->gmult = gcm_gmult_avx;
742
307
            CTX__GHASH(gcm_ghash_avx);
743
307
        } else {
744
0
            gcm_init_clmul(ctx->Htable, ctx->H.u);
745
0
            ctx->gmult = gcm_gmult_clmul;
746
0
            CTX__GHASH(gcm_ghash_clmul);
747
0
        }
748
307
        return;
749
307
    }
750
0
#  endif
751
0
    gcm_init_4bit(ctx->Htable, ctx->H.u);
752
#  if   defined(GHASH_ASM_X86)  /* x86 only */
753
#   if  defined(OPENSSL_IA32_SSE2)
754
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
755
#   else
756
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
757
#   endif
758
        ctx->gmult = gcm_gmult_4bit_mmx;
759
        CTX__GHASH(gcm_ghash_4bit_mmx);
760
    } else {
761
        ctx->gmult = gcm_gmult_4bit_x86;
762
        CTX__GHASH(gcm_ghash_4bit_x86);
763
    }
764
#  else
765
0
    ctx->gmult = gcm_gmult_4bit;
766
0
    CTX__GHASH(gcm_ghash_4bit);
767
0
#  endif
768
# elif  defined(GHASH_ASM_ARM)
769
#  ifdef PMULL_CAPABLE
770
    if (PMULL_CAPABLE) {
771
        gcm_init_v8(ctx->Htable, ctx->H.u);
772
        ctx->gmult = gcm_gmult_v8;
773
        CTX__GHASH(gcm_ghash_v8);
774
    } else
775
#  endif
776
#  ifdef NEON_CAPABLE
777
    if (NEON_CAPABLE) {
778
        gcm_init_neon(ctx->Htable, ctx->H.u);
779
        ctx->gmult = gcm_gmult_neon;
780
        CTX__GHASH(gcm_ghash_neon);
781
    } else
782
#  endif
783
    {
784
        gcm_init_4bit(ctx->Htable, ctx->H.u);
785
        ctx->gmult = gcm_gmult_4bit;
786
        CTX__GHASH(gcm_ghash_4bit);
787
    }
788
# elif  defined(GHASH_ASM_SPARC)
789
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
790
        gcm_init_vis3(ctx->Htable, ctx->H.u);
791
        ctx->gmult = gcm_gmult_vis3;
792
        CTX__GHASH(gcm_ghash_vis3);
793
    } else {
794
        gcm_init_4bit(ctx->Htable, ctx->H.u);
795
        ctx->gmult = gcm_gmult_4bit;
796
        CTX__GHASH(gcm_ghash_4bit);
797
    }
798
# elif  defined(GHASH_ASM_PPC)
799
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
800
        gcm_init_p8(ctx->Htable, ctx->H.u);
801
        ctx->gmult = gcm_gmult_p8;
802
        CTX__GHASH(gcm_ghash_p8);
803
    } else {
804
        gcm_init_4bit(ctx->Htable, ctx->H.u);
805
        ctx->gmult = gcm_gmult_4bit;
806
        CTX__GHASH(gcm_ghash_4bit);
807
    }
808
# else
809
    gcm_init_4bit(ctx->Htable, ctx->H.u);
810
# endif
811
0
# undef CTX__GHASH
812
0
#endif
813
0
}
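
Everything in CRYPTO_gcm128_init besides the dispatch boilerplate is the derivation of the hash key: H is the block cipher applied to the all-zero block, byte-swapped into host order so the multiply routines can work on native words. With AES as the block cipher that is simply (a sketch against the public <openssl/aes.h> API; derive_ghash_key is our name):

#include <openssl/aes.h>
#include <string.h>

/* H = E_K(0^128): the same value CRYPTO_gcm128_init() derives with
 * (*block)(ctx->H.c, ctx->H.c, key) before building Htable from it. */
static void derive_ghash_key(const AES_KEY *aes, unsigned char H[16])
{
    unsigned char zero[16];

    memset(zero, 0, sizeof(zero));
    AES_encrypt(zero, H, aes);
}
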
814
815
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
816
                         size_t len)
817
349
{
818
349
    DECLARE_IS_ENDIAN;
819
349
    unsigned int ctr;
820
349
#ifdef GCM_FUNCREF_4BIT
821
349
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
822
349
#endif
823
824
349
    ctx->len.u[0] = 0;          /* AAD length */
825
349
    ctx->len.u[1] = 0;          /* message length */
826
349
    ctx->ares = 0;
827
349
    ctx->mres = 0;
828
829
349
    if (len == 12) {
830
349
        memcpy(ctx->Yi.c, iv, 12);
831
349
        ctx->Yi.c[12] = 0;
832
349
        ctx->Yi.c[13] = 0;
833
349
        ctx->Yi.c[14] = 0;
834
349
        ctx->Yi.c[15] = 1;
835
349
        ctr = 1;
836
349
    } else {
837
0
        size_t i;
838
0
        u64 len0 = len;
839
840
        /* Borrow ctx->Xi to calculate initial Yi */
841
0
        ctx->Xi.u[0] = 0;
842
0
        ctx->Xi.u[1] = 0;
843
844
0
        while (len >= 16) {
845
0
            for (i = 0; i < 16; ++i)
846
0
                ctx->Xi.c[i] ^= iv[i];
847
0
            GCM_MUL(ctx);
848
0
            iv += 16;
849
0
            len -= 16;
850
0
        }
851
0
        if (len) {
852
0
            for (i = 0; i < len; ++i)
853
0
                ctx->Xi.c[i] ^= iv[i];
854
0
            GCM_MUL(ctx);
855
0
        }
856
0
        len0 <<= 3;
857
0
        if (IS_LITTLE_ENDIAN) {
858
#ifdef BSWAP8
859
            ctx->Xi.u[1] ^= BSWAP8(len0);
860
#else
861
0
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
862
0
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
863
0
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
864
0
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
865
0
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
866
0
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
867
0
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
868
0
            ctx->Xi.c[15] ^= (u8)(len0);
869
0
#endif
870
0
        } else {
871
0
            ctx->Xi.u[1] ^= len0;
872
0
        }
873
874
0
        GCM_MUL(ctx);
875
876
0
        if (IS_LITTLE_ENDIAN)
877
#ifdef BSWAP4
878
            ctr = BSWAP4(ctx->Xi.d[3]);
879
#else
880
0
            ctr = GETU32(ctx->Xi.c + 12);
881
0
#endif
882
0
        else
883
0
            ctr = ctx->Xi.d[3];
884
885
        /* Copy borrowed Xi to Yi */
886
0
        ctx->Yi.u[0] = ctx->Xi.u[0];
887
0
        ctx->Yi.u[1] = ctx->Xi.u[1];
888
0
    }
889
890
349
    ctx->Xi.u[0] = 0;
891
349
    ctx->Xi.u[1] = 0;
892
893
349
    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
894
349
    ++ctr;
895
349
    if (IS_LITTLE_ENDIAN)
896
#ifdef BSWAP4
897
        ctx->Yi.d[3] = BSWAP4(ctr);
898
#else
899
349
        PUTU32(ctx->Yi.c + 12, ctr);
900
0
#endif
901
0
    else
902
0
        ctx->Yi.d[3] = ctr;
903
349
}
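
The two IV paths above implement SP 800-38D's J0 derivation: a 12-byte IV becomes IV || 0x00000001 directly, while any other length is hashed, i.e. J0 = GHASH_H(IV zero-padded to a block multiple || 0^64 || [bit length of IV]_64), with ctx->Xi borrowed as the accumulator. The long path, restated in terms of the ghash_ref reference sketch given earlier (derive_j0 is our name, not this file's API):

#include <stdint.h>
#include <string.h>

/* ghash_ref() is the bitwise reference sketch shown after gcm_ghash_4bit. */
void ghash_ref(uint8_t Y[16], const uint8_t H[16], const uint8_t *inp,
               size_t len);

static void derive_j0(uint8_t J0[16], const uint8_t H[16],
                      const uint8_t *iv, size_t ivlen)
{
    uint8_t block[16];
    uint64_t bits = (uint64_t)ivlen * 8;
    size_t whole = ivlen & ~(size_t)15;
    int i;

    memset(J0, 0, 16);
    ghash_ref(J0, H, iv, whole);            /* whole 16-byte IV blocks */
    if (ivlen - whole) {                    /* final partial block, zero padded */
        memset(block, 0, 16);
        memcpy(block, iv + whole, ivlen - whole);
        ghash_ref(J0, H, block, 16);
    }
    memset(block, 0, 16);                   /* 0^64 || [len(IV) in bits]_64 */
    for (i = 0; i < 8; i++)
        block[15 - i] = (uint8_t)(bits >> (8 * i));
    ghash_ref(J0, H, block, 16);
}
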
904
905
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
906
                      size_t len)
907
349
{
908
349
    size_t i;
909
349
    unsigned int n;
910
349
    u64 alen = ctx->len.u[0];
911
349
#ifdef GCM_FUNCREF_4BIT
912
349
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
913
349
# ifdef GHASH
914
349
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
915
349
                         const u8 *inp, size_t len) = ctx->ghash;
916
349
# endif
917
349
#endif
918
919
349
    if (ctx->len.u[1])
920
0
        return -2;
921
922
349
    alen += len;
923
349
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
924
0
        return -1;
925
349
    ctx->len.u[0] = alen;
926
927
349
    n = ctx->ares;
928
349
    if (n) {
929
0
        while (n && len) {
930
0
            ctx->Xi.c[n] ^= *(aad++);
931
0
            --len;
932
0
            n = (n + 1) % 16;
933
0
        }
934
0
        if (n == 0)
935
0
            GCM_MUL(ctx);
936
0
        else {
937
0
            ctx->ares = n;
938
0
            return 0;
939
0
        }
940
0
    }
941
349
#ifdef GHASH
942
349
    if ((i = (len & (size_t)-16))) {
943
0
        GHASH(ctx, aad, i);
944
0
        aad += i;
945
0
        len -= i;
946
0
    }
947
#else
948
    while (len >= 16) {
949
        for (i = 0; i < 16; ++i)
950
            ctx->Xi.c[i] ^= aad[i];
951
        GCM_MUL(ctx);
952
        aad += 16;
953
        len -= 16;
954
    }
955
#endif
956
349
    if (len) {
957
349
        n = (unsigned int)len;
958
4.72k
        for (i = 0; i < len; ++i)
959
4.37k
            ctx->Xi.c[i] ^= aad[i];
960
349
    }
961
962
349
    ctx->ares = n;
963
349
    return 0;
964
349
}
965
966
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
967
                          const unsigned char *in, unsigned char *out,
968
                          size_t len)
969
118
{
970
118
    DECLARE_IS_ENDIAN;
971
118
    unsigned int n, ctr, mres;
972
118
    size_t i;
973
118
    u64 mlen = ctx->len.u[1];
974
118
    block128_f block = ctx->block;
975
118
    void *key = ctx->key;
976
118
#ifdef GCM_FUNCREF_4BIT
977
118
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
978
118
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
979
118
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
980
118
                         const u8 *inp, size_t len) = ctx->ghash;
981
118
# endif
982
118
#endif
983
984
118
    mlen += len;
985
118
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
986
0
        return -1;
987
118
    ctx->len.u[1] = mlen;
988
989
118
    mres = ctx->mres;
990
991
118
    if (ctx->ares) {
992
        /* First call to encrypt finalizes GHASH(AAD) */
993
118
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
994
118
        if (len == 0) {
995
0
            GCM_MUL(ctx);
996
0
            ctx->ares = 0;
997
0
            return 0;
998
0
        }
999
118
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1000
118
        ctx->Xi.u[0] = 0;
1001
118
        ctx->Xi.u[1] = 0;
1002
118
        mres = sizeof(ctx->Xi);
1003
#else
1004
        GCM_MUL(ctx);
1005
#endif
1006
118
        ctx->ares = 0;
1007
118
    }
1008
1009
118
    if (IS_LITTLE_ENDIAN)
1010
#ifdef BSWAP4
1011
        ctr = BSWAP4(ctx->Yi.d[3]);
1012
#else
1013
118
        ctr = GETU32(ctx->Yi.c + 12);
1014
0
#endif
1015
0
    else
1016
0
        ctr = ctx->Yi.d[3];
1017
1018
118
    n = mres % 16;
1019
118
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1020
118
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1021
118
        do {
1022
118
            if (n) {
1023
0
# if defined(GHASH)
1024
0
                while (n && len) {
1025
0
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1026
0
                    --len;
1027
0
                    n = (n + 1) % 16;
1028
0
                }
1029
0
                if (n == 0) {
1030
0
                    GHASH(ctx, ctx->Xn, mres);
1031
0
                    mres = 0;
1032
0
                } else {
1033
0
                    ctx->mres = mres;
1034
0
                    return 0;
1035
0
                }
1036
# else
1037
                while (n && len) {
1038
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1039
                    --len;
1040
                    n = (n + 1) % 16;
1041
                }
1042
                if (n == 0) {
1043
                    GCM_MUL(ctx);
1044
                    mres = 0;
1045
                } else {
1046
                    ctx->mres = n;
1047
                    return 0;
1048
                }
1049
# endif
1050
0
            }
1051
118
# if defined(STRICT_ALIGNMENT)
1052
118
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1053
0
                break;
1054
118
# endif
1055
118
# if defined(GHASH)
1056
118
            if (len >= 16 && mres) {
1057
61
                GHASH(ctx, ctx->Xn, mres);
1058
61
                mres = 0;
1059
61
            }
1060
118
#  if defined(GHASH_CHUNK)
1061
118
            while (len >= GHASH_CHUNK) {
1062
0
                size_t j = GHASH_CHUNK;
1063
1064
0
                while (j) {
1065
0
                    size_t_aX *out_t = (size_t_aX *)out;
1066
0
                    const size_t_aX *in_t = (const size_t_aX *)in;
1067
1068
0
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1069
0
                    ++ctr;
1070
0
                    if (IS_LITTLE_ENDIAN)
1071
#   ifdef BSWAP4
1072
                        ctx->Yi.d[3] = BSWAP4(ctr);
1073
#   else
1074
0
                        PUTU32(ctx->Yi.c + 12, ctr);
1075
0
#   endif
1076
0
                    else
1077
0
                        ctx->Yi.d[3] = ctr;
1078
0
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1079
0
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1080
0
                    out += 16;
1081
0
                    in += 16;
1082
0
                    j -= 16;
1083
0
                }
1084
0
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1085
0
                len -= GHASH_CHUNK;
1086
0
            }
1087
118
#  endif
1088
118
            if ((i = (len & (size_t)-16))) {
1089
61
                size_t j = i;
1090
1091
122
                while (len >= 16) {
1092
61
                    size_t_aX *out_t = (size_t_aX *)out;
1093
61
                    const size_t_aX *in_t = (const size_t_aX *)in;
1094
1095
61
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1096
61
                    ++ctr;
1097
61
                    if (IS_LITTLE_ENDIAN)
1098
#  ifdef BSWAP4
1099
                        ctx->Yi.d[3] = BSWAP4(ctr);
1100
#  else
1101
61
                        PUTU32(ctx->Yi.c + 12, ctr);
1102
0
#  endif
1103
0
                    else
1104
0
                        ctx->Yi.d[3] = ctr;
1105
183
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1106
122
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1107
61
                    out += 16;
1108
61
                    in += 16;
1109
61
                    len -= 16;
1110
61
                }
1111
61
                GHASH(ctx, out - j, j);
1112
61
            }
1113
# else
1114
            while (len >= 16) {
1115
                size_t *out_t = (size_t *)out;
1116
                const size_t *in_t = (const size_t *)in;
1117
1118
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1119
                ++ctr;
1120
                if (IS_LITTLE_ENDIAN)
1121
#  ifdef BSWAP4
1122
                    ctx->Yi.d[3] = BSWAP4(ctr);
1123
#  else
1124
                    PUTU32(ctx->Yi.c + 12, ctr);
1125
#  endif
1126
                else
1127
                    ctx->Yi.d[3] = ctr;
1128
                for (i = 0; i < 16 / sizeof(size_t); ++i)
1129
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1130
                GCM_MUL(ctx);
1131
                out += 16;
1132
                in += 16;
1133
                len -= 16;
1134
            }
1135
# endif
1136
118
            if (len) {
1137
57
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1138
57
                ++ctr;
1139
57
                if (IS_LITTLE_ENDIAN)
1140
# ifdef BSWAP4
1141
                    ctx->Yi.d[3] = BSWAP4(ctr);
1142
# else
1143
57
                    PUTU32(ctx->Yi.c + 12, ctr);
1144
0
# endif
1145
0
                else
1146
0
                    ctx->Yi.d[3] = ctr;
1147
57
# if defined(GHASH)
1148
171
                while (len--) {
1149
114
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1150
114
                    ++n;
1151
114
                }
1152
# else
1153
                while (len--) {
1154
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1155
                    ++n;
1156
                }
1157
                mres = n;
1158
# endif
1159
57
            }
1160
1161
118
            ctx->mres = mres;
1162
118
            return 0;
1163
118
        } while (0);
1164
118
    }
1165
0
#endif
1166
0
    for (i = 0; i < len; ++i) {
1167
0
        if (n == 0) {
1168
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1169
0
            ++ctr;
1170
0
            if (IS_LITTLE_ENDIAN)
1171
#ifdef BSWAP4
1172
                ctx->Yi.d[3] = BSWAP4(ctr);
1173
#else
1174
0
                PUTU32(ctx->Yi.c + 12, ctr);
1175
0
#endif
1176
0
            else
1177
0
                ctx->Yi.d[3] = ctr;
1178
0
        }
1179
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1180
0
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
1181
0
        n = (n + 1) % 16;
1182
0
        if (mres == sizeof(ctx->Xn)) {
1183
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1184
0
            mres = 0;
1185
0
        }
1186
#else
1187
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1188
        mres = n = (n + 1) % 16;
1189
        if (n == 0)
1190
            GCM_MUL(ctx);
1191
#endif
1192
0
    }
1193
1194
0
    ctx->mres = mres;
1195
0
    return 0;
1196
118
}
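
Putting init/setiv/aad/encrypt together, a one-shot sealing helper looks as follows. This is a sketch against the public API; CRYPTO_gcm128_new, CRYPTO_gcm128_tag and CRYPTO_gcm128_release are defined in this module but fall outside the excerpt:

#include <openssl/aes.h>
#include <openssl/modes.h>
#include <stddef.h>

/* One-shot AES-128-GCM seal; returns 0 on success, -1 on failure. */
static int gcm_seal(const unsigned char key[16], const unsigned char iv[12],
                    const unsigned char *aad, size_t aadlen,
                    const unsigned char *pt, unsigned char *ct, size_t len,
                    unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT *gcm;
    int ret = -1;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return -1;
    if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
        return -1;
    CRYPTO_gcm128_setiv(gcm, iv, 12);
    /* AAD must precede message data; _aad called afterwards returns -2 */
    if (CRYPTO_gcm128_aad(gcm, aad, aadlen) == 0
            && CRYPTO_gcm128_encrypt(gcm, pt, ct, len) == 0) {
        CRYPTO_gcm128_tag(gcm, tag, 16);
        ret = 0;
    }
    CRYPTO_gcm128_release(gcm);
    return ret;
}
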
1197
1198
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1199
                          const unsigned char *in, unsigned char *out,
1200
                          size_t len)
1201
65
{
1202
65
    DECLARE_IS_ENDIAN;
1203
65
    unsigned int n, ctr, mres;
1204
65
    size_t i;
1205
65
    u64 mlen = ctx->len.u[1];
1206
65
    block128_f block = ctx->block;
1207
65
    void *key = ctx->key;
1208
65
#ifdef GCM_FUNCREF_4BIT
1209
65
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1210
65
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1211
65
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1212
65
                         const u8 *inp, size_t len) = ctx->ghash;
1213
65
# endif
1214
65
#endif
1215
1216
65
    mlen += len;
1217
65
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1218
0
        return -1;
1219
65
    ctx->len.u[1] = mlen;
1220
1221
65
    mres = ctx->mres;
1222
1223
65
    if (ctx->ares) {
1224
        /* First call to decrypt finalizes GHASH(AAD) */
1225
65
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1226
65
        if (len == 0) {
1227
14
            GCM_MUL(ctx);
1228
14
            ctx->ares = 0;
1229
14
            return 0;
1230
14
        }
1231
51
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1232
51
        ctx->Xi.u[0] = 0;
1233
51
        ctx->Xi.u[1] = 0;
1234
51
        mres = sizeof(ctx->Xi);
1235
#else
1236
        GCM_MUL(ctx);
1237
#endif
1238
51
        ctx->ares = 0;
1239
51
    }
1240
1241
51
    if (IS_LITTLE_ENDIAN)
1242
#ifdef BSWAP4
1243
        ctr = BSWAP4(ctx->Yi.d[3]);
1244
#else
1245
51
        ctr = GETU32(ctx->Yi.c + 12);
1246
0
#endif
1247
0
    else
1248
0
        ctr = ctx->Yi.d[3];
1249
1250
51
    n = mres % 16;
1251
51
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1252
51
    if (16 % sizeof(size_t) == 0) { /* always true actually */
1253
51
        do {
1254
51
            if (n) {
1255
0
# if defined(GHASH)
1256
0
                while (n && len) {
1257
0
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1258
0
                    --len;
1259
0
                    n = (n + 1) % 16;
1260
0
                }
1261
0
                if (n == 0) {
1262
0
                    GHASH(ctx, ctx->Xn, mres);
1263
0
                    mres = 0;
1264
0
                } else {
1265
0
                    ctx->mres = mres;
1266
0
                    return 0;
1267
0
                }
1268
# else
1269
                while (n && len) {
1270
                    u8 c = *(in++);
1271
                    *(out++) = c ^ ctx->EKi.c[n];
1272
                    ctx->Xi.c[n] ^= c;
1273
                    --len;
1274
                    n = (n + 1) % 16;
1275
                }
1276
                if (n == 0) {
1277
                    GCM_MUL(ctx);
1278
                    mres = 0;
1279
                } else {
1280
                    ctx->mres = n;
1281
                    return 0;
1282
                }
1283
# endif
1284
0
            }
1285
51
# if defined(STRICT_ALIGNMENT)
1286
51
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1287
0
                break;
1288
51
# endif
1289
51
# if defined(GHASH)
1290
51
            if (len >= 16 && mres) {
1291
44
                GHASH(ctx, ctx->Xn, mres);
1292
44
                mres = 0;
1293
44
            }
1294
51
#  if defined(GHASH_CHUNK)
1295
92
            while (len >= GHASH_CHUNK) {
1296
41
                size_t j = GHASH_CHUNK;
1297
1298
41
                GHASH(ctx, in, GHASH_CHUNK);
1299
7.91k
                while (j) {
1300
7.87k
                    size_t_aX *out_t = (size_t_aX *)out;
1301
7.87k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1302
1303
7.87k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1304
7.87k
                    ++ctr;
1305
7.87k
                    if (IS_LITTLE_ENDIAN)
1306
#   ifdef BSWAP4
1307
                        ctx->Yi.d[3] = BSWAP4(ctr);
1308
#   else
1309
7.87k
                        PUTU32(ctx->Yi.c + 12, ctr);
1310
0
#   endif
1311
0
                    else
1312
0
                        ctx->Yi.d[3] = ctr;
1313
23.6k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1314
15.7k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1315
7.87k
                    out += 16;
1316
7.87k
                    in += 16;
1317
7.87k
                    j -= 16;
1318
7.87k
                }
1319
41
                len -= GHASH_CHUNK;
1320
41
            }
1321
51
#  endif
1322
51
            if ((i = (len & (size_t)-16))) {
1323
43
                GHASH(ctx, in, i);
1324
2.24k
                while (len >= 16) {
1325
2.20k
                    size_t_aX *out_t = (size_t_aX *)out;
1326
2.20k
                    const size_t_aX *in_t = (const size_t_aX *)in;
1327
1328
2.20k
                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1329
2.20k
                    ++ctr;
1330
2.20k
                    if (IS_LITTLE_ENDIAN)
1331
#  ifdef BSWAP4
1332
                        ctx->Yi.d[3] = BSWAP4(ctr);
1333
#  else
1334
2.20k
                        PUTU32(ctx->Yi.c + 12, ctr);
1335
0
#  endif
1336
0
                    else
1337
0
                        ctx->Yi.d[3] = ctr;
1338
6.60k
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1339
4.40k
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1340
2.20k
                    out += 16;
1341
2.20k
                    in += 16;
1342
2.20k
                    len -= 16;
1343
2.20k
                }
1344
43
            }
1345
# else
1346
            while (len >= 16) {
1347
                size_t *out_t = (size_t *)out;
1348
                const size_t *in_t = (const size_t *)in;
1349
1350
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1351
                ++ctr;
1352
                if (IS_LITTLE_ENDIAN)
1353
#  ifdef BSWAP4
1354
                    ctx->Yi.d[3] = BSWAP4(ctr);
1355
#  else
1356
                    PUTU32(ctx->Yi.c + 12, ctr);
1357
#  endif
1358
                else
1359
                    ctx->Yi.d[3] = ctr;
1360
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1361
                    size_t c = in_t[i];
1362
                    out_t[i] = c ^ ctx->EKi.t[i];
1363
                    ctx->Xi.t[i] ^= c;
1364
                }
1365
                GCM_MUL(ctx);
1366
                out += 16;
1367
                in += 16;
1368
                len -= 16;
1369
            }
1370
# endif
1371
51
            if (len) {
1372
41
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1373
41
                ++ctr;
1374
41
                if (IS_LITTLE_ENDIAN)
1375
# ifdef BSWAP4
1376
                    ctx->Yi.d[3] = BSWAP4(ctr);
1377
# else
1378
41
                    PUTU32(ctx->Yi.c + 12, ctr);
1379
0
# endif
1380
0
                else
1381
0
                    ctx->Yi.d[3] = ctr;
1382
41
# if defined(GHASH)
1383
375
                while (len--) {
1384
334
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1385
334
                    ++n;
1386
334
                }
1387
# else
1388
                while (len--) {
1389
                    u8 c = in[n];
1390
                    ctx->Xi.c[n] ^= c;
1391
                    out[n] = c ^ ctx->EKi.c[n];
1392
                    ++n;
1393
                }
1394
                mres = n;
1395
# endif
1396
41
            }
1397
1398
51
            ctx->mres = mres;
1399
51
            return 0;
1400
51
        } while (0);
1401
51
    }
1402
0
#endif
1403
0
    for (i = 0; i < len; ++i) {
1404
0
        u8 c;
1405
0
        if (n == 0) {
1406
0
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1407
0
            ++ctr;
1408
0
            if (IS_LITTLE_ENDIAN)
1409
#ifdef BSWAP4
1410
                ctx->Yi.d[3] = BSWAP4(ctr);
1411
#else
1412
0
                PUTU32(ctx->Yi.c + 12, ctr);
1413
0
#endif
1414
0
            else
1415
0
                ctx->Yi.d[3] = ctr;
1416
0
        }
1417
0
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1418
0
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1419
0
        n = (n + 1) % 16;
1420
0
        if (mres == sizeof(ctx->Xn)) {
1421
0
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1422
0
            mres = 0;
1423
0
        }
1424
#else
1425
        c = in[i];
1426
        out[i] = c ^ ctx->EKi.c[n];
1427
        ctx->Xi.c[n] ^= c;
1428
        mres = n = (n + 1) % 16;
1429
        if (n == 0)
1430
            GCM_MUL(ctx);
1431
#endif
1432
0
    }
1433
1434
0
    ctx->mres = mres;
1435
0
    return 0;
1436
51
}
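
The decrypting counterpart mirrors the seal sketch above; the differences are the decrypt call and the tag check, CRYPTO_gcm128_finish, which returns nonzero on mismatch (note that GCM uses the encrypt key schedule in both directions):

#include <openssl/aes.h>
#include <openssl/modes.h>
#include <stddef.h>

/* One-shot AES-128-GCM open; returns 0 only if the tag verifies. */
static int gcm_open(const unsigned char key[16], const unsigned char iv[12],
                    const unsigned char *aad, size_t aadlen,
                    const unsigned char *ct, unsigned char *pt, size_t len,
                    const unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT *gcm;
    int ret = -1;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)   /* GCM always uses E_K */
        return -1;
    if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
        return -1;
    CRYPTO_gcm128_setiv(gcm, iv, 12);
    if (CRYPTO_gcm128_aad(gcm, aad, aadlen) == 0
            && CRYPTO_gcm128_decrypt(gcm, ct, pt, len) == 0
            && CRYPTO_gcm128_finish(gcm, tag, 16) == 0)
        ret = 0;
    CRYPTO_gcm128_release(gcm);
    return ret;
}
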
1437
1438
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1439
                                const unsigned char *in, unsigned char *out,
1440
                                size_t len, ctr128_f stream)
1441
149
{
1442
#if defined(OPENSSL_SMALL_FOOTPRINT)
1443
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1444
#else
1445
149
    DECLARE_IS_ENDIAN;
1446
149
    unsigned int n, ctr, mres;
1447
149
    size_t i;
1448
149
    u64 mlen = ctx->len.u[1];
1449
149
    void *key = ctx->key;
1450
149
# ifdef GCM_FUNCREF_4BIT
1451
149
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1452
149
#  ifdef GHASH
1453
149
1453 |       |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1454 |   149 |                          const u8 *inp, size_t len) = ctx->ghash;
1455 |   149 | #  endif
1456 |   149 | # endif
1457 |       |
1458 |   149 |     mlen += len;
1459 |   149 |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1460 |     0 |         return -1;
1461 |   149 |     ctx->len.u[1] = mlen;
1462 |       |
1463 |   149 |     mres = ctx->mres;
1464 |       |
1465 |   149 |     if (ctx->ares) {
1466 |       |         /* First call to encrypt finalizes GHASH(AAD) */
1467 |   149 | #if defined(GHASH)
1468 |   149 |         if (len == 0) {
1469 |     0 |             GCM_MUL(ctx);
1470 |     0 |             ctx->ares = 0;
1471 |     0 |             return 0;
1472 |     0 |         }
1473 |   149 |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1474 |   149 |         ctx->Xi.u[0] = 0;
1475 |   149 |         ctx->Xi.u[1] = 0;
1476 |   149 |         mres = sizeof(ctx->Xi);
1477 |       | #else
1478 |       |         GCM_MUL(ctx);
1479 |       | #endif
1480 |   149 |         ctx->ares = 0;
1481 |   149 |     }
1482 |       |
1483 |   149 |     if (IS_LITTLE_ENDIAN)
1484 |       | # ifdef BSWAP4
1485 |       |         ctr = BSWAP4(ctx->Yi.d[3]);
1486 |       | # else
1487 |   149 |         ctr = GETU32(ctx->Yi.c + 12);
1488 |     0 | # endif
1489 |     0 |     else
1490 |     0 |         ctr = ctx->Yi.d[3];
1491 |       |
1492 |   149 |     n = mres % 16;
1493 |   149 |     if (n) {
1494 |     0 | # if defined(GHASH)
1495 |     0 |         while (n && len) {
1496 |     0 |             ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1497 |     0 |             --len;
1498 |     0 |             n = (n + 1) % 16;
1499 |     0 |         }
1500 |     0 |         if (n == 0) {
1501 |     0 |             GHASH(ctx, ctx->Xn, mres);
1502 |     0 |             mres = 0;
1503 |     0 |         } else {
1504 |     0 |             ctx->mres = mres;
1505 |     0 |             return 0;
1506 |     0 |         }
1507 |       | # else
1508 |       |         while (n && len) {
1509 |       |             ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1510 |       |             --len;
1511 |       |             n = (n + 1) % 16;
1512 |       |         }
1513 |       |         if (n == 0) {
1514 |       |             GCM_MUL(ctx);
1515 |       |             mres = 0;
1516 |       |         } else {
1517 |       |             ctx->mres = n;
1518 |       |             return 0;
1519 |       |         }
1520 |       | # endif
1521 |     0 |     }
1522 |   149 | # if defined(GHASH)
1523 |   149 |         if (len >= 16 && mres) {
1524 |   135 |             GHASH(ctx, ctx->Xn, mres);
1525 |   135 |             mres = 0;
1526 |   135 |         }
1527 |   149 | #  if defined(GHASH_CHUNK)
1528 |   149 |     while (len >= GHASH_CHUNK) {
1529 |     0 |         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1530 |     0 |         ctr += GHASH_CHUNK / 16;
1531 |     0 |         if (IS_LITTLE_ENDIAN)
1532 |       | #   ifdef BSWAP4
1533 |       |             ctx->Yi.d[3] = BSWAP4(ctr);
1534 |       | #   else
1535 |     0 |             PUTU32(ctx->Yi.c + 12, ctr);
1536 |     0 | #   endif
1537 |     0 |         else
1538 |     0 |             ctx->Yi.d[3] = ctr;
1539 |     0 |         GHASH(ctx, out, GHASH_CHUNK);
1540 |     0 |         out += GHASH_CHUNK;
1541 |     0 |         in += GHASH_CHUNK;
1542 |     0 |         len -= GHASH_CHUNK;
1543 |     0 |     }
1544 |   149 | #  endif
1545 |   149 | # endif
1546 |   149 |     if ((i = (len & (size_t)-16))) {
1547 |   135 |         size_t j = i / 16;
1548 |       |
1549 |   135 |         (*stream) (in, out, j, key, ctx->Yi.c);
1550 |   135 |         ctr += (unsigned int)j;
1551 |   135 |         if (IS_LITTLE_ENDIAN)
1552 |       | # ifdef BSWAP4
1553 |       |             ctx->Yi.d[3] = BSWAP4(ctr);
1554 |       | # else
1555 |   135 |             PUTU32(ctx->Yi.c + 12, ctr);
1556 |     0 | # endif
1557 |     0 |         else
1558 |     0 |             ctx->Yi.d[3] = ctr;
1559 |   135 |         in += i;
1560 |   135 |         len -= i;
1561 |   135 | # if defined(GHASH)
1562 |   135 |         GHASH(ctx, out, i);
1563 |   135 |         out += i;
1564 |       | # else
1565 |       |         while (j--) {
1566 |       |             for (i = 0; i < 16; ++i)
1567 |       |                 ctx->Xi.c[i] ^= out[i];
1568 |       |             GCM_MUL(ctx);
1569 |       |             out += 16;
1570 |       |         }
1571 |       | # endif
1572 |   135 |     }
1573 |   149 |     if (len) {
1574 |    14 |         (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1575 |    14 |         ++ctr;
1576 |    14 |         if (IS_LITTLE_ENDIAN)
1577 |       | # ifdef BSWAP4
1578 |       |             ctx->Yi.d[3] = BSWAP4(ctr);
1579 |       | # else
1580 |    14 |             PUTU32(ctx->Yi.c + 12, ctr);
1581 |     0 | # endif
1582 |     0 |         else
1583 |     0 |             ctx->Yi.d[3] = ctr;
1584 |    42 |         while (len--) {
1585 |    28 | # if defined(GHASH)
1586 |    28 |             ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1587 |       | # else
1588 |       |             ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1589 |       | # endif
1590 |    28 |             ++n;
1591 |    28 |         }
1592 |    14 |     }
1593 |       |
1594 |   149 |     ctx->mres = mres;
1595 |   149 |     return 0;
1596 |   149 | #endif
1597 |   149 | }
1598 |       |
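In CRYPTO_gcm128_encrypt_ctr32 above (and its decrypt twin below), the last four bytes of the counter block Yi hold a 32-bit block counter in big-endian order: the code loads it into a host-order `ctr` (via BSWAP4 or GETU32 on little-endian machines, a plain word read on big-endian ones), bumps it once per batch of blocks, and stores it back with BSWAP4 or PUTU32. A minimal portable sketch of that store step; the helper name `put_be32` is ours, not OpenSSL's:

    #include <stdint.h>

    /* Store a host-order 32-bit counter into the last four bytes of the
     * 16-byte counter block Yi, most significant byte first -- the same
     * result the BSWAP4/PUTU32 branches above produce. */
    static void put_be32(unsigned char Yi[16], uint32_t ctr)
    {
        Yi[12] = (unsigned char)(ctr >> 24);
        Yi[13] = (unsigned char)(ctr >> 16);
        Yi[14] = (unsigned char)(ctr >> 8);
        Yi[15] = (unsigned char)ctr;
    }

Keeping `ctr` in host order between stores avoids paying a byte swap on every block on little-endian targets.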
1599 |       | int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1600 |       |                                 const unsigned char *in, unsigned char *out,
1601 |       |                                 size_t len, ctr128_f stream)
1602 |    30 | {
1603 |       | #if defined(OPENSSL_SMALL_FOOTPRINT)
1604 |       |     return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1605 |       | #else
1606 |    30 |     DECLARE_IS_ENDIAN;
1607 |    30 |     unsigned int n, ctr, mres;
1608 |    30 |     size_t i;
1609 |    30 |     u64 mlen = ctx->len.u[1];
1610 |    30 |     void *key = ctx->key;
1611 |    30 | # ifdef GCM_FUNCREF_4BIT
1612 |    30 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1613 |    30 | #  ifdef GHASH
1614 |    30 |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1615 |    30 |                          const u8 *inp, size_t len) = ctx->ghash;
1616 |    30 | #  endif
1617 |    30 | # endif
1618 |       |
1619 |    30 |     mlen += len;
1620 |    30 |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1621 |     0 |         return -1;
1622 |    30 |     ctx->len.u[1] = mlen;
1623 |       |
1624 |    30 |     mres = ctx->mres;
1625 |       |
1626 |    30 |     if (ctx->ares) {
1627 |       |         /* First call to decrypt finalizes GHASH(AAD) */
1628 |    17 | # if defined(GHASH)
1629 |    17 |         if (len == 0) {
1630 |     6 |             GCM_MUL(ctx);
1631 |     6 |             ctx->ares = 0;
1632 |     6 |             return 0;
1633 |     6 |         }
1634 |    11 |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1635 |    11 |         ctx->Xi.u[0] = 0;
1636 |    11 |         ctx->Xi.u[1] = 0;
1637 |    11 |         mres = sizeof(ctx->Xi);
1638 |       | # else
1639 |       |         GCM_MUL(ctx);
1640 |       | # endif
1641 |    11 |         ctx->ares = 0;
1642 |    11 |     }
1643 |       |
1644 |    24 |     if (IS_LITTLE_ENDIAN)
1645 |       | # ifdef BSWAP4
1646 |       |         ctr = BSWAP4(ctx->Yi.d[3]);
1647 |       | # else
1648 |    24 |         ctr = GETU32(ctx->Yi.c + 12);
1649 |     0 | # endif
1650 |     0 |     else
1651 |     0 |         ctr = ctx->Yi.d[3];
1652 |       |
1653 |    24 |     n = mres % 16;
1654 |    24 |     if (n) {
1655 |     0 | # if defined(GHASH)
1656 |     0 |         while (n && len) {
1657 |     0 |             *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1658 |     0 |             --len;
1659 |     0 |             n = (n + 1) % 16;
1660 |     0 |         }
1661 |     0 |         if (n == 0) {
1662 |     0 |             GHASH(ctx, ctx->Xn, mres);
1663 |     0 |             mres = 0;
1664 |     0 |         } else {
1665 |     0 |             ctx->mres = mres;
1666 |     0 |             return 0;
1667 |     0 |         }
1668 |       | # else
1669 |       |         while (n && len) {
1670 |       |             u8 c = *(in++);
1671 |       |             *(out++) = c ^ ctx->EKi.c[n];
1672 |       |             ctx->Xi.c[n] ^= c;
1673 |       |             --len;
1674 |       |             n = (n + 1) % 16;
1675 |       |         }
1676 |       |         if (n == 0) {
1677 |       |             GCM_MUL(ctx);
1678 |       |             mres = 0;
1679 |       |         } else {
1680 |       |             ctx->mres = n;
1681 |       |             return 0;
1682 |       |         }
1683 |       | # endif
1684 |     0 |     }
1685 |    24 | # if defined(GHASH)
1686 |    24 |     if (len >= 16 && mres) {
1687 |     0 |         GHASH(ctx, ctx->Xn, mres);
1688 |     0 |         mres = 0;
1689 |     0 |     }
1690 |    24 | #  if defined(GHASH_CHUNK)
1691 |    24 |     while (len >= GHASH_CHUNK) {
1692 |     0 |         GHASH(ctx, in, GHASH_CHUNK);
1693 |     0 |         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1694 |     0 |         ctr += GHASH_CHUNK / 16;
1695 |     0 |         if (IS_LITTLE_ENDIAN)
1696 |       | #   ifdef BSWAP4
1697 |       |             ctx->Yi.d[3] = BSWAP4(ctr);
1698 |       | #   else
1699 |     0 |             PUTU32(ctx->Yi.c + 12, ctr);
1700 |     0 | #   endif
1701 |     0 |         else
1702 |     0 |             ctx->Yi.d[3] = ctr;
1703 |     0 |         out += GHASH_CHUNK;
1704 |     0 |         in += GHASH_CHUNK;
1705 |     0 |         len -= GHASH_CHUNK;
1706 |     0 |     }
1707 |    24 | #  endif
1708 |    24 | # endif
1709 |    24 |     if ((i = (len & (size_t)-16))) {
1710 |     9 |         size_t j = i / 16;
1711 |       |
1712 |     9 | # if defined(GHASH)
1713 |     9 |         GHASH(ctx, in, i);
1714 |       | # else
1715 |       |         while (j--) {
1716 |       |             size_t k;
1717 |       |             for (k = 0; k < 16; ++k)
1718 |       |                 ctx->Xi.c[k] ^= in[k];
1719 |       |             GCM_MUL(ctx);
1720 |       |             in += 16;
1721 |       |         }
1722 |       |         j = i / 16;
1723 |       |         in -= i;
1724 |       | # endif
1725 |     9 |         (*stream) (in, out, j, key, ctx->Yi.c);
1726 |     9 |         ctr += (unsigned int)j;
1727 |     9 |         if (IS_LITTLE_ENDIAN)
1728 |       | # ifdef BSWAP4
1729 |       |             ctx->Yi.d[3] = BSWAP4(ctr);
1730 |       | # else
1731 |     9 |             PUTU32(ctx->Yi.c + 12, ctr);
1732 |     0 | # endif
1733 |     0 |         else
1734 |     0 |             ctx->Yi.d[3] = ctr;
1735 |     9 |         out += i;
1736 |     9 |         in += i;
1737 |     9 |         len -= i;
1738 |     9 |     }
1739 |    24 |     if (len) {
1740 |    21 |         (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1741 |    21 |         ++ctr;
1742 |    21 |         if (IS_LITTLE_ENDIAN)
1743 |       | # ifdef BSWAP4
1744 |       |             ctx->Yi.d[3] = BSWAP4(ctr);
1745 |       | # else
1746 |    21 |             PUTU32(ctx->Yi.c + 12, ctr);
1747 |     0 | # endif
1748 |     0 |         else
1749 |     0 |             ctx->Yi.d[3] = ctr;
1750 |   172 |         while (len--) {
1751 |   151 | # if defined(GHASH)
1752 |   151 |             out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1753 |       | # else
1754 |       |             u8 c = in[n];
1755 |       |             ctx->Xi.c[mres++] ^= c;
1756 |       |             out[n] = c ^ ctx->EKi.c[n];
1757 |       | # endif
1758 |   151 |             ++n;
1759 |   151 |         }
1760 |    21 |     }
1761 |       |
1762 |    24 |     ctx->mres = mres;
1763 |    24 |     return 0;
1764 |    24 | #endif
1765 |    24 | }
1766 |       |
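The guard `mlen > ((U64(1) << 36) - 32)` applied by both ctr32 entry points enforces GCM's per-invocation plaintext limit of 2^39 - 256 bits (NIST SP 800-38D), i.e. 2^36 - 32 bytes: the 32-bit counter admits at most 2^32 - 2 data blocks of 16 bytes. The second clause, `mlen < len`, rejects 64-bit wraparound of the running byte count. A standalone sketch of the same check, with names of our own choosing:

    #include <stdint.h>
    #include <stddef.h>

    /* GCM caps one message at 2^39 - 256 bits = 2^36 - 32 bytes. */
    #define GCM_MAX_BYTES ((UINT64_C(1) << 36) - 32)

    /* Returns 0 if processing 'len' more bytes keeps the running total
     * within the GCM limit without overflowing the 64-bit counter,
     * -1 otherwise -- mirroring the mlen guard above. */
    static int gcm_len_ok(uint64_t total, size_t len)
    {
        uint64_t mlen = total + len;

        if (mlen > GCM_MAX_BYTES || (sizeof(len) == 8 && mlen < (uint64_t)len))
            return -1;
        return 0;
    }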
1767 |       | int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1768 |       |                          size_t len)
1769 |   349 | {
1770 |   349 |     DECLARE_IS_ENDIAN;
1771 |   349 |     u64 alen = ctx->len.u[0] << 3;
1772 |   349 |     u64 clen = ctx->len.u[1] << 3;
1773 |   349 | #ifdef GCM_FUNCREF_4BIT
1774 |   349 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1775 |   349 | # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1776 |   349 |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1777 |   349 |                          const u8 *inp, size_t len) = ctx->ghash;
1778 |   349 | # endif
1779 |   349 | #endif
1780 |       |
1781 |   349 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1782 |   349 |     u128 bitlen;
1783 |   349 |     unsigned int mres = ctx->mres;
1784 |       |
1785 |   349 |     if (mres) {
1786 |   133 |         unsigned blocks = (mres + 15) & -16;
1787 |       |
1788 |   133 |         memset(ctx->Xn + mres, 0, blocks - mres);
1789 |   133 |         mres = blocks;
1790 |   133 |         if (mres == sizeof(ctx->Xn)) {
1791 |     0 |             GHASH(ctx, ctx->Xn, mres);
1792 |     0 |             mres = 0;
1793 |     0 |         }
1794 |   216 |     } else if (ctx->ares) {
1795 |     0 |         GCM_MUL(ctx);
1796 |     0 |     }
1797 |       | #else
1798 |       |     if (ctx->mres || ctx->ares)
1799 |       |         GCM_MUL(ctx);
1800 |       | #endif
1801 |       |
1802 |   349 |     if (IS_LITTLE_ENDIAN) {
1803 |       | #ifdef BSWAP8
1804 |       |         alen = BSWAP8(alen);
1805 |       |         clen = BSWAP8(clen);
1806 |       | #else
1807 |   349 |         u8 *p = ctx->len.c;
1808 |       |
1809 |   349 |         ctx->len.u[0] = alen;
1810 |   349 |         ctx->len.u[1] = clen;
1811 |       |
1812 |   349 |         alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1813 |   349 |         clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1814 |   349 | #endif
1815 |   349 |     }
1816 |       |
1817 |   349 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1818 |   349 |     bitlen.hi = alen;
1819 |   349 |     bitlen.lo = clen;
1820 |   349 |     memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1821 |   349 |     mres += sizeof(bitlen);
1822 |   349 |     GHASH(ctx, ctx->Xn, mres);
1823 |       | #else
1824 |       |     ctx->Xi.u[0] ^= alen;
1825 |       |     ctx->Xi.u[1] ^= clen;
1826 |       |     GCM_MUL(ctx);
1827 |       | #endif
1828 |       |
1829 |   349 |     ctx->Xi.u[0] ^= ctx->EK0.u[0];
1830 |   349 |     ctx->Xi.u[1] ^= ctx->EK0.u[1];
1831 |       |
1832 |   349 |     if (tag && len <= sizeof(ctx->Xi))
1833 |    82 |         return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1834 |   267 |     else
1835 |   267 |         return -1;
1836 |   349 | }
1837 |       |
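CRYPTO_gcm128_finish flushes any GHASH input still buffered in Xn (or Xi), folds in the AAD and ciphertext bit lengths, XORs the result with EK0 (the encrypted initial counter block), and, when the caller supplies a tag of at most 16 bytes, compares it in constant time with CRYPTO_memcmp; the 267 calls taking the `return -1` branch here arrive with tag == NULL from CRYPTO_gcm128_tag below. As a hedged illustration of driving this API end to end, here is a one-shot AES-128-GCM decrypt-and-verify sketch; the function name gcm_open is ours, error handling is trimmed, and the cast of AES_encrypt to block128_f follows common OpenSSL usage rather than a documented guarantee:

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* Decrypts ct into pt and verifies a 16-byte tag.
     * Returns 0 on tag match, nonzero on any failure. Sketch only. */
    static int gcm_open(const unsigned char key16[16],
                        const unsigned char *iv, size_t ivlen,
                        const unsigned char *aad, size_t aadlen,
                        const unsigned char *ct, unsigned char *pt, size_t len,
                        const unsigned char tag[16])
    {
        AES_KEY aes;
        GCM128_CONTEXT *gcm;
        int rv = -1;

        if (AES_set_encrypt_key(key16, 128, &aes) != 0)
            return -1;
        /* GCM always uses the forward cipher, even when decrypting. */
        if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
            return -1;
        CRYPTO_gcm128_setiv(gcm, iv, ivlen);
        if (CRYPTO_gcm128_aad(gcm, aad, aadlen) == 0
                && CRYPTO_gcm128_decrypt(gcm, ct, pt, len) == 0)
            rv = CRYPTO_gcm128_finish(gcm, tag, 16);  /* constant-time compare */
        CRYPTO_gcm128_release(gcm);
        return rv;
    }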
1838 |       | void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1839 |   267 | {
1840 |   267 |     CRYPTO_gcm128_finish(ctx, NULL, 0);
1841 |   267 |     memcpy(tag, ctx->Xi.c,
1842 |   267 |            len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1843 |   267 | }
1844 |       |
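CRYPTO_gcm128_tag is the sender-side counterpart: it calls CRYPTO_gcm128_finish with a NULL tag purely to compute the final Xi (that call's -1 return is deliberately ignored) and copies out at most 16 bytes of it. A receiver should instead hand the peer's tag to CRYPTO_gcm128_finish, which compares via CRYPTO_memcmp; extracting a tag with this function and comparing it with plain memcmp would not be constant-time. A sender-side fragment, reusing the `gcm` handle from the sketch above:

    unsigned char tag[16];

    /* After the final CRYPTO_gcm128_encrypt() call for this message: */
    CRYPTO_gcm128_tag(gcm, tag, sizeof(tag));   /* emit the 128-bit tag */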
1845 |       | GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1846 |     0 | {
1847 |     0 |     GCM128_CONTEXT *ret;
1848 |       |
1849 |     0 |     if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1850 |     0 |         CRYPTO_gcm128_init(ret, key, block);
1851 |       |
1852 |     0 |     return ret;
1853 |     0 | }
1854 |       |
1855 |       | void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1856 |     0 | {
1857 |     0 |     OPENSSL_clear_free(ctx, sizeof(*ctx));
1858 |     0 | }
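CRYPTO_gcm128_new and CRYPTO_gcm128_release bracket a heap-allocated context; release wipes it with OPENSSL_clear_free so the hash key H, EK0, and buffered state do not linger in freed memory. Both report zero hits in this run, so the code measured here never allocates a context on the heap. For symmetry with the earlier decrypt sketch, a hedged encrypt-side one-shot that does exercise this pair; again the function name gcm_seal is ours and error paths are trimmed:

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* Encrypts pt into ct and emits a 16-byte tag.
     * Returns 0 on success, -1 on failure. Sketch only. */
    static int gcm_seal(const unsigned char key16[16],
                        const unsigned char *iv, size_t ivlen,
                        const unsigned char *aad, size_t aadlen,
                        const unsigned char *pt, unsigned char *ct, size_t len,
                        unsigned char tag[16])
    {
        AES_KEY aes;
        GCM128_CONTEXT *gcm;
        int rv = -1;

        if (AES_set_encrypt_key(key16, 128, &aes) != 0)
            return -1;
        if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
            return -1;
        CRYPTO_gcm128_setiv(gcm, iv, ivlen);
        if (CRYPTO_gcm128_aad(gcm, aad, aadlen) == 0
                && CRYPTO_gcm128_encrypt(gcm, pt, ct, len) == 0) {
            CRYPTO_gcm128_tag(gcm, tag, 16);
            rv = 0;
        }
        CRYPTO_gcm128_release(gcm);   /* zeroizes the context before freeing */
        return rv;
    }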