Coverage Report

Created: 2022-11-30 06:20

/src/openssl/crypto/modes/gcm128.c
Line
Count
Source (jump to first uncovered line)
1
/* ====================================================================
2
 * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
3
 *
4
 * Redistribution and use in source and binary forms, with or without
5
 * modification, are permitted provided that the following conditions
6
 * are met:
7
 *
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 *
11
 * 2. Redistributions in binary form must reproduce the above copyright
12
 *    notice, this list of conditions and the following disclaimer in
13
 *    the documentation and/or other materials provided with the
14
 *    distribution.
15
 *
16
 * 3. All advertising materials mentioning features or use of this
17
 *    software must display the following acknowledgment:
18
 *    "This product includes software developed by the OpenSSL Project
19
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20
 *
21
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22
 *    endorse or promote products derived from this software without
23
 *    prior written permission. For written permission, please contact
24
 *    openssl-core@openssl.org.
25
 *
26
 * 5. Products derived from this software may not be called "OpenSSL"
27
 *    nor may "OpenSSL" appear in their names without prior written
28
 *    permission of the OpenSSL Project.
29
 *
30
 * 6. Redistributions of any form whatsoever must retain the following
31
 *    acknowledgment:
32
 *    "This product includes software developed by the OpenSSL Project
33
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34
 *
35
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46
 * OF THE POSSIBILITY OF SUCH DAMAGE.
47
 * ====================================================================
48
 */
49
50
#define OPENSSL_FIPSAPI
51
52
#include <openssl/crypto.h>
53
#include "modes_lcl.h"
54
#include <string.h>
55
56
#ifndef MODES_DEBUG
57
# ifndef NDEBUG
58
#  define NDEBUG
59
# endif
60
#endif
61
#include <assert.h>
62
63
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64
/* redefine, because alignment is ensured */
65
# undef  GETU32
66
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
67
# undef  PUTU32
68
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
69
#endif
70
71
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
72
0
/*
 * REDUCE1BIT(V) - shift the 128-bit value V (a struct with u64 members
 * hi and lo) right by one bit.  When the bit shifted out of V.lo was set,
 * 0xe1 is XORed into the most significant byte of V.hi.
 *
 * The two branches compute the same result; the second one builds the
 * constant from a 32-bit quantity for targets where size_t is not 64 bits
 * wide.
 *
 * V must be a modifiable lvalue without side effects: it is evaluated
 * several times.  Every use is parenthesized so that arguments such as
 * *ptr or table[i] expand correctly.
 */
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^((u64)T<<32); \
        } \
} while(0)
84
85
/*-
86
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87
 * never be set to 8. 8 is effectively reserved for testing purposes.
88
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90
 * whole spectrum of possible table driven implementations. Why? In
91
 * non-"Shoup's" case memory access pattern is segmented in such manner,
92
 * that it's trivial to see that cache timing information can reveal
93
 * fair portion of intermediate hash value. Given that ciphertext is
94
 * always available to attacker, it's possible for him to attempt to
95
 * deduce secret parameter H and if successful, tamper with messages
96
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97
 * not as trivial, but there is no reason to believe that it's resistant
98
 * to cache-timing attack. And the thing about "8-bit" implementation is
99
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100
 * key + 1KB shared. Well, on pros side it should be twice as fast as
101
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102
 * was observed to run ~75% faster, closer to 100% for commercial
103
 * compilers... Yet "4-bit" procedure is preferred, because it's
104
 * believed to provide better security-performance balance and adequate
105
 * all-round performance. "All-round" refers to things like:
106
 *
107
 * - shorter setup time effectively improves overall timing for
108
 *   handling short messages;
109
 * - larger table allocation can become unbearable because of VM
110
 *   subsystem penalties (for example on Windows large enough free
111
 *   results in VM working set trimming, meaning that consequent
112
 *   malloc would immediately incur working set expansion);
113
 * - larger table has larger cache footprint, which can affect
114
 *   performance of other code paths (not necessarily even from same
115
 *   thread in Hyper-Threading world);
116
 *
117
 * Value of 1 is not appropriate for performance reasons.
118
 */
119
#if     TABLE_BITS==8
120
121
/*
 * Fill the 256-entry lookup table consumed by gcm_gmult_8bit().
 *
 * Htable: output table; entry 0 is set to zero.
 * H:      key value, H[0] being the high and H[1] the low 64 bits.
 */
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    u128 cur;
    int slot, k;

    Htable[0].hi = 0;
    Htable[0].lo = 0;

    cur.hi = H[0];
    cur.lo = H[1];

    /*
     * Power-of-two slots: slot 128 receives H unchanged, every lower
     * power of two receives the previous value after one REDUCE1BIT step.
     */
    Htable[128] = cur;
    slot = 64;
    while (slot > 0) {
        REDUCE1BIT(cur);
        Htable[slot] = cur;
        slot >>= 1;
    }

    /* Every other slot (slot + k) is the XOR of entries slot and k. */
    slot = 2;
    while (slot < 256) {
        u128 *row = Htable + slot;
        u128 base = *row;

        for (k = 1; k < slot; ++k) {
            row[k].hi = base.hi ^ Htable[k].hi;
            row[k].lo = base.lo ^ Htable[k].lo;
        }
        slot <<= 1;
    }
}
144
145
/*
 * Multiply the 16-byte value stored at Xi by the key encoded in Htable,
 * consuming one byte of Xi per step, and store the product back in Xi.
 *
 * Xi is walked from byte 15 down to byte 0.  Between bytes the
 * accumulator is shifted right by 8 bits and the byte that dropped out
 * selects a correction from rem_8bit that is XORed into the top of the
 * accumulator.  The result is written to Xi in big-endian byte order.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
    };
    /* Run-time byte-order probe: .little reads 1 on little-endian hosts. */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    const u8 *bytes = (const u8 *)Xi;
    u128 acc = { 0, 0 };
    size_t carry, n;
    int idx = 15;

    for (;;) {
        n = bytes[idx];
        acc.hi ^= Htable[n].hi;
        acc.lo ^= Htable[n].lo;

        /* Byte 0 is the last one; no shift follows it. */
        if (idx == 0)
            break;
        --idx;

        /* Shift right by one byte and fold the dropped byte back in. */
        carry = (size_t)acc.lo & 0xff;
        acc.lo = (acc.hi << 56) | (acc.lo >> 8);
        acc.hi >>= 8;
        if (sizeof(size_t) == 8)
            acc.hi ^= rem_8bit[carry];
        else
            acc.hi ^= (u64)rem_8bit[carry] << 32;
    }

    if (!is_endian.little) {
        /* Host order already matches the stored byte order. */
        Xi[0] = acc.hi;
        Xi[1] = acc.lo;
    } else {
# ifdef BSWAP8
        Xi[0] = BSWAP8(acc.hi);
        Xi[1] = BSWAP8(acc.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;

        v = (u32)(acc.hi >> 32);
        PUTU32(p, v);
        v = (u32)(acc.hi);
        PUTU32(p + 4, v);
        v = (u32)(acc.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(acc.lo);
        PUTU32(p + 12, v);
# endif
    }
}
262
263
# define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
264
265
#elif   TABLE_BITS==4
266
267
/*
 * Fill the 16-entry lookup table used by the 4-bit GHASH routines.
 *
 * Htable: output table; entry 0 is set to zero, entry 8 receives H
 *         itself, entries 4, 2 and 1 are obtained by successive
 *         REDUCE1BIT steps, and every remaining entry is the XOR of two
 *         entries whose indices add up to its own.
 * H:      key value, H[0] being the high and H[1] the low 64 bits.
 *
 * When built with GHASH_ASM for ARM the entries are additionally
 * rearranged into the dword order that the assembler code expects.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 cur;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;

    cur.hi = H[0];
    cur.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    /* Compact variant: derive the table with loops. */
    Htable[8] = cur;
    for (i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(cur);
        Htable[i] = cur;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *row = Htable + i;
        int j;

        cur = *row;
        for (j = 1; j < i; ++j) {
            row[j].hi = cur.hi ^ Htable[j].hi;
            row[j].lo = cur.lo ^ Htable[j].lo;
        }
    }
# else
    /* Unrolled variant: same table, no loops. */
    Htable[8] = cur;
    REDUCE1BIT(cur);
    Htable[4] = cur;
    REDUCE1BIT(cur);
    Htable[2] = cur;
    REDUCE1BIT(cur);
    Htable[1] = cur;

    /* cur == Htable[1] here */
    Htable[3].hi = cur.hi ^ Htable[2].hi;
    Htable[3].lo = cur.lo ^ Htable[2].lo;

    cur = Htable[4];
    Htable[5].hi = cur.hi ^ Htable[1].hi;
    Htable[5].lo = cur.lo ^ Htable[1].lo;
    Htable[6].hi = cur.hi ^ Htable[2].hi;
    Htable[6].lo = cur.lo ^ Htable[2].lo;
    Htable[7].hi = cur.hi ^ Htable[3].hi;
    Htable[7].lo = cur.lo ^ Htable[3].lo;

    cur = Htable[8];
    Htable[9].hi = cur.hi ^ Htable[1].hi;
    Htable[9].lo = cur.lo ^ Htable[1].lo;
    Htable[10].hi = cur.hi ^ Htable[2].hi;
    Htable[10].lo = cur.lo ^ Htable[2].lo;
    Htable[11].hi = cur.hi ^ Htable[3].hi;
    Htable[11].lo = cur.lo ^ Htable[3].lo;
    Htable[12].hi = cur.hi ^ Htable[4].hi;
    Htable[12].lo = cur.lo ^ Htable[4].lo;
    Htable[13].hi = cur.hi ^ Htable[5].hi;
    Htable[13].lo = cur.lo ^ Htable[5].lo;
    Htable[14].hi = cur.hi ^ Htable[6].hi;
    Htable[14].lo = cur.lo ^ Htable[6].lo;
    Htable[15].hi = cur.hi ^ Htable[7].hi;
    Htable[15].lo = cur.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        /* Run-time byte-order probe: .little reads 1 on little-endian. */
        const union {
            long one;
            char little;
        } is_endian = {
            1
        };

        if (is_endian.little) {
            /* Swap the two 64-bit halves of every entry. */
            for (j = 0; j < 16; ++j) {
                cur = Htable[j];
                Htable[j].hi = cur.lo;
                Htable[j].lo = cur.hi;
            }
        } else {
            /* Swap the halves and the 32-bit words inside each half. */
            for (j = 0; j < 16; ++j) {
                cur = Htable[j];
                Htable[j].hi = cur.lo << 32 | cur.lo >> 32;
                Htable[j].lo = cur.hi << 32 | cur.hi >> 32;
            }
        }
    }
# endif
}
342
343
# ifndef GHASH_ASM
344
static const size_t rem_4bit[16] = {
345
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
346
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
347
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
348
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
349
};
350
351
/*
 * Multiply the 16-byte value stored at Xi by the key encoded in Htable,
 * four bits at a time, and store the product back in Xi.
 *
 * Xi is walked from byte 15 down to byte 0, low nibble before high
 * nibble.  Before each new nibble is folded in, the accumulator is
 * shifted right by 4 bits and the nibble that dropped out selects a
 * correction from rem_4bit.  The result is written to Xi in big-endian
 * byte order.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    /* Run-time byte-order probe: .little reads 1 on little-endian hosts. */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    const u8 *bytes = (const u8 *)Xi;
    u128 acc;
    size_t carry, byte;
    int idx = 15;

    /* Seed the accumulator with the low nibble of the last byte. */
    byte = bytes[15];
    acc.hi = Htable[byte & 0xf].hi;
    acc.lo = Htable[byte & 0xf].lo;

    for (;;) {
        /* Shift by one nibble, then fold in the high nibble. */
        carry = (size_t)acc.lo & 0xf;
        acc.lo = (acc.hi << 60) | (acc.lo >> 4);
        acc.hi >>= 4;
        if (sizeof(size_t) == 8)
            acc.hi ^= rem_4bit[carry];
        else
            acc.hi ^= (u64)rem_4bit[carry] << 32;

        acc.hi ^= Htable[byte >> 4].hi;
        acc.lo ^= Htable[byte >> 4].lo;

        if (--idx < 0)
            break;

        /* Next byte: shift by one nibble, then fold in its low nibble. */
        byte = bytes[idx];

        carry = (size_t)acc.lo & 0xf;
        acc.lo = (acc.hi << 60) | (acc.lo >> 4);
        acc.hi >>= 4;
        if (sizeof(size_t) == 8)
            acc.hi ^= rem_4bit[carry];
        else
            acc.hi ^= (u64)rem_4bit[carry] << 32;

        acc.hi ^= Htable[byte & 0xf].hi;
        acc.lo ^= Htable[byte & 0xf].lo;
    }

    if (!is_endian.little) {
        /* Host order already matches the stored byte order. */
        Xi[0] = acc.hi;
        Xi[1] = acc.lo;
    } else {
#  ifdef BSWAP8
        Xi[0] = BSWAP8(acc.hi);
        Xi[1] = BSWAP8(acc.lo);
#  else
        u8 *p = (u8 *)Xi;
        u32 v;

        v = (u32)(acc.hi >> 32);
        PUTU32(p, v);
        v = (u32)(acc.hi);
        PUTU32(p + 4, v);
        v = (u32)(acc.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(acc.lo);
        PUTU32(p + 12, v);
#  endif
    }
}
422
423
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
424
/*
425
 * Streamed gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
426
 * details... Compiler-generated code doesn't seem to give any
427
 * performance improvement, at least not on x86[_64]. It's here
428
 * mostly as reference and a placeholder for possible future
429
 * non-trivial optimization[s]...
430
 */
431
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
432
                           const u8 *inp, size_t len)
433
{
434
    u128 Z;
435
    int cnt;
436
    size_t rem, nlo, nhi;
437
    const union {
438
        long one;
439
        char little;
440
    } is_endian = {
441
        1
442
    };
443
444
#   if 1
445
    do {
446
        cnt = 15;
447
        nlo = ((const u8 *)Xi)[15];
448
        nlo ^= inp[15];
449
        nhi = nlo >> 4;
450
        nlo &= 0xf;
451
452
        Z.hi = Htable[nlo].hi;
453
        Z.lo = Htable[nlo].lo;
454
455
        while (1) {
456
            rem = (size_t)Z.lo & 0xf;
457
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
458
            Z.hi = (Z.hi >> 4);
459
            if (sizeof(size_t) == 8)
460
                Z.hi ^= rem_4bit[rem];
461
            else
462
                Z.hi ^= (u64)rem_4bit[rem] << 32;
463
464
            Z.hi ^= Htable[nhi].hi;
465
            Z.lo ^= Htable[nhi].lo;
466
467
            if (--cnt < 0)
468
                break;
469
470
            nlo = ((const u8 *)Xi)[cnt];
471
            nlo ^= inp[cnt];
472
            nhi = nlo >> 4;
473
            nlo &= 0xf;
474
475
            rem = (size_t)Z.lo & 0xf;
476
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
477
            Z.hi = (Z.hi >> 4);
478
            if (sizeof(size_t) == 8)
479
                Z.hi ^= rem_4bit[rem];
480
            else
481
                Z.hi ^= (u64)rem_4bit[rem] << 32;
482
483
            Z.hi ^= Htable[nlo].hi;
484
            Z.lo ^= Htable[nlo].lo;
485
        }
486
#   else
487
    /*
488
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
489
     * [should] give ~50% improvement... One could have PACK()-ed
490
     * the rem_8bit even here, but the priority is to minimize
491
     * cache footprint...
492
     */
493
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
494
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
495
    static const unsigned short rem_8bit[256] = {
496
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
497
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
498
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
499
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
500
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
501
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
502
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
503
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
504
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
505
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
506
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
507
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
508
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
509
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
510
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
511
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
512
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
513
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
514
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
515
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
516
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
517
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
518
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
519
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
520
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
521
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
522
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
523
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
524
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
525
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
526
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
527
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
528
    };
529
    /*
530
     * This pre-processing phase slows down procedure by approximately
531
     * same time as it makes each loop spin faster. In other words
532
     * single block performance is approximately same as straightforward
533
     * "4-bit" implementation, and then it goes only faster...
534
     */
535
    for (cnt = 0; cnt < 16; ++cnt) {
536
        Z.hi = Htable[cnt].hi;
537
        Z.lo = Htable[cnt].lo;
538
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
539
        Hshr4[cnt].hi = (Z.hi >> 4);
540
        Hshl4[cnt] = (u8)(Z.lo << 4);
541
    }
542
543
    do {
544
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
545
            nlo = ((const u8 *)Xi)[cnt];
546
            nlo ^= inp[cnt];
547
            nhi = nlo >> 4;
548
            nlo &= 0xf;
549
550
            Z.hi ^= Htable[nlo].hi;
551
            Z.lo ^= Htable[nlo].lo;
552
553
            rem = (size_t)Z.lo & 0xff;
554
555
            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
556
            Z.hi = (Z.hi >> 8);
557
558
            Z.hi ^= Hshr4[nhi].hi;
559
            Z.lo ^= Hshr4[nhi].lo;
560
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
561
        }
562
563
        nlo = ((const u8 *)Xi)[0];
564
        nlo ^= inp[0];
565
        nhi = nlo >> 4;
566
        nlo &= 0xf;
567
568
        Z.hi ^= Htable[nlo].hi;
569
        Z.lo ^= Htable[nlo].lo;
570
571
        rem = (size_t)Z.lo & 0xf;
572
573
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
574
        Z.hi = (Z.hi >> 4);
575
576
        Z.hi ^= Htable[nhi].hi;
577
        Z.lo ^= Htable[nhi].lo;
578
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
579
#   endif
580
581
        if (is_endian.little) {
582
#   ifdef BSWAP8
583
            Xi[0] = BSWAP8(Z.hi);
584
            Xi[1] = BSWAP8(Z.lo);
585
#   else
586
            u8 *p = (u8 *)Xi;
587
            u32 v;
588
            v = (u32)(Z.hi >> 32);
589
            PUTU32(p, v);
590
            v = (u32)(Z.hi);
591
            PUTU32(p + 4, v);
592
            v = (u32)(Z.lo >> 32);
593
            PUTU32(p + 8, v);
594
            v = (u32)(Z.lo);
595
            PUTU32(p + 12, v);
596
#   endif
597
        } else {
598
            Xi[0] = Z.hi;
599
            Xi[1] = Z.lo;
600
        }
601
    } while (inp += 16, len -= 16);
602
}
603
#  endif
604
# else
605
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
606
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
607
                    size_t len);
608
# endif
609
610
# define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
611
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
612
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
613
/*
614
 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
615
 * effect. In other words idea is to hash data while it's still in L1 cache
616
 * after encryption pass...
617
 */
618
0
#  define GHASH_CHUNK       (3*1024)
619
# endif
620
621
#else                           /* TABLE_BITS */
622
623
/*
 * Bit-serial multiplication of the 16-byte value stored at Xi by H; the
 * product is stored back in Xi in big-endian byte order.
 *
 * Xi: 16-byte value, read byte by byte from byte 0 to byte 15, most
 *     significant bit of each byte first; overwritten with the result.
 * H:  multiplier in host byte order, H[0] high and H[1] low 64 bits.
 *
 * For every bit of Xi the running multiple V is XORed into the
 * accumulator Z under an all-ones/all-zeros mask derived from that bit
 * (no data-dependent branch), after which V is advanced with REDUCE1BIT.
 *
 * Xi is accessed through an unsigned byte pointer and all shifting is
 * done on unsigned values, so the routine does not depend on the width
 * of long, on the host byte order, or on how the compiler shifts
 * negative signed integers.
 */
static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    unsigned int X;
    int i, j;
    const u8 *xi = (const u8 *)Xi;
    /* Run-time byte-order probe: .little reads 1 on little-endian hosts. */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };

    V.hi = H[0];                /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j = 0; j < 16; ++j) {
        X = xi[j];

        for (i = 0; i < 8; ++i, X <<= 1) {
            /* M is all ones when bit 7 of X is set, zero otherwise. */
            u64 M = 0 - (u64)((X >> 7) & 1);

            Z.hi ^= V.hi & M;
            Z.lo ^= V.lo & M;

            REDUCE1BIT(V);
        }
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
685
686
# define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
687
688
#endif
689
690
#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
691
# if    !defined(I386_ONLY) && \
692
        (defined(__i386)        || defined(__i386__)    || \
693
         defined(__x86_64)      || defined(__x86_64__)  || \
694
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
695
#  define GHASH_ASM_X86_OR_64
696
#  define GCM_FUNCREF_4BIT
697
extern unsigned int OPENSSL_ia32cap_P[];
698
699
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
700
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
701
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
702
                     size_t len);
703
704
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
705
#   define gcm_init_avx   gcm_init_clmul
706
#   define gcm_gmult_avx  gcm_gmult_clmul
707
#   define gcm_ghash_avx  gcm_ghash_clmul
708
#  else
709
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
710
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
711
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
712
                   size_t len);
713
#  endif
714
715
#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
716
#   define GHASH_ASM_X86
717
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
718
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
719
                        size_t len);
720
721
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
722
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
723
                        size_t len);
724
#  endif
725
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
726
#  include "arm_arch.h"
727
#  if __ARM_MAX_ARCH__>=7
728
#   define GHASH_ASM_ARM
729
#   define GCM_FUNCREF_4BIT
730
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
731
#   if defined(__arm__) || defined(__arm)
732
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
733
#   endif
734
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
735
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
736
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
737
                    size_t len);
738
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
739
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
740
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
741
                  size_t len);
742
#  endif
743
# elif defined(__sparc__) || defined(__sparc)
744
#  include "sparc_arch.h"
745
#  define GHASH_ASM_SPARC
746
#  define GCM_FUNCREF_4BIT
747
extern unsigned int OPENSSL_sparcv9cap_P[];
748
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
749
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
750
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
751
                    size_t len);
752
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
753
#  include "ppc_arch.h"
754
#  define GHASH_ASM_PPC
755
#  define GCM_FUNCREF_4BIT
756
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
757
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
758
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
759
                  size_t len);
760
# endif
761
#endif
762
763
#ifdef GCM_FUNCREF_4BIT
764
# undef  GCM_MUL
765
0
# define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
766
# ifdef GHASH
767
#  undef  GHASH
768
0
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
769
# endif
770
#endif
771
772
/*
 * Prepare a GCM context for use with the given block cipher and key.
 *
 * The context is zeroed, |block| and |key| are recorded, and the hash key H
 * is obtained by running |block| in place over the (all-zero) ctx->H field.
 * On little-endian hosts H is then converted to host byte order.  Finally
 * ctx->Htable is filled in and, in builds where the GHASH back-end is chosen
 * at run time, ctx->gmult / ctx->ghash are pointed at the selected routines.
 *
 * ctx   - context to initialise (fully overwritten)
 * key   - opaque key schedule handed back to |block| on every call
 * block - single-block encryption routine
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    memset(ctx, 0, sizeof(*ctx));
    ctx->key = key;
    ctx->block = block;

    /* ctx->H is zero after the memset above; encrypt it in place */
    (*block) (ctx->H.c, ctx->H.c, key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *h = ctx->H.c;
        u64 upper = (u64)GETU32(h) << 32 | GETU32(h + 4);
        u64 lower = (u64)GETU32(h + 8) << 32 | GETU32(h + 12);

        ctx->H.u[0] = upper;
        ctx->H.u[1] = lower;
#endif
    }
#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif   TABLE_BITS==4
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if ((OPENSSL_ia32cap_P[0] & (1 << 24)) &&   /* check FXSR bit */
        (OPENSSL_ia32cap_P[1] & (1 << 1))) {    /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            ctx->ghash = gcm_ghash_avx;
        } else {
            gcm_init_clmul(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            ctx->ghash = gcm_ghash_clmul;
        }
        /* carry-less multiply back-end chosen; the 4-bit table is not built */
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if   defined(GHASH_ASM_X86)  /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        ctx->ghash = gcm_ghash_4bit_x86;
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
#  endif
# elif  defined(GHASH_ASM_ARM)
    /*
     * The capability tests below form a single if / else-if chain whose
     * links are compiled in only when the matching macro exists.
     */
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        ctx->ghash = gcm_ghash_v8;
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
#  if defined(GHASH)
        ctx->ghash = gcm_ghash_4bit;
#  else
        ctx->ghash = NULL;
#  endif
    }
# elif  defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        gcm_init_vis3(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        ctx->ghash = gcm_ghash_vis3;
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# elif  defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        gcm_init_p8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        ctx->ghash = gcm_ghash_p8;
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
#  if defined(GHASH)
        ctx->ghash = gcm_ghash_4bit;
#  else
        ctx->ghash = NULL;
#  endif
    }
# else
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
#endif
}
890
891
/*
 * Start a new message under the key already held in |ctx|.
 *
 * Resets the running hash (Xi), both length counters and the partial-block
 * offsets, then derives the initial counter block Yi from the IV:
 *   - a 12-byte IV is copied verbatim and the 32-bit counter is set to 1;
 *   - any other length is hashed into Yi block by block (a short final block
 *     is only XORed over its leading bytes), followed by a block carrying
 *     the IV length in bits.
 * The initial counter block is encrypted into ctx->EK0 and Yi is left
 * holding counter + 1, ready for the first data block.
 *
 * ctx - initialised context
 * iv  - initialisation vector
 * len - length of |iv| in bytes
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->Yi.u[0] = 0;
    ctx->Yi.u[1] = 0;
    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;
    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t k;
        u64 iv_bits = (u64)len << 3;    /* captured before len is consumed */

        /* whole 16-byte blocks */
        for (; len >= 16; iv += 16, len -= 16) {
            for (k = 0; k < 16; ++k)
                ctx->Yi.c[k] ^= iv[k];
            GCM_MUL(ctx, Yi);
        }
        /* trailing partial block, if any */
        if (len != 0) {
            for (k = 0; k < len; ++k)
                ctx->Yi.c[k] ^= iv[k];
            GCM_MUL(ctx, Yi);
        }

        /* fold the IV bit length into the low 8 bytes, big-endian */
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Yi.u[1] ^= BSWAP8(iv_bits);
#else
            for (k = 0; k < 8; ++k)
                ctx->Yi.c[8 + k] ^= (u8)(iv_bits >> (56 - 8 * k));
#endif
        } else {
            ctx->Yi.u[1] ^= iv_bits;
        }

        GCM_MUL(ctx, Yi);

        /* pick up the 32-bit counter from the last four bytes of Yi */
        if (is_endian.little) {
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Yi.d[3]);
#else
            ctr = GETU32(ctx->Yi.c + 12);
#endif
        } else {
            ctr = ctx->Yi.d[3];
        }
    }

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);

    /* advance the counter and store it back into Yi */
    ++ctr;
    if (is_endian.little) {
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    } else {
        ctx->Yi.d[3] = ctr;
    }
}
974
975
/*
 * Feed additional authenticated data into the running hash.
 *
 * May be called several times; ctx->ares records how many bytes of an
 * unfinished 16-byte block have already been XORed into Xi.
 *
 * ctx - context after CRYPTO_gcm128_setiv
 * aad - data to authenticate
 * len - length of |aad| in bytes
 *
 * Returns  0 on success,
 *         -2 if message data has already been processed (ctx->len.u[1] != 0),
 *         -1 if the accumulated AAD length exceeds 2^61 bytes or the 64-bit
 *            sum wraps around.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n != 0) {
        /* top up the block left unfinished by the previous call */
        for (; n != 0 && len != 0; --len) {
            ctx->Xi.c[n] ^= *(aad++);
            n = (n + 1) % 16;
        }
        if (n != 0) {
            /* input ran out before the block was complete */
            ctx->ares = n;
            return 0;
        }
        GCM_MUL(ctx, Xi);
    }
#ifdef GHASH
    /* hash every whole block in one call */
    i = len & (size_t)-16;
    if (i != 0) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    for (; len >= 16; aad += 16, len -= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx, Xi);
    }
#endif
    if (len != 0) {
        /* stash the tail in Xi; it is multiplied once the block completes */
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
1035
1036
/*
 * Encrypt |len| bytes from |in| to |out| in counter mode and fold the
 * resulting ciphertext into the running hash Xi.
 *
 * May be called repeatedly: ctx->mres carries the offset inside a partially
 * consumed key-stream block (ctx->EKi) from one call to the next.  The first
 * call also closes any AAD block left open by CRYPTO_gcm128_aad.
 *
 * ctx - context after CRYPTO_gcm128_setiv (and optional AAD)
 * in  - plaintext
 * out - ciphertext destination
 * len - number of bytes to process
 *
 * Returns 0 on success, -1 if the accumulated message length would exceed
 * 2^36 - 32 bytes or the 64-bit sum wraps around.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx, Xi);
        ctx->ares = 0;
    }

    /* load the 32-bit block counter from the tail of Yi */
    if (is_endian.little) {
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    } else {
        ctr = ctx->Yi.d[3];
    }

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        /* do { } while (0) so that "break" can drop to the byte-wise loop */
        do {
            if (n) {
                /* use up the key stream left over from the previous call */
                while (n && len) {
                    u8 c = *(in++) ^ ctx->EKi.c[n];

                    *(out++) = c;
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n != 0) {
                    ctx->mres = n;
                    return 0;
                }
                GCM_MUL(ctx, Xi);
            }
# if defined(STRICT_ALIGNMENT)
            /* word-wise access below needs both buffers aligned */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH) && defined(GHASH_CHUNK)
            /* encrypt a whole chunk, then hash it in a single GHASH call */
            while (len >= GHASH_CHUNK) {
                size_t left;

                for (left = GHASH_CHUNK; left != 0; left -= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little) {
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    } else {
                        ctx->Yi.d[3] = ctr;
                    }
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
            /* remaining whole blocks (less than one chunk) */
            i = len & (size_t)-16;
            if (i != 0) {
                size_t bulk = i;

                while (len >= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little) {
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    } else {
                        ctx->Yi.d[3] = ctr;
                    }
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - bulk, bulk);
            }
# else
            /* no bulk GHASH available: hash each block as it is produced */
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little) {
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                } else {
                    ctx->Yi.d[3] = ctr;
                }
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t w = in_t[i] ^ ctx->EKi.t[i];

                    out_t[i] = w;
                    ctx->Xi.t[i] ^= w;
                }
                GCM_MUL(ctx, Xi);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                /* final partial block; n is 0 here and ends up as the tail length */
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little) {
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                } else {
                    ctx->Yi.d[3] = ctr;
                }
                while (len--) {
                    u8 c = in[n] ^ ctx->EKi.c[n];

                    out[n] = c;
                    ctx->Xi.c[n] ^= c;
                    ++n;
                }
            }

            ctx->mres = n;
            return 0;
        } while (0);
    }
#endif
    /* byte-at-a-time path: small-footprint builds and unaligned buffers */
    for (i = 0; i < len; ++i) {
        u8 c;

        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little) {
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            } else {
                ctx->Yi.d[3] = ctr;
            }
        }
        c = in[i] ^ ctx->EKi.c[n];
        out[i] = c;
        ctx->Xi.c[n] ^= c;
        n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx, Xi);
    }

    ctx->mres = n;
    return 0;
}
1222
1223
/*
 * Decrypt |len| bytes from |in| to |out| in counter mode and fold the
 * incoming ciphertext into the running hash Xi.
 *
 * May be called repeatedly: ctx->mres carries the offset inside a partially
 * consumed key-stream block (ctx->EKi) from one call to the next.  The first
 * call also closes any AAD block left open by CRYPTO_gcm128_aad.
 *
 * ctx - context after CRYPTO_gcm128_setiv (and optional AAD)
 * in  - ciphertext
 * out - plaintext destination
 * len - number of bytes to process
 *
 * Returns 0 on success, -1 if the accumulated message length would exceed
 * 2^36 - 32 bytes or the 64-bit sum wraps around.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx, Xi);
        ctx->ares = 0;
    }

    /* load the 32-bit block counter from the tail of Yi */
    if (is_endian.little) {
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    } else {
        ctr = ctx->Yi.d[3];
    }

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        /* do { } while (0) so that "break" can drop to the byte-wise loop */
        do {
            if (n) {
                /* use up the key stream left over from the previous call */
                while (n && len) {
                    u8 c = *(in++);

                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n != 0) {
                    ctx->mres = n;
                    return 0;
                }
                GCM_MUL(ctx, Xi);
            }
# if defined(STRICT_ALIGNMENT)
            /* word-wise access below needs both buffers aligned */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH) && defined(GHASH_CHUNK)
            /*
             * Hash the ciphertext before decrypting it: with in == out the
             * input would otherwise be overwritten first.
             */
            while (len >= GHASH_CHUNK) {
                size_t left;

                GHASH(ctx, in, GHASH_CHUNK);
                for (left = GHASH_CHUNK; left != 0; left -= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little) {
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    } else {
                        ctx->Yi.d[3] = ctr;
                    }
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                }
                len -= GHASH_CHUNK;
            }
            /* remaining whole blocks (less than one chunk) */
            i = len & (size_t)-16;
            if (i != 0) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little) {
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    } else {
                        ctx->Yi.d[3] = ctr;
                    }
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            /* no bulk GHASH available: hash each block as it is consumed */
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little) {
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                } else {
                    ctx->Yi.d[3] = ctr;
                }
                /*
                 * Work on whole size_t words through in_t/out_t, matching
                 * the word-wide EKi.t[] and Xi.t[] operands.  Indexing the
                 * byte pointers in/out here would read and write single
                 * bytes only, yielding wrong plaintext and a wrong hash.
                 */
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];

                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx, Xi);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                /* final partial block; n is 0 here and ends up as the tail length */
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little) {
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                } else {
                    ctx->Yi.d[3] = ctr;
                }
                while (len--) {
                    u8 c = in[n];

                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
            }

            ctx->mres = n;
            return 0;
        } while (0);
    }
#endif
    /* byte-at-a-time path: small-footprint builds and unaligned buffers */
    for (i = 0; i < len; ++i) {
        u8 c;

        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little) {
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            } else {
                ctx->Yi.d[3] = ctr;
            }
        }
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx, Xi);
    }

    ctx->mres = n;
    return 0;
}
1414
1415
/*
 * Variant of CRYPTO_gcm128_encrypt that hands whole blocks to a
 * caller-supplied multi-block counter-mode routine.
 *
 * Leftover key stream from a previous call and a trailing partial block are
 * still handled byte by byte with ctx->block / ctx->EKi.
 *
 * ctx    - context after CRYPTO_gcm128_setiv (and optional AAD)
 * in     - plaintext
 * out    - ciphertext destination
 * len    - number of bytes to process
 * stream - called as stream(in, out, blocks, key, ctx->Yi.c); this function
 *          advances the counter in Yi itself after every call
 *
 * Returns 0 on success, -1 if the accumulated message length would exceed
 * 2^36 - 32 bytes or the 64-bit sum wraps around.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx, Xi);
        ctx->ares = 0;
    }

    /* load the 32-bit block counter from the tail of Yi */
    if (is_endian.little) {
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    } else {
        ctr = ctx->Yi.d[3];
    }

    n = ctx->mres;
    if (n != 0) {
        /* use up the key stream left over from the previous call */
        while (n && len) {
            u8 c = *(in++) ^ ctx->EKi.c[n];

            *(out++) = c;
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n != 0) {
            ctx->mres = n;
            return 0;
        }
        GCM_MUL(ctx, Xi);
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* full chunks: encrypt, then hash the fresh ciphertext */
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (is_endian.little) {
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        } else {
            ctx->Yi.d[3] = ctr;
        }
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    /* remaining whole blocks */
    i = len & (size_t)-16;
    if (i != 0) {
        size_t blocks = i / 16;
#if !defined(GHASH)
        size_t k;
#endif

        (*stream) (in, out, blocks, key, ctx->Yi.c);
        ctr += (unsigned int)blocks;
        if (is_endian.little) {
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        } else {
            ctx->Yi.d[3] = ctr;
        }
        in += i;
        len -= i;
#if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
#else
        for (; blocks != 0; --blocks, out += 16) {
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= out[k];
            GCM_MUL(ctx, Xi);
        }
#endif
    }
    if (len) {
        /* final partial block; n is 0 here and ends up as the tail length */
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        } else {
            ctx->Yi.d[3] = ctr;
        }
        while (len--) {
            u8 c = in[n] ^ ctx->EKi.c[n];

            out[n] = c;
            ctx->Xi.c[n] ^= c;
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}
1536
1537
/*
 * Variant of CRYPTO_gcm128_decrypt that hands whole blocks to a
 * caller-supplied multi-block counter-mode routine.
 *
 * Ciphertext is always hashed before it is decrypted.  Leftover key stream
 * from a previous call and a trailing partial block are still handled byte
 * by byte with ctx->block / ctx->EKi.
 *
 * ctx    - context after CRYPTO_gcm128_setiv (and optional AAD)
 * in     - ciphertext
 * out    - plaintext destination
 * len    - number of bytes to process
 * stream - called as stream(in, out, blocks, key, ctx->Yi.c); this function
 *          advances the counter in Yi itself after every call
 *
 * Returns 0 on success, -1 if the accumulated message length would exceed
 * 2^36 - 32 bytes or the 64-bit sum wraps around.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx, Xi);
        ctx->ares = 0;
    }

    /* load the 32-bit block counter from the tail of Yi */
    if (is_endian.little) {
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    } else {
        ctr = ctx->Yi.d[3];
    }

    n = ctx->mres;
    if (n != 0) {
        /* use up the key stream left over from the previous call */
        while (n && len) {
            u8 c = *(in++);

            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n != 0) {
            ctx->mres = n;
            return 0;
        }
        GCM_MUL(ctx, Xi);
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* full chunks: hash the ciphertext, then decrypt it */
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (is_endian.little) {
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        } else {
            ctx->Yi.d[3] = ctr;
        }
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    /* remaining whole blocks */
    i = len & (size_t)-16;
    if (i != 0) {
        size_t blocks = i / 16;
#if defined(GHASH)
        GHASH(ctx, in, i);
#else
        /* hash through a scratch pointer so that |in| stays put */
        const unsigned char *ct = in;
        size_t b, k;

        for (b = 0; b < blocks; ++b, ct += 16) {
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= ct[k];
            GCM_MUL(ctx, Xi);
        }
#endif
        (*stream) (in, out, blocks, key, ctx->Yi.c);
        ctr += (unsigned int)blocks;
        if (is_endian.little) {
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        } else {
            ctx->Yi.d[3] = ctr;
        }
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        /* final partial block; n is 0 here and ends up as the tail length */
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        } else {
            ctx->Yi.d[3] = ctr;
        }
        while (len--) {
            u8 c = in[n];

            ctx->Xi.c[n] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}
1665
1666
/*
 * Complete the authentication tag and optionally compare it with |tag|.
 *
 * Any block still open in Xi (AAD or message) is multiplied out, the AAD and
 * message lengths in bits are folded in as a final block, and the result is
 * XORed with ctx->EK0.  The finished tag is left in ctx->Xi.
 *
 * Side effect: on little-endian builds without BSWAP8, ctx->len is
 * overwritten with the bit lengths while they are byte-swapped.
 *
 * ctx - context holding the processed message
 * tag - expected tag, or NULL to only compute it
 * len - number of tag bytes to compare
 *
 * Returns the result of CRYPTO_memcmp(ctx->Xi.c, tag, len) when |tag| is
 * non-NULL and |len| does not exceed sizeof(ctx->Xi) (presumably 0 on a
 * match, as with memcmp; CRYPTO_memcmp is defined elsewhere), otherwise -1.
 */
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    u64 abits = ctx->len.u[0] << 3;
    u64 cbits = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx, Xi);

    /* the length block is big-endian on the wire */
    if (is_endian.little) {
#ifdef BSWAP8
        abits = BSWAP8(abits);
        cbits = BSWAP8(cbits);
#else
        u8 *lenbuf = ctx->len.c;

        /* store in host order, then read back as big-endian 32-bit halves */
        ctx->len.u[0] = abits;
        ctx->len.u[1] = cbits;

        abits = (u64)GETU32(lenbuf) << 32 | GETU32(lenbuf + 4);
        cbits = (u64)GETU32(lenbuf + 8) << 32 | GETU32(lenbuf + 12);
#endif
    }

    ctx->Xi.u[0] ^= abits;
    ctx->Xi.u[1] ^= cbits;
    GCM_MUL(ctx, Xi);

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (!tag || len > sizeof(ctx->Xi))
        return -1;

    return CRYPTO_memcmp(ctx->Xi.c, tag, len);
}
1711
1712
/*
 * Finalise the message and copy the tag out.
 *
 * Runs CRYPTO_gcm128_finish with no reference tag (its return value is
 * ignored) and then copies up to sizeof(ctx->Xi.c) bytes of ctx->Xi into
 * |tag|; a larger |len| is clamped to that size.
 *
 * ctx - context holding the processed message
 * tag - destination buffer
 * len - number of tag bytes requested
 */
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    size_t ncopy = len;

    CRYPTO_gcm128_finish(ctx, NULL, 0);

    if (ncopy > sizeof(ctx->Xi.c))
        ncopy = sizeof(ctx->Xi.c);
    memcpy(tag, ctx->Xi.c, ncopy);
}
1718
1719
/*
 * Allocate a GCM context with OPENSSL_malloc and initialise it with
 * CRYPTO_gcm128_init.
 *
 * key   - opaque key schedule passed on to CRYPTO_gcm128_init
 * block - single-block encryption routine passed on likewise
 *
 * Returns the new context, or NULL if the allocation failed.  The matching
 * destructor in this file is CRYPTO_gcm128_release.
 */
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *fresh;

    fresh = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT));
    if (fresh != NULL)
        CRYPTO_gcm128_init(fresh, key, block);

    return fresh;
}
1728
1729
/*
 * Wipe a context with OPENSSL_cleanse and free it with OPENSSL_free.
 * A NULL |ctx| is accepted and ignored.
 */
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    if (ctx == NULL)
        return;

    OPENSSL_cleanse(ctx, sizeof(*ctx));
    OPENSSL_free(ctx);
}
1736
1737
#if defined(SELFTEST)
1738
# include <stdio.h>
1739
# include <openssl/aes.h>
1740
1741
/* Test Case 1 */
1742
static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1743
static const u8 T1[] = {
1744
    0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1745
    0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
1746
};
1747
1748
/* Test Case 2 */
1749
# define K2 K1
1750
# define A2 A1
1751
# define IV2 IV1
1752
static const u8 P2[16];
1753
static const u8 C2[] = {
1754
    0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1755
    0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1756
};
1757
1758
static const u8 T2[] = {
1759
    0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1760
    0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
1761
};
1762
1763
/*
 * Test Case 3: A3 resolves through A2 to A1 (NULL), i.e. no AAD.
 * K3 is a 16-byte key, P3 a 64-byte plaintext, IV3 a 12-byte IV,
 * C3 the expected 64-byte ciphertext and T3 the expected tag.
 */
1764
# define A3 A2
1765
static const u8 K3[] = {
1766
    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1767
    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1768
};
1769
1770
static const u8 P3[] = {
1771
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1772
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1773
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1774
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1775
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1776
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1777
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1778
    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1779
};
1780
1781
static const u8 IV3[] = {
1782
    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1783
    0xde, 0xca, 0xf8, 0x88
1784
};
1785
1786
static const u8 C3[] = {
1787
    0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1788
    0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1789
    0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1790
    0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1791
    0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1792
    0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1793
    0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1794
    0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1795
};
1796
1797
static const u8 T3[] = {
1798
    0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1799
    0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
1800
};
1801
1802
/*
 * Test Case 4: key and IV are reused from Test Case 3.
 * P4 is a 60-byte plaintext (not a multiple of 16 bytes), A4 is 20 bytes
 * of AAD, C4 the expected 60-byte ciphertext and T4 the expected tag.
 */
1803
# define K4 K3
1804
# define IV4 IV3
1805
static const u8 P4[] = {
1806
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1807
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1808
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1809
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1810
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1811
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1812
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1813
    0xba, 0x63, 0x7b, 0x39
1814
};
1815
1816
static const u8 A4[] = {
1817
    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1818
    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1819
    0xab, 0xad, 0xda, 0xd2
1820
};
1821
1822
static const u8 C4[] = {
1823
    0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1824
    0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1825
    0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1826
    0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1827
    0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1828
    0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1829
    0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1830
    0x3d, 0x58, 0xe0, 0x91
1831
};
1832
1833
static const u8 T4[] = {
1834
    0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1835
    0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
1836
};
1837
1838
/*
 * Test Case 5: key, plaintext and AAD are reused from Test Case 4; only
 * the IV changes.  IV5 is 8 bytes long (shorter than the 12-byte IVs used
 * so far), C5 is the expected 60-byte ciphertext and T5 the expected tag.
 */
1839
# define K5 K4
1840
# define P5 P4
1841
# define A5 A4
1842
static const u8 IV5[] = {
1843
    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1844
};
1845
1846
static const u8 C5[] = {
1847
    0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1848
    0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1849
    0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1850
    0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1851
    0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1852
    0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1853
    0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1854
    0xc2, 0x3f, 0x45, 0x98
1855
};
1856
1857
static const u8 T5[] = {
1858
    0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1859
    0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
1860
};
1861
1862
/*
 * Test Case 6: key, plaintext and AAD are reused from Test Case 5 (and
 * thereby Test Case 4); only the IV changes.  IV6 is 60 bytes long,
 * C6 is the expected 60-byte ciphertext and T6 the expected tag.
 */
1863
# define K6 K5
1864
# define P6 P5
1865
# define A6 A5
1866
static const u8 IV6[] = {
1867
    0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1868
    0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1869
    0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1870
    0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1871
    0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1872
    0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1873
    0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1874
    0xa6, 0x37, 0xb3, 0x9b
1875
};
1876
1877
static const u8 C6[] = {
1878
    0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1879
    0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1880
    0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1881
    0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1882
    0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1883
    0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1884
    0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1885
    0x4c, 0x34, 0xae, 0xe5
1886
};
1887
1888
static const u8 T6[] = {
1889
    0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1890
    0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
1891
};
1892
1893
/*
 * Test Case 7: 192-bit counterpart of Test Case 1.
 * K7 (24 bytes) and IV7 (12 bytes) are all-zero static objects; P7, A7
 * and C7 are NULL pointers, so only the 16-byte tag T7 is verified.
 */
1894
static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1895
static const u8 T7[] = {
1896
    0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1897
    0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
1898
};
1899
1900
/*
 * Test Case 8: key, IV and AAD are reused from Test Case 7 (zero 24-byte
 * key, zero 12-byte IV, NULL AAD).  P8 is 16 zero bytes of plaintext,
 * C8 the expected ciphertext and T8 the expected tag.
 */
1901
# define K8 K7
1902
# define IV8 IV7
1903
# define A8 A7
1904
static const u8 P8[16];
1905
static const u8 C8[] = {
1906
    0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1907
    0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1908
};
1909
1910
static const u8 T8[] = {
1911
    0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1912
    0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
1913
};
1914
1915
/*
 * Test Case 9: A9 resolves through A8 to A7 (NULL), i.e. no AAD.
 * K9 is a 24-byte key, P9 a 64-byte plaintext, IV9 a 12-byte IV,
 * C9 the expected 64-byte ciphertext and T9 the expected tag.
 */
1916
# define A9 A8
1917
static const u8 K9[] = {
1918
    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1919
    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1920
    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1921
};
1922
1923
static const u8 P9[] = {
1924
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1925
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1926
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1927
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1928
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1929
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1930
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1931
    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1932
};
1933
1934
static const u8 IV9[] = {
1935
    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1936
    0xde, 0xca, 0xf8, 0x88
1937
};
1938
1939
static const u8 C9[] = {
1940
    0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1941
    0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1942
    0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1943
    0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1944
    0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1945
    0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1946
    0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1947
    0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1948
};
1949
1950
static const u8 T9[] = {
1951
    0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1952
    0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
1953
};
1954
1955
/*
 * Test Case 10: key and IV are reused from Test Case 9.
 * P10 is a 60-byte plaintext, A10 is 20 bytes of AAD, C10 the expected
 * 60-byte ciphertext and T10 the expected tag.
 */
1956
# define K10 K9
1957
# define IV10 IV9
1958
static const u8 P10[] = {
1959
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1960
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1961
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1962
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1963
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1964
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1965
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1966
    0xba, 0x63, 0x7b, 0x39
1967
};
1968
1969
static const u8 A10[] = {
1970
    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1971
    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1972
    0xab, 0xad, 0xda, 0xd2
1973
};
1974
1975
static const u8 C10[] = {
1976
    0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1977
    0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1978
    0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1979
    0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1980
    0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1981
    0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1982
    0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1983
    0xcc, 0xda, 0x27, 0x10
1984
};
1985
1986
static const u8 T10[] = {
1987
    0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1988
    0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
1989
};
1990
1991
/*
 * Test Case 11: key, plaintext and AAD are reused from Test Case 10; only
 * the IV changes.  IV11 is 8 bytes long, C11 is the expected 60-byte
 * ciphertext and T11 the expected tag.
 */
1992
# define K11 K10
1993
# define P11 P10
1994
# define A11 A10
1995
static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1996
1997
static const u8 C11[] = {
1998
    0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1999
    0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
2000
    0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
2001
    0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
2002
    0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
2003
    0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
2004
    0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
2005
    0xa0, 0xf0, 0x62, 0xf7
2006
};
2007
2008
static const u8 T11[] = {
2009
    0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
2010
    0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
2011
};
2012
2013
/*
 * Test Case 12: key, plaintext and AAD are reused from Test Case 11 (and
 * thereby Test Case 10); only the IV changes.  IV12 is 60 bytes long,
 * C12 is the expected 60-byte ciphertext and T12 the expected tag.
 */
2014
# define K12 K11
2015
# define P12 P11
2016
# define A12 A11
2017
static const u8 IV12[] = {
2018
    0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2019
    0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2020
    0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2021
    0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2022
    0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2023
    0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2024
    0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2025
    0xa6, 0x37, 0xb3, 0x9b
2026
};
2027
2028
static const u8 C12[] = {
2029
    0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
2030
    0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
2031
    0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
2032
    0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
2033
    0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
2034
    0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
2035
    0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
2036
    0xe9, 0xb7, 0x37, 0x3b
2037
};
2038
2039
static const u8 T12[] = {
2040
    0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
2041
    0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
2042
};
2043
2044
/*
 * Test Case 13: 256-bit counterpart of Test Case 1.
 * K13 (32 bytes) and IV13 (12 bytes) are all-zero static objects; P13, A13
 * and C13 are NULL pointers, so only the 16-byte tag T13 is verified.
 */
2045
static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
2046
static const u8 T13[] = {
2047
    0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
2048
    0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
2049
};
2050
2051
/*
 * Test Case 14: key and AAD are reused from Test Case 13 (zero 32-byte
 * key, NULL AAD).  P14 (16 bytes) and IV14 (12 bytes) are all-zero static
 * objects; C14 is the expected ciphertext and T14 the expected tag.
 */
2052
# define K14 K13
2053
# define A14 A13
2054
static const u8 P14[16], IV14[12];
2055
static const u8 C14[] = {
2056
    0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
2057
    0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
2058
};
2059
2060
static const u8 T14[] = {
2061
    0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
2062
    0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
2063
};
2064
2065
/*
 * Test Case 15: A15 resolves through A14 to A13 (NULL), i.e. no AAD.
 * K15 is a 32-byte key, P15 a 64-byte plaintext, IV15 a 12-byte IV,
 * C15 the expected 64-byte ciphertext and T15 the expected tag.
 */
2066
# define A15 A14
2067
static const u8 K15[] = {
2068
    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2069
    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
2070
    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2071
    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
2072
};
2073
2074
static const u8 P15[] = {
2075
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2076
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2077
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2078
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2079
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2080
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2081
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2082
    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2083
};
2084
2085
static const u8 IV15[] = {
2086
    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2087
    0xde, 0xca, 0xf8, 0x88
2088
};
2089
2090
static const u8 C15[] = {
2091
    0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2092
    0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2093
    0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2094
    0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2095
    0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2096
    0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2097
    0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2098
    0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2099
};
2100
2101
static const u8 T15[] = {
2102
    0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2103
    0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
2104
};
2105
2106
/*
 * Test Case 16: key and IV are reused from Test Case 15.
 * P16 is a 60-byte plaintext, A16 is 20 bytes of AAD, C16 the expected
 * 60-byte ciphertext and T16 the expected tag.
 */
2107
# define K16 K15
2108
# define IV16 IV15
2109
static const u8 P16[] = {
2110
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2111
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2112
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2113
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2114
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2115
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2116
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2117
    0xba, 0x63, 0x7b, 0x39
2118
};
2119
2120
static const u8 A16[] = {
2121
    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2122
    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2123
    0xab, 0xad, 0xda, 0xd2
2124
};
2125
2126
static const u8 C16[] = {
2127
    0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2128
    0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2129
    0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2130
    0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2131
    0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2132
    0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2133
    0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2134
    0xbc, 0xc9, 0xf6, 0x62
2135
};
2136
2137
static const u8 T16[] = {
2138
    0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2139
    0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
2140
};
2141
2142
/*
 * Test Case 17: key, plaintext and AAD are reused from Test Case 16; only
 * the IV changes.  IV17 is 8 bytes long, C17 is the expected 60-byte
 * ciphertext and T17 the expected tag.
 */
2143
# define K17 K16
2144
# define P17 P16
2145
# define A17 A16
2146
static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2147
2148
static const u8 C17[] = {
2149
    0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2150
    0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2151
    0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2152
    0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2153
    0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2154
    0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2155
    0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2156
    0xf4, 0x7c, 0x9b, 0x1f
2157
};
2158
2159
static const u8 T17[] = {
2160
    0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2161
    0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
2162
};
2163
2164
/*
 * Test Case 18: key, plaintext and AAD are reused from Test Case 17 (and
 * thereby Test Case 16); only the IV changes.  IV18 is 60 bytes long,
 * C18 is the expected 60-byte ciphertext and T18 the expected tag.
 */
2165
# define K18 K17
2166
# define P18 P17
2167
# define A18 A17
2168
static const u8 IV18[] = {
2169
    0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2170
    0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2171
    0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2172
    0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2173
    0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2174
    0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2175
    0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2176
    0xa6, 0x37, 0xb3, 0x9b
2177
};
2178
2179
static const u8 C18[] = {
2180
    0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2181
    0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2182
    0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2183
    0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2184
    0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2185
    0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2186
    0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2187
    0x44, 0xae, 0x7e, 0x3f
2188
};
2189
2190
static const u8 T18[] = {
2191
    0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2192
    0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
2193
};
2194
2195
/*
 * Test Case 19: AAD-only case.  Key, plaintext, IV and ciphertext are
 * reused from Test Case 1 (zero 16-byte key, zero 12-byte IV, NULL
 * plaintext and ciphertext).  A19 is 128 bytes of AAD; because P19 and
 * C19 are NULL, only the tag T19 is verified.
 */
2196
# define K19 K1
2197
# define P19 P1
2198
# define IV19 IV1
2199
# define C19 C1
2200
static const u8 A19[] = {
2201
    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2202
    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2203
    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2204
    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2205
    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2206
    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2207
    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2208
    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2209
    0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2210
    0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2211
    0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2212
    0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2213
    0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2214
    0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2215
    0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2216
    0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2217
};
2218
2219
static const u8 T19[] = {
2220
    0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2221
    0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
2222
};
2223
2224
/*
 * Test Case 20: key and AAD are reused from Test Case 1 (zero 16-byte key,
 * NULL AAD).  IV20 is a 64-byte IV whose first four bytes are 0xff and
 * whose remaining bytes are implicitly zero.  P20 is 288 zero bytes of
 * plaintext, C20 the expected 288-byte ciphertext and T20 the expected tag.
 */
2225
# define K20 K1
2226
# define A20 A1
2227
/* this results in 0xff in counter LSB */
2228
static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2229
2230
static const u8 P20[288];
2231
static const u8 C20[] = {
2232
    0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
2233
    0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
2234
    0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
2235
    0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
2236
    0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
2237
    0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
2238
    0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
2239
    0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
2240
    0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
2241
    0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
2242
    0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
2243
    0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
2244
    0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
2245
    0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
2246
    0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
2247
    0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
2248
    0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
2249
    0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
2250
    0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
2251
    0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
2252
    0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
2253
    0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
2254
    0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
2255
    0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
2256
    0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
2257
    0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
2258
    0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
2259
    0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
2260
    0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
2261
    0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
2262
    0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
2263
    0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
2264
    0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
2265
    0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
2266
    0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
2267
    0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
2268
};
2269
2270
static const u8 T20[] = {
2271
    0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
2272
    0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
2273
};
2274
2275
/*
 * TEST_CASE(n): run known-answer test n in both directions.
 *
 * Expands in a scope that provides `ctx` (GCM128_CONTEXT), `key` (AES_KEY)
 * and the failure counter `ret`.  The operands are the file-scope symbols
 * K<n>, IV<n>, A<n>, P<n>, C<n> and T<n>.
 *
 * The key length in bits is sizeof(K<n>) * 8.  AAD, plaintext and
 * ciphertext are only processed/compared when the corresponding symbol is
 * non-NULL; for those cases the scratch buffer is merely pointer-sized
 * (sizeof of a pointer) and is never compared.  `ret` is incremented and a
 * message printed once per failing direction, where failure means
 * CRYPTO_gcm128_finish() returned non-zero for the 16-byte tag or the
 * produced data differs from the expected data.
 */
# define TEST_CASE(n)    do {                                            \
        u8 result[sizeof(P##n)];                                        \
                                                                        \
        AES_set_encrypt_key(K##n, sizeof(K##n) * 8, &key);              \
        CRYPTO_gcm128_init(&ctx, &key, (block128_f)AES_encrypt);        \
                                                                        \
        /* encrypt direction: P -> result, compare with C */            \
        CRYPTO_gcm128_setiv(&ctx, IV##n, sizeof(IV##n));                \
        memset(result, 0, sizeof(result));                              \
        if (A##n) {                                                     \
            CRYPTO_gcm128_aad(&ctx, A##n, sizeof(A##n));                \
        }                                                               \
        if (P##n) {                                                     \
            CRYPTO_gcm128_encrypt(&ctx, P##n, result, sizeof(result));  \
        }                                                               \
        if (CRYPTO_gcm128_finish(&ctx, T##n, 16)                        \
            || (C##n && memcmp(result, C##n, sizeof(result)))) {        \
            ret++;                                                      \
            printf ("encrypt test#%d failed.\n",n);                     \
        }                                                               \
                                                                        \
        /* decrypt direction: C -> result, compare with P */            \
        CRYPTO_gcm128_setiv(&ctx, IV##n, sizeof(IV##n));                \
        memset(result, 0, sizeof(result));                              \
        if (A##n) {                                                     \
            CRYPTO_gcm128_aad(&ctx, A##n, sizeof(A##n));                \
        }                                                               \
        if (C##n) {                                                     \
            CRYPTO_gcm128_decrypt(&ctx, C##n, result, sizeof(result));  \
        }                                                               \
        if (CRYPTO_gcm128_finish(&ctx, T##n, 16)                        \
            || (P##n && memcmp(result, P##n, sizeof(result)))) {        \
            ret++;                                                      \
            printf ("decrypt test#%d failed.\n",n);                     \
        }                                                               \
    } while(0)
2294
2295
int main()
2296
{
2297
    GCM128_CONTEXT ctx;
2298
    AES_KEY key;
2299
    int ret = 0;
2300
2301
    TEST_CASE(1);
2302
    TEST_CASE(2);
2303
    TEST_CASE(3);
2304
    TEST_CASE(4);
2305
    TEST_CASE(5);
2306
    TEST_CASE(6);
2307
    TEST_CASE(7);
2308
    TEST_CASE(8);
2309
    TEST_CASE(9);
2310
    TEST_CASE(10);
2311
    TEST_CASE(11);
2312
    TEST_CASE(12);
2313
    TEST_CASE(13);
2314
    TEST_CASE(14);
2315
    TEST_CASE(15);
2316
    TEST_CASE(16);
2317
    TEST_CASE(17);
2318
    TEST_CASE(18);
2319
    TEST_CASE(19);
2320
    TEST_CASE(20);
2321
2322
# ifdef OPENSSL_CPUID_OBJ
2323
    {
2324
        size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
2325
        union {
2326
            u64 u;
2327
            u8 c[1024];
2328
        } buf;
2329
        int i;
2330
2331
        AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2332
        CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2333
        CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2334
2335
        CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2336
        start = OPENSSL_rdtsc();
2337
        CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2338
        gcm_t = OPENSSL_rdtsc() - start;
2339
2340
        CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2341
                              &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2342
                              (block128_f) AES_encrypt);
2343
        start = OPENSSL_rdtsc();
2344
        CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2345
                              &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2346
                              (block128_f) AES_encrypt);
2347
        ctr_t = OPENSSL_rdtsc() - start;
2348
2349
        printf("%.2f-%.2f=%.2f\n",
2350
               gcm_t / (double)sizeof(buf),
2351
               ctr_t / (double)sizeof(buf),
2352
               (gcm_t - ctr_t) / (double)sizeof(buf));
2353
#  ifdef GHASH
2354
        {
2355
            void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2356
                                 const u8 *inp, size_t len) = ctx.ghash;
2357
2358
            GHASH((&ctx), buf.c, sizeof(buf));
2359
            start = OPENSSL_rdtsc();
2360
            for (i = 0; i < 100; ++i)
2361
                GHASH((&ctx), buf.c, sizeof(buf));
2362
            gcm_t = OPENSSL_rdtsc() - start;
2363
            printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);
2364
        }
2365
#  endif
2366
    }
2367
# endif
2368
2369
    return ret;
2370
}
2371
#endif