Coverage Report

Created: 2022-08-24 06:30

/src/libressl/crypto/modes/gcm128.c

Every instrumented line in this file reported an execution count of 0, i.e. none of this file was exercised by the run. The source follows.
/* $OpenBSD: gcm128.c,v 1.22 2018/01/24 23:03:37 kettenis Exp $ */
/* ====================================================================
 * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 */

#define OPENSSL_FIPSAPI

#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>

#ifndef MODES_DEBUG
# ifndef NDEBUG
#  define NDEBUG
# endif
#endif

#if defined(BSWAP4) && defined(__STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
#undef  GETU32
#define GETU32(p) BSWAP4(*(const u32 *)(p))
#undef  PUTU32
#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
#endif

#define PACK(s)   ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V) \
  do { \
    if (sizeof(size_t)==8) { \
      u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
      V.lo  = (V.hi<<63)|(V.lo>>1); \
      V.hi  = (V.hi>>1 )^T; \
    } else { \
      u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
      V.lo  = (V.hi<<63)|(V.lo>>1); \
      V.hi  = (V.hi>>1 )^((u64)T<<32); \
    } \
  } while(0)
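
/*
 * REDUCE1BIT(V) multiplies V by x in GF(2^128) using GCM's reflected
 * bit order: V is shifted right one bit and, when a set bit falls
 * off, the reduction polynomial x^128 + x^7 + x^2 + x + 1 is folded
 * back in as the 0xe1 constant in the top byte; 0-(V.lo&1) forms an
 * all-ones or all-zero mask so the reduction stays branch-free.
 */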

/*
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
 * should never be set to 8. 8 is effectively reserved for testing
 * purposes. TABLE_BITS>1 selects the lookup-table-driven
 * implementations referred to as "Shoup's" in the GCM specification.
 * In other words OpenSSL does not cover the whole spectrum of
 * possible table-driven implementations. Why? In the non-"Shoup's"
 * case the memory access pattern is segmented in such a manner that
 * it is trivial to see that cache-timing information can reveal a
 * fair portion of the intermediate hash value. Given that the
 * ciphertext is always available to an attacker, the attacker can
 * attempt to deduce the secret parameter H and, if successful,
 * tamper with messages [which is nothing but trivial in CTR mode].
 * In the "Shoup's" case it is not as trivial, but there is no reason
 * to believe that it is resistant to cache-timing attacks. As for
 * the "8-bit" implementation, it consumes 16 (sixteen) times more
 * memory, 4KB per individual key + 1KB shared. On the pro side, it
 * should be twice as fast as the "4-bit" version. For gcc-generated
 * x86[_64] code the "8-bit" version was observed to run ~75% faster,
 * closer to 100% for commercial compilers... Yet the "4-bit"
 * procedure is preferred, because it is believed to provide a better
 * security-performance balance and adequate all-round performance.
 * "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free
 *   results in VM working-set trimming, meaning that a subsequent
 *   malloc would immediately incur working-set expansion);
 * - a larger table has a larger cache footprint, which can affect
 *   the performance of other code paths (not necessarily even from
 *   the same thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
#if TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
  int  i, j;
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  for (Htable[128]=V, i=64; i>0; i>>=1) {
    REDUCE1BIT(V);
    Htable[i] = V;
  }

  for (i=2; i<256; i<<=1) {
    u128 *Hi = Htable+i, H0 = *Hi;
    for (j=1; j<i; ++j) {
      Hi[j].hi = H0.hi^Htable[j].hi;
      Hi[j].lo = H0.lo^Htable[j].lo;
    }
  }
}
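
/*
 * On return Htable[n] is n*H in GF(2^128), with the byte n read in
 * GCM's reflected bit order: Htable[128] is H itself, each halving of
 * the index advances one multiplication by x (REDUCE1BIT), and the
 * second loop builds all remaining multiples by XOR, since
 * multiplication distributes over addition (XOR) in a binary field,
 * e.g. Htable[5] = Htable[4] ^ Htable[1].
 */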

static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
  u128 Z = { 0, 0};
  const u8 *xi = (const u8 *)Xi+15;
  size_t rem, n = *xi;
  static const size_t rem_8bit[256] = {
    PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
    PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
    PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
    PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
    PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
    PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
    PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
    PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
    PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
    PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
    PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
    PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
    PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
    PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
    PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
    PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
    PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
    PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
    PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
    PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
    PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
    PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
    PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
    PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
    PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
    PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
    PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
    PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
    PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
    PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
    PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
    PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
    PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
    PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
    PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
    PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
    PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
    PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
    PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
    PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
    PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
    PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
    PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
    PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
    PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
    PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
    PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
    PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
    PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
    PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
    PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
    PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
    PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
    PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
    PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
    PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
    PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
    PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
    PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
    PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
    PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
    PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
    PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
    PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

  while (1) {
    Z.hi ^= Htable[n].hi;
    Z.lo ^= Htable[n].lo;

    if ((u8 *)Xi==xi) break;

    n = *(--xi);

    rem  = (size_t)Z.lo&0xff;
    Z.lo = (Z.hi<<56)|(Z.lo>>8);
    Z.hi = (Z.hi>>8);
#if SIZE_MAX == 0xffffffffffffffff
    Z.hi ^= rem_8bit[rem];
#else
    Z.hi ^= (u64)rem_8bit[rem]<<32;
#endif
  }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
  Xi[0] = BSWAP8(Z.hi);
  Xi[1] = BSWAP8(Z.lo);
#else
  u8 *p = (u8 *)Xi;
  u32 v;
  v = (u32)(Z.hi>>32);  PUTU32(p,v);
  v = (u32)(Z.hi);  PUTU32(p+4,v);
  v = (u32)(Z.lo>>32);  PUTU32(p+8,v);
  v = (u32)(Z.lo);  PUTU32(p+12,v);
#endif
#else /* BIG_ENDIAN */
  Xi[0] = Z.hi;
  Xi[1] = Z.lo;
#endif
}
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)

#elif TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
  u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
  int  i;
#endif

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
  for (Htable[8]=V, i=4; i>0; i>>=1) {
    REDUCE1BIT(V);
    Htable[i] = V;
  }

  for (i=2; i<16; i<<=1) {
    u128 *Hi = Htable+i;
    int   j;
    for (V=*Hi, j=1; j<i; ++j) {
      Hi[j].hi = V.hi^Htable[j].hi;
      Hi[j].lo = V.lo^Htable[j].lo;
    }
  }
#else
  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
  V=Htable[4];
  Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
  Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
  Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
  V=Htable[8];
  Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
  Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
  Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
  Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
  Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
  Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
  Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
  /*
   * ARM assembler expects specific dword order in Htable.
   */
  {
    int j;
#if BYTE_ORDER == LITTLE_ENDIAN
    for (j=0;j<16;++j) {
      V = Htable[j];
      Htable[j].hi = V.lo;
      Htable[j].lo = V.hi;
    }
#else /* BIG_ENDIAN */
    for (j=0;j<16;++j) {
      V = Htable[j];
      Htable[j].hi = V.lo<<32|V.lo>>32;
      Htable[j].lo = V.hi<<32|V.hi>>32;
    }
#endif
  }
#endif
}

#ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
  PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
  PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
  PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
  PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;

  nlo  = ((const u8 *)Xi)[15];
  nhi  = nlo>>4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem  = (size_t)Z.lo&0xf;
    Z.lo = (Z.hi<<60)|(Z.lo>>4);
    Z.hi = (Z.hi>>4);
#if SIZE_MAX == 0xffffffffffffffff
    Z.hi ^= rem_4bit[rem];
#else
    Z.hi ^= (u64)rem_4bit[rem]<<32;
#endif
    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt<0)    break;

    nlo  = ((const u8 *)Xi)[cnt];
    nhi  = nlo>>4;
    nlo &= 0xf;

    rem  = (size_t)Z.lo&0xf;
    Z.lo = (Z.hi<<60)|(Z.lo>>4);
    Z.hi = (Z.hi>>4);
#if SIZE_MAX == 0xffffffffffffffff
    Z.hi ^= rem_4bit[rem];
#else
    Z.hi ^= (u64)rem_4bit[rem]<<32;
#endif
    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
  Xi[0] = BSWAP8(Z.hi);
  Xi[1] = BSWAP8(Z.lo);
#else
  u8 *p = (u8 *)Xi;
  u32 v;
  v = (u32)(Z.hi>>32);  PUTU32(p,v);
  v = (u32)(Z.hi);  PUTU32(p+4,v);
  v = (u32)(Z.lo>>32);  PUTU32(p+8,v);
  v = (u32)(Z.lo);  PUTU32(p+12,v);
#endif
#else /* BIG_ENDIAN */
  Xi[0] = Z.hi;
  Xi[1] = Z.lo;
#endif
}
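
/*
 * gcm_gmult_4bit computes Xi = Xi*H nibble-serially: starting from
 * the last byte of Xi it XORs in Htable[nibble] (= nibble*H), then
 * shifts the accumulator Z right by 4 bits, folding the four bits
 * shifted out back in via the precomputed rem_4bit reduction table.
 */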

#if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as a reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
        const u8 *inp, size_t len)
{
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;

#if 1
  do {
    cnt  = 15;
    nlo  = ((const u8 *)Xi)[15];
    nlo ^= inp[15];
    nhi  = nlo>>4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem  = (size_t)Z.lo&0xf;
      Z.lo = (Z.hi<<60)|(Z.lo>>4);
      Z.hi = (Z.hi>>4);
#if SIZE_MAX == 0xffffffffffffffff
      Z.hi ^= rem_4bit[rem];
#else
      Z.hi ^= (u64)rem_4bit[rem]<<32;
#endif
      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt<0)    break;

      nlo  = ((const u8 *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi  = nlo>>4;
      nlo &= 0xf;

      rem  = (size_t)Z.lo&0xf;
      Z.lo = (Z.hi<<60)|(Z.lo>>4);
      Z.hi = (Z.hi>>4);
#if SIZE_MAX == 0xffffffffffffffff
      Z.hi ^= rem_4bit[rem];
#else
      Z.hi ^= (u64)rem_4bit[rem]<<32;
#endif
      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }
#else
    /*
     * Extra 256+16 bytes per key plus 512 bytes of shared tables
     * [should] give a ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize the
     * cache footprint...
     */
    u128 Hshr4[16]; /* Htable shifted right by 4 bits */
    u8   Hshl4[16]; /* Htable shifted left  by 4 bits */
    static const unsigned short rem_8bit[256] = {
      0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
      0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
      0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
      0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
      0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
      0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
      0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
      0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
      0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
      0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
      0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
      0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
      0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
      0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
      0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
      0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
      0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
      0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
      0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
      0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
      0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
      0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
      0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
      0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
      0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
      0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
      0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
      0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
      0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
      0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
      0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
      0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase slows the procedure down by roughly
     * as much time as it makes each loop spin faster. In other words
     * single-block performance is approximately the same as for the
     * straightforward "4-bit" implementation, and from there on it
     * only gets faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
      Z.hi = Htable[cnt].hi;
      Z.lo = Htable[cnt].lo;
      Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
      Hshr4[cnt].hi = (Z.hi>>4);
      Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
      for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
        nlo  = ((const u8 *)Xi)[cnt];
        nlo ^= inp[cnt];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo&0xff;

        Z.lo = (Z.hi<<56)|(Z.lo>>8);
        Z.hi = (Z.hi>>8);

        Z.hi ^= Hshr4[nhi].hi;
        Z.lo ^= Hshr4[nhi].lo;
        Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
      }

      nlo  = ((const u8 *)Xi)[0];
      nlo ^= inp[0];
      nhi  = nlo>>4;
      nlo &= 0xf;

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;

      rem = (size_t)Z.lo&0xf;

      Z.lo = (Z.hi<<60)|(Z.lo>>4);
      Z.hi = (Z.hi>>4);

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;
      Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    u8 *p = (u8 *)Xi;
    u32 v;
    v = (u32)(Z.hi>>32);  PUTU32(p,v);
    v = (u32)(Z.hi);  PUTU32(p+4,v);
    v = (u32)(Z.lo>>32);  PUTU32(p+8,v);
    v = (u32)(Z.lo);  PUTU32(p+12,v);
#endif
#else /* BIG_ENDIAN */
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
#endif
  } while (inp+=16, len-=16);
}
#endif
#else
void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
#endif

#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/* GHASH_CHUNK is a "stride parameter" intended to mitigate the
 * cache-thrashing effect. In other words the idea is to hash data
 * while it's still in L1 cache after the encryption pass... */
#define GHASH_CHUNK       (3*1024)
#endif

#else /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
{
  u128 V,Z = { 0,0 };
  long X;
  int  i,j;
  const long *xi = (const long *)Xi;

  V.hi = H[0];  /* H is in host byte order, no byte swapping */
  V.lo = H[1];

  for (j=0; j<16/sizeof(long); ++j) {
#if BYTE_ORDER == LITTLE_ENDIAN
#if SIZE_MAX == 0xffffffffffffffff
#ifdef BSWAP8
      X = (long)(BSWAP8(xi[j]));
#else
      const u8 *p = (const u8 *)(xi+j);
      X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
#else
      const u8 *p = (const u8 *)(xi+j);
      X = (long)GETU32(p);
#endif
#else /* BIG_ENDIAN */
    X = xi[j];
#endif

    for (i=0; i<8*sizeof(long); ++i, X<<=1) {
      u64 M = (u64)(X>>(8*sizeof(long)-1));
      Z.hi ^= V.hi&M;
      Z.lo ^= V.lo&M;

      REDUCE1BIT(V);
    }
  }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
  Xi[0] = BSWAP8(Z.hi);
  Xi[1] = BSWAP8(Z.lo);
#else
  u8 *p = (u8 *)Xi;
  u32 v;
  v = (u32)(Z.hi>>32);  PUTU32(p,v);
  v = (u32)(Z.hi);  PUTU32(p+4,v);
  v = (u32)(Z.lo>>32);  PUTU32(p+8,v);
  v = (u32)(Z.lo);  PUTU32(p+12,v);
#endif
#else /* BIG_ENDIAN */
  Xi[0] = Z.hi;
  Xi[1] = Z.lo;
#endif
}
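
/*
 * The table-free fallback is a schoolbook shift-and-add multiply:
 * for each bit of Xi, most significant first, the current multiple V
 * of H is conditionally XORed into Z under the all-ones/all-zero
 * mask M, and V then advances by one multiplication by x via
 * REDUCE1BIT.
 */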
#define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif

#if defined(GHASH_ASM) && \
  (defined(__i386)  || defined(__i386__)  || \
   defined(__x86_64)  || defined(__x86_64__)  || \
   defined(_M_IX86) || defined(_M_AMD64)  || defined(_M_X64))
#include "x86_arch.h"
#endif

#if TABLE_BITS==4 && defined(GHASH_ASM)
# if  (defined(__i386)  || defined(__i386__)  || \
   defined(__x86_64)  || defined(__x86_64__)  || \
   defined(_M_IX86) || defined(_M_AMD64)  || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT

void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
#  endif
# elif defined(__arm__) || defined(__arm)
#  include "arm_arch.h"
#  if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
#  endif
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)  (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif
682
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
683
0
{
684
0
  memset(ctx,0,sizeof(*ctx));
685
0
  ctx->block = block;
686
0
  ctx->key   = key;
687
688
0
  (*block)(ctx->H.c,ctx->H.c,key);
689
690
0
#if BYTE_ORDER == LITTLE_ENDIAN
691
  /* H is stored in host byte order */
692
0
#ifdef BSWAP8
693
0
  ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
694
0
  ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
695
#else
696
  u8 *p = ctx->H.c;
697
  u64 hi,lo;
698
  hi = (u64)GETU32(p)  <<32|GETU32(p+4);
699
  lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
700
  ctx->H.u[0] = hi;
701
  ctx->H.u[1] = lo;
702
#endif
703
0
#endif
704
705
#if TABLE_BITS==8
706
  gcm_init_8bit(ctx->Htable,ctx->H.u);
707
#elif  TABLE_BITS==4
708
0
# if  defined(GHASH_ASM_X86_OR_64)
709
0
#  if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
710
  /* check FXSR and PCLMULQDQ bits */
711
0
  if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) ==
712
0
      (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) {
713
0
    gcm_init_clmul(ctx->Htable,ctx->H.u);
714
0
    ctx->gmult = gcm_gmult_clmul;
715
0
    ctx->ghash = gcm_ghash_clmul;
716
0
    return;
717
0
  }
718
0
#  endif
719
0
  gcm_init_4bit(ctx->Htable,ctx->H.u);
720
#  if defined(GHASH_ASM_X86)      /* x86 only */
721
#   if  defined(OPENSSL_IA32_SSE2)
722
  if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */
723
#   else
724
  if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) { /* check MMX bit */
725
#   endif
726
    ctx->gmult = gcm_gmult_4bit_mmx;
727
    ctx->ghash = gcm_ghash_4bit_mmx;
728
  } else {
729
    ctx->gmult = gcm_gmult_4bit_x86;
730
    ctx->ghash = gcm_ghash_4bit_x86;
731
  }
732
#  else
733
0
  ctx->gmult = gcm_gmult_4bit;
734
0
  ctx->ghash = gcm_ghash_4bit;
735
0
#  endif
736
# elif  defined(GHASH_ASM_ARM)
737
  if (OPENSSL_armcap_P & ARMV7_NEON) {
738
    ctx->gmult = gcm_gmult_neon;
739
    ctx->ghash = gcm_ghash_neon;
740
  } else {
741
    gcm_init_4bit(ctx->Htable,ctx->H.u);
742
    ctx->gmult = gcm_gmult_4bit;
743
    ctx->ghash = gcm_ghash_4bit;
744
  }
745
# else
746
  gcm_init_4bit(ctx->Htable,ctx->H.u);
747
# endif
748
0
#endif
749
0
}
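
/*
 * Init computes the hash subkey H = E_K(0^128) with the supplied
 * block cipher, converts it to host byte order, and then selects the
 * fastest GHASH backend available on this CPU (PCLMULQDQ, MMX/SSE,
 * NEON, or the portable 4-bit table code).
 */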

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
{
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])  = ctx->gmult;
#endif

  ctx->Yi.u[0]  = 0;
  ctx->Yi.u[1]  = 0;
  ctx->Xi.u[0]  = 0;
  ctx->Xi.u[1]  = 0;
  ctx->len.u[0] = 0;  /* AAD length */
  ctx->len.u[1] = 0;  /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len==12) {
    memcpy(ctx->Yi.c,iv,12);
    ctx->Yi.c[15]=1;
    ctr=1;
  }
  else {
    size_t i;
    u64 len0 = len;

    while (len>=16) {
      for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
      GCM_MUL(ctx,Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
      GCM_MUL(ctx,Yi);
    }
    len0 <<= 3;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
    ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
    ctx->Yi.c[8]  ^= (u8)(len0>>56);
    ctx->Yi.c[9]  ^= (u8)(len0>>48);
    ctx->Yi.c[10] ^= (u8)(len0>>40);
    ctx->Yi.c[11] ^= (u8)(len0>>32);
    ctx->Yi.c[12] ^= (u8)(len0>>24);
    ctx->Yi.c[13] ^= (u8)(len0>>16);
    ctx->Yi.c[14] ^= (u8)(len0>>8);
    ctx->Yi.c[15] ^= (u8)(len0);
#endif
#else /* BIG_ENDIAN */
    ctx->Yi.u[1]  ^= len0;
#endif

    GCM_MUL(ctx,Yi);

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctr = BSWAP4(ctx->Yi.d[3]);
#else
    ctr = GETU32(ctx->Yi.c+12);
#endif
#else /* BIG_ENDIAN */
    ctr = ctx->Yi.d[3];
#endif
  }

  (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
  ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
  ctx->Yi.d[3] = BSWAP4(ctr);
#else
  PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
  ctx->Yi.d[3] = ctr;
#endif
}
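
/*
 * Per the GCM spec: a 96-bit IV is used directly, Y0 = IV || 0^31 1,
 * while any other length is hashed, Y0 = GHASH(IV padded, followed
 * by the 64-bit IV bit length). EK0 = E_K(Y0) is kept for the final
 * tag and the running counter starts from Y0 + 1.
 */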

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
  size_t i;
  unsigned int n;
  u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])  = ctx->gmult;
# ifdef GHASH
  void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

  if (ctx->len.u[1]) return -2;

  alen += len;
  if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
    return -1;
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n+1)%16;
    }
    if (n==0) GCM_MUL(ctx,Xi);
    else {
      ctx->ares = n;
      return 0;
    }
  }

#ifdef GHASH
  if ((i = (len&(size_t)-16))) {
    GHASH(ctx,aad,i);
    aad += i;
    len -= i;
  }
#else
  while (len>=16) {
    for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
    GCM_MUL(ctx,Xi);
    aad += 16;
    len -= 16;
  }
#endif
  if (len) {
    n = (unsigned int)len;
    for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
  }

  ctx->ares = n;
  return 0;
}
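
/*
 * AAD may arrive in several calls but must all precede the data
 * (hence the -2 once a message length is recorded). The 2^61-byte
 * cap matches GCM's 2^64-bit AAD limit, and ares remembers how far
 * into a partially filled GHASH block the AAD stream stopped.
 */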

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
    const unsigned char *in, unsigned char *out,
    size_t len)
{
  unsigned int n, ctr;
  size_t i;
  u64        mlen  = ctx->len.u[1];
  block128_f block = ctx->block;
  void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])  = ctx->gmult;
# ifdef GHASH
  void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

  mlen += len;
  if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
    return -1;
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx,Xi);
    ctx->ares = 0;
  }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
  ctr = BSWAP4(ctx->Yi.d[3]);
#else
  ctr = GETU32(ctx->Yi.c+12);
#endif
#else /* BIG_ENDIAN */
  ctr = ctx->Yi.d[3];
#endif

  n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
  if (16%sizeof(size_t) == 0) do { /* always true actually */
    if (n) {
      while (n && len) {
        ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
        --len;
        n = (n+1)%16;
      }
      if (n==0) GCM_MUL(ctx,Xi);
      else {
        ctx->mres = n;
        return 0;
      }
    }
#ifdef __STRICT_ALIGNMENT
    if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
      break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
    while (len>=GHASH_CHUNK) {
      size_t j=GHASH_CHUNK;

      while (j) {
        size_t *out_t=(size_t *)out;
        const size_t *in_t=(const size_t *)in;

        (*block)(ctx->Yi.c,ctx->EKi.c,key);
        ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.d[3] = ctr;
#endif
        for (i=0; i<16/sizeof(size_t); ++i)
          out_t[i] = in_t[i] ^ ctx->EKi.t[i];
        out += 16;
        in  += 16;
        j   -= 16;
      }
      GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
      len -= GHASH_CHUNK;
    }
    if ((i = (len&(size_t)-16))) {
      size_t j=i;

      while (len>=16) {
        size_t *out_t=(size_t *)out;
        const size_t *in_t=(const size_t *)in;

        (*block)(ctx->Yi.c,ctx->EKi.c,key);
        ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.d[3] = ctr;
#endif
        for (i=0; i<16/sizeof(size_t); ++i)
          out_t[i] = in_t[i] ^ ctx->EKi.t[i];
        out += 16;
        in  += 16;
        len -= 16;
      }
      GHASH(ctx,out-j,j);
    }
#else
    while (len>=16) {
      size_t *out_t=(size_t *)out;
      const size_t *in_t=(const size_t *)in;

      (*block)(ctx->Yi.c,ctx->EKi.c,key);
      ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
      ctx->Yi.d[3] = BSWAP4(ctr);
#else
      PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
      ctx->Yi.d[3] = ctr;
#endif
      for (i=0; i<16/sizeof(size_t); ++i)
        ctx->Xi.t[i] ^=
          out_t[i] = in_t[i]^ctx->EKi.t[i];
      GCM_MUL(ctx,Xi);
      out += 16;
      in  += 16;
      len -= 16;
    }
#endif
    if (len) {
      (*block)(ctx->Yi.c,ctx->EKi.c,key);
      ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
      ctx->Yi.d[3] = BSWAP4(ctr);
#else
      PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
      ctx->Yi.d[3] = ctr;
#endif
      while (len--) {
        ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
        ++n;
      }
    }

    ctx->mres = n;
    return 0;
  } while(0);
#endif
  for (i=0;i<len;++i) {
    if (n==0) {
      (*block)(ctx->Yi.c,ctx->EKi.c,key);
      ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
      ctx->Yi.d[3] = BSWAP4(ctr);
#else
      PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
      ctx->Yi.d[3] = ctr;
#endif
    }
    ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
    n = (n+1)%16;
    if (n==0)
      GCM_MUL(ctx,Xi);
  }

  ctx->mres = n;
  return 0;
}
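
/*
 * The running message length is capped at 2^36 - 32 bytes
 * (2^39 - 256 bits), the per-invocation plaintext limit from the GCM
 * specification; mres records the position within the current
 * keystream block EKi so a later call can resume mid-block.
 */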

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
    const unsigned char *in, unsigned char *out,
    size_t len)
{
  unsigned int n, ctr;
  size_t i;
  u64        mlen  = ctx->len.u[1];
  block128_f block = ctx->block;
  void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])  = ctx->gmult;
# ifdef GHASH
  void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

  mlen += len;
  if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
    return -1;
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx,Xi);
    ctx->ares = 0;
  }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
  ctr = BSWAP4(ctx->Yi.d[3]);
#else
  ctr = GETU32(ctx->Yi.c+12);
#endif
#else /* BIG_ENDIAN */
  ctr = ctx->Yi.d[3];
#endif

  n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
  if (16%sizeof(size_t) == 0) do { /* always true actually */
    if (n) {
      while (n && len) {
        u8 c = *(in++);
        *(out++) = c^ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        --len;
        n = (n+1)%16;
      }
      if (n==0) GCM_MUL (ctx,Xi);
      else {
        ctx->mres = n;
        return 0;
      }
    }
#ifdef __STRICT_ALIGNMENT
    if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
      break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
    while (len>=GHASH_CHUNK) {
      size_t j=GHASH_CHUNK;

      GHASH(ctx,in,GHASH_CHUNK);
      while (j) {
        size_t *out_t=(size_t *)out;
        const size_t *in_t=(const size_t *)in;

        (*block)(ctx->Yi.c,ctx->EKi.c,key);
        ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.d[3] = ctr;
#endif
        for (i=0; i<16/sizeof(size_t); ++i)
          out_t[i] = in_t[i]^ctx->EKi.t[i];
        out += 16;
        in  += 16;
        j   -= 16;
      }
      len -= GHASH_CHUNK;
    }
    if ((i = (len&(size_t)-16))) {
      GHASH(ctx,in,i);
      while (len>=16) {
        size_t *out_t=(size_t *)out;
        const size_t *in_t=(const size_t *)in;

        (*block)(ctx->Yi.c,ctx->EKi.c,key);
        ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.d[3] = ctr;
#endif
        for (i=0; i<16/sizeof(size_t); ++i)
          out_t[i] = in_t[i]^ctx->EKi.t[i];
        out += 16;
        in  += 16;
        len -= 16;
      }
    }
#else
    while (len>=16) {
      size_t *out_t=(size_t *)out;
      const size_t *in_t=(const size_t *)in;

      (*block)(ctx->Yi.c,ctx->EKi.c,key);
      ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
      ctx->Yi.d[3] = BSWAP4(ctr);
#else
      PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
      ctx->Yi.d[3] = ctr;
#endif
      /* use the word-sized views; indexing the byte pointers here
       * would hash and decrypt the wrong data */
      for (i=0; i<16/sizeof(size_t); ++i) {
        size_t c = in_t[i];
        out_t[i] = c^ctx->EKi.t[i];
        ctx->Xi.t[i] ^= c;
      }
      GCM_MUL(ctx,Xi);
      out += 16;
      in  += 16;
      len -= 16;
    }
#endif
    if (len) {
      (*block)(ctx->Yi.c,ctx->EKi.c,key);
      ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
      ctx->Yi.d[3] = BSWAP4(ctr);
#else
      PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
      ctx->Yi.d[3] = ctr;
#endif
      while (len--) {
        u8 c = in[n];
        ctx->Xi.c[n] ^= c;
        out[n] = c^ctx->EKi.c[n];
        ++n;
      }
    }

    ctx->mres = n;
    return 0;
  } while(0);
#endif
  for (i=0;i<len;++i) {
    u8 c;
    if (n==0) {
      (*block)(ctx->Yi.c,ctx->EKi.c,key);
      ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
      ctx->Yi.d[3] = BSWAP4(ctr);
#else
      PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
      ctx->Yi.d[3] = ctr;
#endif
    }
    c = in[i];
    out[i] = c^ctx->EKi.c[n];
    ctx->Xi.c[n] ^= c;
    n = (n+1)%16;
    if (n==0)
      GCM_MUL(ctx,Xi);
  }

  ctx->mres = n;
  return 0;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
    const unsigned char *in, unsigned char *out,
    size_t len, ctr128_f stream)
{
  unsigned int n, ctr;
  size_t i;
  u64   mlen = ctx->len.u[1];
  void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])  = ctx->gmult;
# ifdef GHASH
  void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

  mlen += len;
  if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
    return -1;
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx,Xi);
    ctx->ares = 0;
  }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
  ctr = BSWAP4(ctx->Yi.d[3]);
#else
  ctr = GETU32(ctx->Yi.c+12);
#endif
#else /* BIG_ENDIAN */
  ctr = ctx->Yi.d[3];
#endif

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
      --len;
      n = (n+1)%16;
    }
    if (n==0) GCM_MUL(ctx,Xi);
    else {
      ctx->mres = n;
      return 0;
    }
  }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
  while (len>=GHASH_CHUNK) {
    (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
    ctr += GHASH_CHUNK/16;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctx->Yi.d[3] = BSWAP4(ctr);
#else
    PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
    ctx->Yi.d[3] = ctr;
#endif
    GHASH(ctx,out,GHASH_CHUNK);
    out += GHASH_CHUNK;
    in  += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len&(size_t)-16))) {
    size_t j=i/16;

    (*stream)(in,out,j,key,ctx->Yi.c);
    ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctx->Yi.d[3] = BSWAP4(ctr);
#else
    PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
    ctx->Yi.d[3] = ctr;
#endif
    in  += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx,out,i);
    out += i;
#else
    while (j--) {
      for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
      GCM_MUL(ctx,Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
    ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctx->Yi.d[3] = BSWAP4(ctr);
#else
    PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
    ctx->Yi.d[3] = ctr;
#endif
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 0;
}
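
/*
 * The *_ctr32 entry points delegate bulk keystream generation to a
 * caller-provided ctr128_f routine (typically an assembly CTR
 * implementation) that encrypts j whole blocks per call and
 * increments only the low 32 bits of the counter block Yi, matching
 * GCM's 32-bit counter arithmetic.
 */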

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
    const unsigned char *in, unsigned char *out,
    size_t len,ctr128_f stream)
{
  unsigned int n, ctr;
  size_t i;
  u64   mlen = ctx->len.u[1];
  void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])  = ctx->gmult;
# ifdef GHASH
  void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

  mlen += len;
  if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
    return -1;
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx,Xi);
    ctx->ares = 0;
  }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
  ctr = BSWAP4(ctx->Yi.d[3]);
#else
  ctr = GETU32(ctx->Yi.c+12);
#endif
#else /* BIG_ENDIAN */
  ctr = ctx->Yi.d[3];
#endif

  n = ctx->mres;
  if (n) {
    while (n && len) {
      u8 c = *(in++);
      *(out++) = c^ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n+1)%16;
    }
    if (n==0) GCM_MUL (ctx,Xi);
    else {
      ctx->mres = n;
      return 0;
    }
  }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
  while (len>=GHASH_CHUNK) {
    GHASH(ctx,in,GHASH_CHUNK);
    (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
    ctr += GHASH_CHUNK/16;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctx->Yi.d[3] = BSWAP4(ctr);
#else
    PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
    ctx->Yi.d[3] = ctr;
#endif
    out += GHASH_CHUNK;
    in  += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len&(size_t)-16))) {
    size_t j=i/16;

#if defined(GHASH)
    GHASH(ctx,in,i);
#else
    while (j--) {
      size_t k;
      for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
      GCM_MUL(ctx,Xi);
      in += 16;
    }
    j   = i/16;
    in -= i;
#endif
    (*stream)(in,out,j,key,ctx->Yi.c);
    ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctx->Yi.d[3] = BSWAP4(ctr);
#else
    PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
    ctx->Yi.d[3] = ctr;
#endif
    out += i;
    in  += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
    ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctx->Yi.d[3] = BSWAP4(ctr);
#else
    PUTU32(ctx->Yi.c+12,ctr);
#endif
#else /* BIG_ENDIAN */
    ctx->Yi.d[3] = ctr;
#endif
    while (len--) {
      u8 c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c^ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 0;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
      size_t len)
{
  u64 alen = ctx->len.u[0]<<3;
  u64 clen = ctx->len.u[1]<<3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])  = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares)
    GCM_MUL(ctx,Xi);

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
  alen = BSWAP8(alen);
  clen = BSWAP8(clen);
#else
  {
    u8 *p = ctx->len.c;

    ctx->len.u[0] = alen;
    ctx->len.u[1] = clen;

    alen = (u64)GETU32(p)  <<32|GETU32(p+4);
    clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
  }
#endif
#endif

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx,Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len<=sizeof(ctx->Xi))
    return memcmp(ctx->Xi.c,tag,len);
  else
    return -1;
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
}

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
  GCM128_CONTEXT *ret;

  if ((ret = malloc(sizeof(GCM128_CONTEXT))))
    CRYPTO_gcm128_init(ret,key,block);

  return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
  freezero(ctx, sizeof(*ctx));
}
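
/*
 * Minimal usage sketch (illustrative only, compiled out): it assumes
 * AES_KEY and AES_encrypt from <openssl/aes.h>; aes_block and
 * gcm_seal_example are hypothetical helpers, not part of this file's
 * API.
 */
#if 0
#include <openssl/aes.h>

/* adapt AES_encrypt to the block128_f signature expected by init */
static void
aes_block(const unsigned char in[16], unsigned char out[16], const void *key)
{
  AES_encrypt(in, out, key);
}

static int
gcm_seal_example(AES_KEY *aes, const unsigned char iv[12],
    const unsigned char *aad, size_t aad_len,
    const unsigned char *pt, unsigned char *ct, size_t pt_len,
    unsigned char tag[16])
{
  GCM128_CONTEXT gcm;

  CRYPTO_gcm128_init(&gcm, aes, aes_block);
  CRYPTO_gcm128_setiv(&gcm, iv, 12);
  if (aad_len && CRYPTO_gcm128_aad(&gcm, aad, aad_len) != 0)
    return -1;
  if (CRYPTO_gcm128_encrypt(&gcm, pt, ct, pt_len) != 0)
    return -1;
  CRYPTO_gcm128_tag(&gcm, tag, 16);
  return 0;
}
#endif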