/src/openssl/crypto/modes/gcm128.c
Line  | Count  | Source  | 
1  |  | /*  | 
2  |  |  * Copyright 2010-2024 The OpenSSL Project Authors. All Rights Reserved.  | 
3  |  |  *  | 
4  |  |  * Licensed under the Apache License 2.0 (the "License").  You may not use  | 
5  |  |  * this file except in compliance with the License.  You can obtain a copy  | 
6  |  |  * in the file LICENSE in the source distribution or at  | 
7  |  |  * https://www.openssl.org/source/license.html  | 
8  |  |  */  | 
9  |  |  | 
10  |  | #include <string.h>  | 
11  |  | #include <openssl/crypto.h>  | 
12  |  | #include "internal/cryptlib.h"  | 
13  |  | #include "internal/endian.h"  | 
14  |  | #include "crypto/modes.h"  | 
15  |  |  | 
16  |  | #if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)  | 
17  |  | typedef size_t size_t_aX __attribute((__aligned__(1)));  | 
18  |  | #else  | 
19  |  | typedef size_t size_t_aX;  | 
20  |  | #endif  | 
21  |  |  | 
22  |  | #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)  | 
23  |  | /* redefine, because alignment is ensured */  | 
24  |  | # undef  GETU32  | 
25  |  | # define GETU32(p)       BSWAP4(*(const u32 *)(p))  | 
26  |  | # undef  PUTU32  | 
27  |  | # define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)  | 
28  |  | #endif  | 
29  |  |  | 
30  |  | /* RISC-V uses the C implementation as a fallback. */  | 
31  |  | #if defined(__riscv)  | 
32  |  | # define INCLUDE_C_GMULT_4BIT  | 
33  |  | # define INCLUDE_C_GHASH_4BIT  | 
34  |  | #endif  | 
35  |  |  | 
36  |  | #define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))  | 
37  | 0  | #define REDUCE1BIT(V)   do { \ | 
38  | 0  |         if (sizeof(size_t)==8) { \ | 
39  | 0  |                 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \  | 
40  | 0  |                 V.lo  = (V.hi<<63)|(V.lo>>1); \  | 
41  | 0  |                 V.hi  = (V.hi>>1 )^T; \  | 
42  | 0  |         } \  | 
43  | 0  |         else { \ | 
44  | 0  |                 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \  | 
45  | 0  |                 V.lo  = (V.hi<<63)|(V.lo>>1); \  | 
46  | 0  |                 V.hi  = (V.hi>>1 )^((u64)T<<32); \  | 
47  | 0  |         } \  | 
48  | 0  | } while(0)  | 
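
REDUCE1BIT divides the 128-bit value V by x in GCM's bit-reflected representation of GF(2^128): V is shifted right by one bit and, when the dropped bit was 1, the reduction constant 0xE1 (the reflected form of the polynomial x^128 + x^7 + x^2 + x + 1) is folded into the top byte. A minimal standalone sketch of the 64-bit branch; the names u128_demo and reduce1bit_demo are illustrative, not part of this file:

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } u128_demo;

    static u128_demo reduce1bit_demo(u128_demo V)
    {
        /* All-ones mask exactly when the bit about to be shifted out is 1 */
        uint64_t T = 0xe100000000000000ULL & (0 - (V.lo & 1));

        V.lo = (V.hi << 63) | (V.lo >> 1);  /* 128-bit right shift by one  */
        V.hi = (V.hi >> 1) ^ T;             /* fold in the reduction       */
        return V;
    }
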
49  |  |  | 
50  |  | /*-  | 
51  |  |  *  | 
52  |  |  * NOTE: TABLE_BITS and all non-4bit implementations have been removed in 3.1.  | 
53  |  |  *  | 
54  |  |  * Even though the permitted values for TABLE_BITS were 8, 4 and 1, it  | 
55  |  |  * should never be set to 8; 8 was effectively reserved for testing.  | 
56  |  |  * TABLE_BITS>1 selects the lookup-table-driven implementations referred  | 
57  |  |  * to as "Shoup's" in the GCM specification. In other words, OpenSSL does  | 
58  |  |  * not cover the whole spectrum of possible table-driven implementations.  | 
59  |  |  * Why? In the non-"Shoup's" case the memory access pattern is segmented  | 
60  |  |  * in such a manner that cache-timing information can reveal a fair  | 
61  |  |  * portion of the intermediate hash value. Given that the ciphertext is  | 
62  |  |  * always available to an attacker, the attacker can attempt to deduce  | 
63  |  |  * the secret parameter H and, if successful, tamper with messages  | 
64  |  |  * [which is trivial in CTR mode]. In the "Shoup's" case this is not as  | 
65  |  |  * easy, but there is no reason to believe it is resistant to cache-  | 
66  |  |  * timing attacks either. The catch with the "8-bit" implementation is  | 
67  |  |  * that it consumes 16 (sixteen) times more memory, 4KB per individual  | 
68  |  |  * key + 1KB shared. On the plus side it should be twice as fast as the  | 
69  |  |  * "4-bit" version; for gcc-generated x86[_64] code the "8-bit" version  | 
70  |  |  * was observed to run ~75% faster, closer to 100% for commercial  | 
71  |  |  * compilers... Yet the "4-bit" procedure is preferred, because it is  | 
72  |  |  * believed to provide a better security-performance balance and  | 
73  |  |  * adequate all-round performance. "All-round" refers to things like:  | 
74  |  |  *  | 
75  |  |  * - shorter setup time effectively improves overall timing for  | 
76  |  |  *   handling short messages;  | 
77  |  |  * - larger table allocation can become unbearable because of VM  | 
78  |  |  *   subsystem penalties (for example, on Windows a large enough free()  | 
79  |  |  *   results in VM working-set trimming, meaning that a subsequent  | 
80  |  |  *   malloc() would immediately incur working-set expansion);  | 
81  |  |  * - larger table has larger cache footprint, which can affect  | 
82  |  |  *   performance of other code paths (not necessarily even from same  | 
83  |  |  *   thread in Hyper-Threading world);  | 
84  |  |  *  | 
85  |  |  * A value of 1 is not appropriate for performance reasons.  | 
86  |  |  */  | 
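
The table built by gcm_init_4bit() below is XOR-linear in its index, i.e. Htable[i ^ j] = Htable[i] ^ Htable[j] with Htable[0] = 0: only Htable[8], Htable[4], Htable[2] and Htable[1] are derived from H via the REDUCE1BIT shift chain, and every other entry is an XOR combination of those, e.g. Htable[13] = Htable[8] ^ Htable[4] ^ Htable[1] because 13 = 8 ^ 4 ^ 1. The unrolled assignments in the default branch and the loop in the small-footprint branch are two spellings of the same construction.
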
87  |  |  | 
88  |  | static void gcm_init_4bit(u128 Htable[16], const u64 H[2])  | 
89  | 0  | { | 
90  | 0  |     u128 V;  | 
91  |  | # if defined(OPENSSL_SMALL_FOOTPRINT)  | 
92  |  |     int i;  | 
93  |  | # endif  | 
94  |  |  | 
95  | 0  |     Htable[0].hi = 0;  | 
96  | 0  |     Htable[0].lo = 0;  | 
97  | 0  |     V.hi = H[0];  | 
98  | 0  |     V.lo = H[1];  | 
99  |  |  | 
100  |  | # if defined(OPENSSL_SMALL_FOOTPRINT)  | 
101  |  |     for (Htable[8] = V, i = 4; i > 0; i >>= 1) { | 
102  |  |         REDUCE1BIT(V);  | 
103  |  |         Htable[i] = V;  | 
104  |  |     }  | 
105  |  |  | 
106  |  |     for (i = 2; i < 16; i <<= 1) { | 
107  |  |         u128 *Hi = Htable + i;  | 
108  |  |         int j;  | 
109  |  |         for (V = *Hi, j = 1; j < i; ++j) { | 
110  |  |             Hi[j].hi = V.hi ^ Htable[j].hi;  | 
111  |  |             Hi[j].lo = V.lo ^ Htable[j].lo;  | 
112  |  |         }  | 
113  |  |     }  | 
114  |  | # else  | 
115  | 0  |     Htable[8] = V;  | 
116  | 0  |     REDUCE1BIT(V);  | 
117  | 0  |     Htable[4] = V;  | 
118  | 0  |     REDUCE1BIT(V);  | 
119  | 0  |     Htable[2] = V;  | 
120  | 0  |     REDUCE1BIT(V);  | 
121  | 0  |     Htable[1] = V;  | 
122  | 0  |     Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;  | 
123  | 0  |     V = Htable[4];  | 
124  | 0  |     Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;  | 
125  | 0  |     Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;  | 
126  | 0  |     Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;  | 
127  | 0  |     V = Htable[8];  | 
128  | 0  |     Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;  | 
129  | 0  |     Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;  | 
130  | 0  |     Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;  | 
131  | 0  |     Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;  | 
132  | 0  |     Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;  | 
133  | 0  |     Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;  | 
134  | 0  |     Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;  | 
135  | 0  | # endif  | 
136  |  | # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))  | 
137  |  |     /*  | 
138  |  |      * ARM assembler expects specific dword order in Htable.  | 
139  |  |      */  | 
140  |  |     { | 
141  |  |         int j;  | 
142  |  |         DECLARE_IS_ENDIAN;  | 
143  |  |  | 
144  |  |         if (IS_LITTLE_ENDIAN)  | 
145  |  |             for (j = 0; j < 16; ++j) { | 
146  |  |                 V = Htable[j];  | 
147  |  |                 Htable[j].hi = V.lo;  | 
148  |  |                 Htable[j].lo = V.hi;  | 
149  |  |         } else  | 
150  |  |             for (j = 0; j < 16; ++j) { | 
151  |  |                 V = Htable[j];  | 
152  |  |                 Htable[j].hi = V.lo << 32 | V.lo >> 32;  | 
153  |  |                 Htable[j].lo = V.hi << 32 | V.hi >> 32;  | 
154  |  |             }  | 
155  |  |     }  | 
156  |  | # endif  | 
157  | 0  | }  | 
158  |  |  | 
159  |  | # if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)  | 
160  |  | static const size_t rem_4bit[16] = { | 
161  |  |     PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),  | 
162  |  |     PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),  | 
163  |  |     PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),  | 
164  |  |     PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)  | 
165  |  | };  | 
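
rem_4bit holds the precomputed reductions for the four bits that the loops below shift off the low end of Z, and it is XOR-linear in its index just like Htable: rem_4bit[8] = PACK(0xE100) places the one-bit constant 0xE1 in the top byte, halving the index shifts the pattern right by one (0x7080, 0x3840, 0x1C20), and composite indices are XOR combinations, e.g.

    rem_4bit[3] == rem_4bit[2] ^ rem_4bit[1]   /* 0x2460 == 0x3840 ^ 0x1C20 */
    rem_4bit[5] == rem_4bit[4] ^ rem_4bit[1]   /* 0x6CA0 == 0x7080 ^ 0x1C20 */
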
166  |  |  | 
167  |  | static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])  | 
168  | 0  | { | 
169  | 0  |     u128 Z;  | 
170  | 0  |     int cnt = 15;  | 
171  | 0  |     size_t rem, nlo, nhi;  | 
172  | 0  |     DECLARE_IS_ENDIAN;  | 
173  |  |  | 
174  | 0  |     nlo = ((const u8 *)Xi)[15];  | 
175  | 0  |     nhi = nlo >> 4;  | 
176  | 0  |     nlo &= 0xf;  | 
177  |  |  | 
178  | 0  |     Z.hi = Htable[nlo].hi;  | 
179  | 0  |     Z.lo = Htable[nlo].lo;  | 
180  |  |  | 
181  | 0  |     while (1) { | 
182  | 0  |         rem = (size_t)Z.lo & 0xf;  | 
183  | 0  |         Z.lo = (Z.hi << 60) | (Z.lo >> 4);  | 
184  | 0  |         Z.hi = (Z.hi >> 4);  | 
185  | 0  |         if (sizeof(size_t) == 8)  | 
186  | 0  |             Z.hi ^= rem_4bit[rem];  | 
187  | 0  |         else  | 
188  | 0  |             Z.hi ^= (u64)rem_4bit[rem] << 32;  | 
189  |  |  | 
190  | 0  |         Z.hi ^= Htable[nhi].hi;  | 
191  | 0  |         Z.lo ^= Htable[nhi].lo;  | 
192  |  |  | 
193  | 0  |         if (--cnt < 0)  | 
194  | 0  |             break;  | 
195  |  |  | 
196  | 0  |         nlo = ((const u8 *)Xi)[cnt];  | 
197  | 0  |         nhi = nlo >> 4;  | 
198  | 0  |         nlo &= 0xf;  | 
199  |  |  | 
200  | 0  |         rem = (size_t)Z.lo & 0xf;  | 
201  | 0  |         Z.lo = (Z.hi << 60) | (Z.lo >> 4);  | 
202  | 0  |         Z.hi = (Z.hi >> 4);  | 
203  | 0  |         if (sizeof(size_t) == 8)  | 
204  | 0  |             Z.hi ^= rem_4bit[rem];  | 
205  | 0  |         else  | 
206  | 0  |             Z.hi ^= (u64)rem_4bit[rem] << 32;  | 
207  |  |  | 
208  | 0  |         Z.hi ^= Htable[nlo].hi;  | 
209  | 0  |         Z.lo ^= Htable[nlo].lo;  | 
210  | 0  |     }  | 
211  |  |  | 
212  | 0  |     if (IS_LITTLE_ENDIAN) { | 
213  |  | #  ifdef BSWAP8  | 
214  |  |         Xi[0] = BSWAP8(Z.hi);  | 
215  |  |         Xi[1] = BSWAP8(Z.lo);  | 
216  |  | #  else  | 
217  | 0  |         u8 *p = (u8 *)Xi;  | 
218  | 0  |         u32 v;  | 
219  | 0  |         v = (u32)(Z.hi >> 32);  | 
220  | 0  |         PUTU32(p, v);  | 
221  | 0  |         v = (u32)(Z.hi);  | 
222  | 0  |         PUTU32(p + 4, v);  | 
223  | 0  |         v = (u32)(Z.lo >> 32);  | 
224  | 0  |         PUTU32(p + 8, v);  | 
225  | 0  |         v = (u32)(Z.lo);  | 
226  | 0  |         PUTU32(p + 12, v);  | 
227  | 0  | #  endif  | 
228  | 0  |     } else { | 
229  | 0  |         Xi[0] = Z.hi;  | 
230  | 0  |         Xi[1] = Z.lo;  | 
231  | 0  |     }  | 
232  | 0  | }  | 
233  |  |  | 
234  |  | # endif  | 
235  |  |  | 
236  |  | # if !defined(GHASH_ASM) || defined(INCLUDE_C_GHASH_4BIT)  | 
237  |  | #  if !defined(OPENSSL_SMALL_FOOTPRINT)  | 
238  |  | /*  | 
239  |  |  * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for  | 
240  |  |  * details... Compiler-generated code doesn't seem to give any  | 
241  |  |  * performance improvement, at least not on x86[_64]. It's here  | 
242  |  |  * mostly as reference and a placeholder for possible future  | 
243  |  |  * non-trivial optimization[s]...  | 
244  |  |  */  | 
245  |  | static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],  | 
246  |  |                            const u8 *inp, size_t len)  | 
247  | 0  | { | 
248  | 0  |     u128 Z;  | 
249  | 0  |     int cnt;  | 
250  | 0  |     size_t rem, nlo, nhi;  | 
251  | 0  |     DECLARE_IS_ENDIAN;  | 
252  |  |  | 
253  | 0  |     do { | 
254  | 0  |         cnt = 15;  | 
255  | 0  |         nlo = ((const u8 *)Xi)[15];  | 
256  | 0  |         nlo ^= inp[15];  | 
257  | 0  |         nhi = nlo >> 4;  | 
258  | 0  |         nlo &= 0xf;  | 
259  |  |  | 
260  | 0  |         Z.hi = Htable[nlo].hi;  | 
261  | 0  |         Z.lo = Htable[nlo].lo;  | 
262  |  |  | 
263  | 0  |         while (1) { | 
264  | 0  |             rem = (size_t)Z.lo & 0xf;  | 
265  | 0  |             Z.lo = (Z.hi << 60) | (Z.lo >> 4);  | 
266  | 0  |             Z.hi = (Z.hi >> 4);  | 
267  | 0  |             if (sizeof(size_t) == 8)  | 
268  | 0  |                 Z.hi ^= rem_4bit[rem];  | 
269  | 0  |             else  | 
270  | 0  |                 Z.hi ^= (u64)rem_4bit[rem] << 32;  | 
271  |  |  | 
272  | 0  |             Z.hi ^= Htable[nhi].hi;  | 
273  | 0  |             Z.lo ^= Htable[nhi].lo;  | 
274  |  |  | 
275  | 0  |             if (--cnt < 0)  | 
276  | 0  |                 break;  | 
277  |  |  | 
278  | 0  |             nlo = ((const u8 *)Xi)[cnt];  | 
279  | 0  |             nlo ^= inp[cnt];  | 
280  | 0  |             nhi = nlo >> 4;  | 
281  | 0  |             nlo &= 0xf;  | 
282  |  |  | 
283  | 0  |             rem = (size_t)Z.lo & 0xf;  | 
284  | 0  |             Z.lo = (Z.hi << 60) | (Z.lo >> 4);  | 
285  | 0  |             Z.hi = (Z.hi >> 4);  | 
286  | 0  |             if (sizeof(size_t) == 8)  | 
287  | 0  |                 Z.hi ^= rem_4bit[rem];  | 
288  | 0  |             else  | 
289  | 0  |                 Z.hi ^= (u64)rem_4bit[rem] << 32;  | 
290  |  |  | 
291  | 0  |             Z.hi ^= Htable[nlo].hi;  | 
292  | 0  |             Z.lo ^= Htable[nlo].lo;  | 
293  | 0  |         }  | 
294  |  |  | 
295  | 0  |         if (IS_LITTLE_ENDIAN) { | 
296  |  | #   ifdef BSWAP8  | 
297  |  |             Xi[0] = BSWAP8(Z.hi);  | 
298  |  |             Xi[1] = BSWAP8(Z.lo);  | 
299  |  | #   else  | 
300  | 0  |             u8 *p = (u8 *)Xi;  | 
301  | 0  |             u32 v;  | 
302  | 0  |             v = (u32)(Z.hi >> 32);  | 
303  | 0  |             PUTU32(p, v);  | 
304  | 0  |             v = (u32)(Z.hi);  | 
305  | 0  |             PUTU32(p + 4, v);  | 
306  | 0  |             v = (u32)(Z.lo >> 32);  | 
307  | 0  |             PUTU32(p + 8, v);  | 
308  | 0  |             v = (u32)(Z.lo);  | 
309  | 0  |             PUTU32(p + 12, v);  | 
310  | 0  | #   endif  | 
311  | 0  |         } else { | 
312  | 0  |             Xi[0] = Z.hi;  | 
313  | 0  |             Xi[1] = Z.lo;  | 
314  | 0  |         }  | 
315  |  |  | 
316  | 0  |         inp += 16;  | 
317  |  |         /* Block size is 128 bits so len is a multiple of 16 */  | 
318  | 0  |         len -= 16;  | 
319  | 0  |     } while (len > 0);  | 
320  | 0  | }  | 
321  |  | #  endif  | 
322  |  | # else  | 
323  |  | void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);  | 
324  |  | void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,  | 
325  |  |                     size_t len);  | 
326  |  | # endif  | 
327  |  |  | 
328  | 0  | # define GCM_MUL(ctx)      ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)  | 
329  |  | # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)  | 
330  | 0  | #  define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)  | 
331  |  | /*  | 
332  |  |  * GHASH_CHUNK is a "stride parameter" whose mission is to mitigate the  | 
333  |  |  * cache-thrashing effect. In other words, the idea is to hash data while  | 
334  |  |  * it is still in the L1 cache after the encryption pass...  | 
335  |  |  */  | 
336  | 0  | #  define GHASH_CHUNK       (3*1024)  | 
337  |  | # endif  | 
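
One GHASH_CHUNK of 3*1024 = 3072 bytes covers 3072/16 = 192 block-cipher calls, small enough to sit comfortably in a typical 32KB L1 data cache. The bulk loops in CRYPTO_gcm128_encrypt()/decrypt() below therefore alternate in roughly this pattern (a sketch; encrypt_chunk_ctr32 is a stand-in for the inlined counter-mode loop, not a function in this file):

    while (len >= GHASH_CHUNK) {
        encrypt_chunk_ctr32(ctx, in, out, GHASH_CHUNK); /* write 3KB        */
        GHASH(ctx, out, GHASH_CHUNK);                   /* hash it while it */
        in += GHASH_CHUNK;                              /* is still hot     */
        out += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
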
338  |  |  | 
339  |  | #if     (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))  | 
340  |  | # if    !defined(I386_ONLY) && \  | 
341  |  |         (defined(__i386)        || defined(__i386__)    || \  | 
342  |  |          defined(__x86_64)      || defined(__x86_64__)  || \  | 
343  |  |          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))  | 
344  |  | #  define GHASH_ASM_X86_OR_64  | 
345  |  |  | 
346  |  | void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);  | 
347  |  | void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);  | 
348  |  | void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,  | 
349  |  |                      size_t len);  | 
350  |  |  | 
351  |  | #  if defined(__i386) || defined(__i386__) || defined(_M_IX86)  | 
352  |  | #   define gcm_init_avx   gcm_init_clmul  | 
353  |  | #   define gcm_gmult_avx  gcm_gmult_clmul  | 
354  |  | #   define gcm_ghash_avx  gcm_ghash_clmul  | 
355  |  | #  else  | 
356  |  | void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);  | 
357  |  | void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);  | 
358  |  | void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,  | 
359  |  |                    size_t len);  | 
360  |  | #  endif  | 
361  |  |  | 
362  |  | #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)  | 
363  |  | #   define GHASH_ASM_X86  | 
364  |  | void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);  | 
365  |  | void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,  | 
366  |  |                         size_t len);  | 
367  |  |  | 
368  |  | void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);  | 
369  |  | void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,  | 
370  |  |                         size_t len);  | 
371  |  | #  endif  | 
372  |  | # elif defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(_M_ARM64)  | 
373  |  | #  include "arm_arch.h"  | 
374  |  | #  if __ARM_MAX_ARCH__>=7  | 
375  |  | #   define GHASH_ASM_ARM  | 
376  |  | #   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)  | 
377  |  | #   if defined(__arm__) || defined(__arm)  | 
378  |  | #    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)  | 
379  |  | #   endif  | 
380  |  | void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);  | 
381  |  | void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);  | 
382  |  | void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,  | 
383  |  |                     size_t len);  | 
384  |  | void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);  | 
385  |  | void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);  | 
386  |  | void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,  | 
387  |  |                   size_t len);  | 
388  |  | #  endif  | 
389  |  | # elif defined(__sparc__) || defined(__sparc)  | 
390  |  | #  include "crypto/sparc_arch.h"  | 
391  |  | #  define GHASH_ASM_SPARC  | 
392  |  | void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);  | 
393  |  | void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);  | 
394  |  | void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,  | 
395  |  |                     size_t len);  | 
396  |  | # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__POWERPC__) || defined(_ARCH_PPC))  | 
397  |  | #  include "crypto/ppc_arch.h"  | 
398  |  | #  define GHASH_ASM_PPC  | 
399  |  | void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);  | 
400  |  | void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);  | 
401  |  | void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,  | 
402  |  |                   size_t len);  | 
403  |  | # elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64  | 
404  |  | #  include "crypto/riscv_arch.h"  | 
405  |  | #  define GHASH_ASM_RV64I  | 
406  |  | /* Zbc/Zbkc (scalar crypto with clmul) based routines. */  | 
407  |  | void gcm_init_rv64i_zbc(u128 Htable[16], const u64 Xi[2]);  | 
408  |  | void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 Xi[2]);  | 
409  |  | void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 Xi[2]);  | 
410  |  | void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);  | 
411  |  | void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);  | 
412  |  | void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],  | 
413  |  |                          const u8 *inp, size_t len);  | 
414  |  | void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],  | 
415  |  |                                const u8 *inp, size_t len);  | 
416  |  | /* Zvkb/Zvbc (vector crypto with vclmul) based routines. */  | 
417  |  | void gcm_init_rv64i_zvkb_zvbc(u128 Htable[16], const u64 Xi[2]);  | 
418  |  | void gcm_gmult_rv64i_zvkb_zvbc(u64 Xi[2], const u128 Htable[16]);  | 
419  |  | void gcm_ghash_rv64i_zvkb_zvbc(u64 Xi[2], const u128 Htable[16],  | 
420  |  |                                const u8 *inp, size_t len);  | 
421  |  | /* Zvkg (vector crypto with vgmul.vv and vghsh.vv). */  | 
422  |  | void gcm_init_rv64i_zvkg(u128 Htable[16], const u64 Xi[2]);  | 
423  |  | void gcm_init_rv64i_zvkg_zvkb(u128 Htable[16], const u64 Xi[2]);  | 
424  |  | void gcm_gmult_rv64i_zvkg(u64 Xi[2], const u128 Htable[16]);  | 
425  |  | void gcm_ghash_rv64i_zvkg(u64 Xi[2], const u128 Htable[16],  | 
426  |  |                           const u8 *inp, size_t len);  | 
427  |  | # endif  | 
428  |  | #endif  | 
429  |  |  | 
430  |  | static void gcm_get_funcs(struct gcm_funcs_st *ctx)  | 
431  | 0  | { | 
432  |  |     /* set defaults -- overridden below as needed */  | 
433  | 0  |     ctx->ginit = gcm_init_4bit;  | 
434  | 0  | #if !defined(GHASH_ASM)  | 
435  | 0  |     ctx->gmult = gcm_gmult_4bit;  | 
436  |  | #else  | 
437  |  |     ctx->gmult = NULL;  | 
438  |  | #endif  | 
439  | 0  | #if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)  | 
440  | 0  |     ctx->ghash = gcm_ghash_4bit;  | 
441  |  | #else  | 
442  |  |     ctx->ghash = NULL;  | 
443  |  | #endif  | 
444  |  |  | 
445  |  | #if defined(GHASH_ASM_X86_OR_64)  | 
446  |  | # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)  | 
447  |  |     /* x86_64 */  | 
448  |  |     if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */ | 
449  |  |         if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */ | 
450  |  |             ctx->ginit = gcm_init_avx;  | 
451  |  |             ctx->gmult = gcm_gmult_avx;  | 
452  |  |             ctx->ghash = gcm_ghash_avx;  | 
453  |  |         } else { | 
454  |  |             ctx->ginit = gcm_init_clmul;  | 
455  |  |             ctx->gmult = gcm_gmult_clmul;  | 
456  |  |             ctx->ghash = gcm_ghash_clmul;  | 
457  |  |         }  | 
458  |  |         return;  | 
459  |  |     }  | 
460  |  | # endif  | 
461  |  | # if defined(GHASH_ASM_X86)  | 
462  |  |     /* x86 only */  | 
463  |  | #  if defined(OPENSSL_IA32_SSE2)  | 
464  |  |     if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */ | 
465  |  |         ctx->gmult = gcm_gmult_4bit_mmx;  | 
466  |  |         ctx->ghash = gcm_ghash_4bit_mmx;  | 
467  |  |         return;  | 
468  |  |     }  | 
469  |  | #  else  | 
470  |  |     if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */ | 
471  |  |         ctx->gmult = gcm_gmult_4bit_mmx;  | 
472  |  |         ctx->ghash = gcm_ghash_4bit_mmx;  | 
473  |  |         return;  | 
474  |  |     }  | 
475  |  | #  endif  | 
476  |  |     ctx->gmult = gcm_gmult_4bit_x86;  | 
477  |  |     ctx->ghash = gcm_ghash_4bit_x86;  | 
478  |  |     return;  | 
479  |  | # else  | 
480  |  |     /* x86_64 fallback defaults */  | 
481  |  |     ctx->gmult = gcm_gmult_4bit;  | 
482  |  |     ctx->ghash = gcm_ghash_4bit;  | 
483  |  |     return;  | 
484  |  | # endif  | 
485  |  | #elif defined(GHASH_ASM_ARM)  | 
486  |  |     /* ARM defaults */  | 
487  |  |     ctx->gmult = gcm_gmult_4bit;  | 
488  |  | # if !defined(OPENSSL_SMALL_FOOTPRINT)  | 
489  |  |     ctx->ghash = gcm_ghash_4bit;  | 
490  |  | # else  | 
491  |  |     ctx->ghash = NULL;  | 
492  |  | # endif  | 
493  |  | # ifdef PMULL_CAPABLE  | 
494  |  |     if (PMULL_CAPABLE) { | 
495  |  |         ctx->ginit = (gcm_init_fn)gcm_init_v8;  | 
496  |  |         ctx->gmult = gcm_gmult_v8;  | 
497  |  |         ctx->ghash = gcm_ghash_v8;  | 
498  |  |     }  | 
499  |  | # elif defined(NEON_CAPABLE)  | 
500  |  |     if (NEON_CAPABLE) { | 
501  |  |         ctx->ginit = gcm_init_neon;  | 
502  |  |         ctx->gmult = gcm_gmult_neon;  | 
503  |  |         ctx->ghash = gcm_ghash_neon;  | 
504  |  |     }  | 
505  |  | # endif  | 
506  |  |     return;  | 
507  |  | #elif defined(GHASH_ASM_SPARC)  | 
508  |  |     /* SPARC defaults */  | 
509  |  |     ctx->gmult = gcm_gmult_4bit;  | 
510  |  |     ctx->ghash = gcm_ghash_4bit;  | 
511  |  |     if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) { | 
512  |  |         ctx->ginit = gcm_init_vis3;  | 
513  |  |         ctx->gmult = gcm_gmult_vis3;  | 
514  |  |         ctx->ghash = gcm_ghash_vis3;  | 
515  |  |     }  | 
516  |  |     return;  | 
517  |  | #elif defined(GHASH_ASM_PPC)  | 
518  |  |     /* PowerPC does not define GHASH_ASM; defaults set above */  | 
519  |  |     if (OPENSSL_ppccap_P & PPC_CRYPTO207) { | 
520  |  |         ctx->ginit = gcm_init_p8;  | 
521  |  |         ctx->gmult = gcm_gmult_p8;  | 
522  |  |         ctx->ghash = gcm_ghash_p8;  | 
523  |  |     }  | 
524  |  |     return;  | 
525  |  | #elif defined(GHASH_ASM_RV64I)  | 
526  |  |     /* RISC-V defaults */  | 
527  |  |     ctx->gmult = gcm_gmult_4bit;  | 
528  |  |     ctx->ghash = gcm_ghash_4bit;  | 
529  |  |  | 
530  |  |     if (RISCV_HAS_ZVKG() && riscv_vlen() >= 128) { | 
531  |  |         if (RISCV_HAS_ZVKB())  | 
532  |  |             ctx->ginit = gcm_init_rv64i_zvkg_zvkb;  | 
533  |  |         else  | 
534  |  |             ctx->ginit = gcm_init_rv64i_zvkg;  | 
535  |  |         ctx->gmult = gcm_gmult_rv64i_zvkg;  | 
536  |  |         ctx->ghash = gcm_ghash_rv64i_zvkg;  | 
537  |  |     } else if (RISCV_HAS_ZVKB() && RISCV_HAS_ZVBC() && riscv_vlen() >= 128) { | 
538  |  |         ctx->ginit = gcm_init_rv64i_zvkb_zvbc;  | 
539  |  |         ctx->gmult = gcm_gmult_rv64i_zvkb_zvbc;  | 
540  |  |         ctx->ghash = gcm_ghash_rv64i_zvkb_zvbc;  | 
541  |  |     } else if (RISCV_HAS_ZBC()) { | 
542  |  |         if (RISCV_HAS_ZBKB()) { | 
543  |  |             ctx->ginit = gcm_init_rv64i_zbc__zbkb;  | 
544  |  |             ctx->gmult = gcm_gmult_rv64i_zbc__zbkb;  | 
545  |  |             ctx->ghash = gcm_ghash_rv64i_zbc__zbkb;  | 
546  |  |         } else if (RISCV_HAS_ZBB()) { | 
547  |  |             ctx->ginit = gcm_init_rv64i_zbc__zbb;  | 
548  |  |             ctx->gmult = gcm_gmult_rv64i_zbc;  | 
549  |  |             ctx->ghash = gcm_ghash_rv64i_zbc;  | 
550  |  |         } else { | 
551  |  |             ctx->ginit = gcm_init_rv64i_zbc;  | 
552  |  |             ctx->gmult = gcm_gmult_rv64i_zbc;  | 
553  |  |             ctx->ghash = gcm_ghash_rv64i_zbc;  | 
554  |  |         }  | 
555  |  |     }  | 
556  |  |     return;  | 
557  |  | #elif defined(GHASH_ASM)  | 
558  |  |     /* all other architectures use the generic names */  | 
559  |  |     ctx->gmult = gcm_gmult_4bit;  | 
560  |  |     ctx->ghash = gcm_ghash_4bit;  | 
561  |  |     return;  | 
562  |  | #endif  | 
563  | 0  | }  | 
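
gcm_get_funcs() resolves the (ginit, gmult, ghash) triple from runtime capability bits rather than at compile time: on x86[_64] the PCLMULQDQ feature bit (bit 1 of OPENSSL_ia32cap_P[1]) selects the carry-less-multiply routines, with the AVX+MOVBE variant preferred when both of those CPUID bits are set; ARM, SPARC, PowerPC and RISC-V test their own capability words declared above; and any build without a matching assembler path falls back to the portable 4-bit table code.
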
564  |  |  | 
565  |  | void ossl_gcm_init_4bit(u128 Htable[16], const u64 H[2])  | 
566  | 0  | { | 
567  | 0  |     struct gcm_funcs_st funcs;  | 
568  |  |  | 
569  | 0  |     gcm_get_funcs(&funcs);  | 
570  | 0  |     funcs.ginit(Htable, H);  | 
571  | 0  | }  | 
572  |  |  | 
573  |  | void ossl_gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])  | 
574  | 0  | { | 
575  | 0  |     struct gcm_funcs_st funcs;  | 
576  |  |  | 
577  | 0  |     gcm_get_funcs(&funcs);  | 
578  | 0  |     funcs.gmult(Xi, Htable);  | 
579  | 0  | }  | 
580  |  |  | 
581  |  | void ossl_gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],  | 
582  |  |                          const u8 *inp, size_t len)  | 
583  | 0  | { | 
584  | 0  |     struct gcm_funcs_st funcs;  | 
585  | 0  |     u64 tmp[2];  | 
586  | 0  |     size_t i;  | 
587  |  |  | 
588  | 0  |     gcm_get_funcs(&funcs);  | 
589  | 0  |     if (funcs.ghash != NULL) { | 
590  | 0  |         funcs.ghash(Xi, Htable, inp, len);  | 
591  | 0  |     } else { | 
592  |  |         /* Emulate ghash if needed */  | 
593  | 0  |         for (i = 0; i < len; i += 16) { | 
594  | 0  |             memcpy(tmp, &inp[i], sizeof(tmp));  | 
595  | 0  |             Xi[0] ^= tmp[0];  | 
596  | 0  |             Xi[1] ^= tmp[1];  | 
597  | 0  |             funcs.gmult(Xi, Htable);  | 
598  | 0  |         }  | 
599  | 0  |     }  | 
600  | 0  | }  | 
601  |  |  | 
602  |  | void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)  | 
603  | 0  | { | 
604  | 0  |     DECLARE_IS_ENDIAN;  | 
605  |  |  | 
606  | 0  |     memset(ctx, 0, sizeof(*ctx));  | 
607  | 0  |     ctx->block = block;  | 
608  | 0  |     ctx->key = key;  | 
609  |  |  | 
610  | 0  |     (*block) (ctx->H.c, ctx->H.c, key);  | 
611  |  |  | 
612  | 0  |     if (IS_LITTLE_ENDIAN) { | 
613  |  |         /* H is stored in host byte order */  | 
614  |  | #ifdef BSWAP8  | 
615  |  |         ctx->H.u[0] = BSWAP8(ctx->H.u[0]);  | 
616  |  |         ctx->H.u[1] = BSWAP8(ctx->H.u[1]);  | 
617  |  | #else  | 
618  | 0  |         u8 *p = ctx->H.c;  | 
619  | 0  |         u64 hi, lo;  | 
620  | 0  |         hi = (u64)GETU32(p) << 32 | GETU32(p + 4);  | 
621  | 0  |         lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);  | 
622  | 0  |         ctx->H.u[0] = hi;  | 
623  | 0  |         ctx->H.u[1] = lo;  | 
624  | 0  | #endif  | 
625  | 0  |     }  | 
626  |  |  | 
627  | 0  |     gcm_get_funcs(&ctx->funcs);  | 
628  | 0  |     ctx->funcs.ginit(ctx->Htable, ctx->H.u);  | 
629  | 0  | }  | 
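
CRYPTO_gcm128_init() derives the hash key H = E_K(0^128) — ctx->H.c is all-zero after the memset when the block cipher is applied to it — converts it to host byte order, and expands it into Htable. A hypothetical one-shot usage sketch with AES as the block cipher; CRYPTO_gcm128_tag() is declared in <openssl/modes.h>, not in this file, and code outside the library, where GCM128_CONTEXT is opaque, would use CRYPTO_gcm128_new()/CRYPTO_gcm128_release() instead of a stack context:

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    static void gcm_encrypt_demo(const unsigned char key[16],
                                 const unsigned char iv[12],
                                 const unsigned char *aad, size_t aadlen,
                                 const unsigned char *pt, unsigned char *ct,
                                 size_t len, unsigned char tag[16])
    {
        AES_KEY aes;
        GCM128_CONTEXT ctx;                 /* complete type inside the lib */

        AES_set_encrypt_key(key, 128, &aes);
        CRYPTO_gcm128_init(&ctx, &aes, (block128_f)AES_encrypt);
        CRYPTO_gcm128_setiv(&ctx, iv, 12);  /* 96-bit IV fast path          */
        CRYPTO_gcm128_aad(&ctx, aad, aadlen);
        CRYPTO_gcm128_encrypt(&ctx, pt, ct, len);
        CRYPTO_gcm128_tag(&ctx, tag, 16);   /* emit the 16-byte tag         */
    }
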
630  |  |  | 
631  |  | void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,  | 
632  |  |                          size_t len)  | 
633  | 0  | { | 
634  | 0  |     DECLARE_IS_ENDIAN;  | 
635  | 0  |     unsigned int ctr;  | 
636  |  |  | 
637  | 0  |     ctx->len.u[0] = 0;          /* AAD length */  | 
638  | 0  |     ctx->len.u[1] = 0;          /* message length */  | 
639  | 0  |     ctx->ares = 0;  | 
640  | 0  |     ctx->mres = 0;  | 
641  |  |  | 
642  | 0  |     if (len == 12) { | 
643  | 0  |         memcpy(ctx->Yi.c, iv, 12);  | 
644  | 0  |         ctx->Yi.c[12] = 0;  | 
645  | 0  |         ctx->Yi.c[13] = 0;  | 
646  | 0  |         ctx->Yi.c[14] = 0;  | 
647  | 0  |         ctx->Yi.c[15] = 1;  | 
648  | 0  |         ctr = 1;  | 
649  | 0  |     } else { | 
650  | 0  |         size_t i;  | 
651  | 0  |         u64 len0 = len;  | 
652  |  |  | 
653  |  |         /* Borrow ctx->Xi to calculate initial Yi */  | 
654  | 0  |         ctx->Xi.u[0] = 0;  | 
655  | 0  |         ctx->Xi.u[1] = 0;  | 
656  |  |  | 
657  | 0  |         while (len >= 16) { | 
658  | 0  |             for (i = 0; i < 16; ++i)  | 
659  | 0  |                 ctx->Xi.c[i] ^= iv[i];  | 
660  | 0  |             GCM_MUL(ctx);  | 
661  | 0  |             iv += 16;  | 
662  | 0  |             len -= 16;  | 
663  | 0  |         }  | 
664  | 0  |         if (len) { | 
665  | 0  |             for (i = 0; i < len; ++i)  | 
666  | 0  |                 ctx->Xi.c[i] ^= iv[i];  | 
667  | 0  |             GCM_MUL(ctx);  | 
668  | 0  |         }  | 
669  | 0  |         len0 <<= 3;  | 
670  | 0  |         if (IS_LITTLE_ENDIAN) { | 
671  |  | #ifdef BSWAP8  | 
672  |  |             ctx->Xi.u[1] ^= BSWAP8(len0);  | 
673  |  | #else  | 
674  | 0  |             ctx->Xi.c[8] ^= (u8)(len0 >> 56);  | 
675  | 0  |             ctx->Xi.c[9] ^= (u8)(len0 >> 48);  | 
676  | 0  |             ctx->Xi.c[10] ^= (u8)(len0 >> 40);  | 
677  | 0  |             ctx->Xi.c[11] ^= (u8)(len0 >> 32);  | 
678  | 0  |             ctx->Xi.c[12] ^= (u8)(len0 >> 24);  | 
679  | 0  |             ctx->Xi.c[13] ^= (u8)(len0 >> 16);  | 
680  | 0  |             ctx->Xi.c[14] ^= (u8)(len0 >> 8);  | 
681  | 0  |             ctx->Xi.c[15] ^= (u8)(len0);  | 
682  | 0  | #endif  | 
683  | 0  |         } else { | 
684  | 0  |             ctx->Xi.u[1] ^= len0;  | 
685  | 0  |         }  | 
686  |  |  | 
687  | 0  |         GCM_MUL(ctx);  | 
688  |  |  | 
689  | 0  |         if (IS_LITTLE_ENDIAN)  | 
690  |  | #ifdef BSWAP4  | 
691  |  |             ctr = BSWAP4(ctx->Xi.d[3]);  | 
692  |  | #else  | 
693  | 0  |             ctr = GETU32(ctx->Xi.c + 12);  | 
694  | 0  | #endif  | 
695  | 0  |         else  | 
696  | 0  |             ctr = ctx->Xi.d[3];  | 
697  |  |  | 
698  |  |         /* Copy borrowed Xi to Yi */  | 
699  | 0  |         ctx->Yi.u[0] = ctx->Xi.u[0];  | 
700  | 0  |         ctx->Yi.u[1] = ctx->Xi.u[1];  | 
701  | 0  |     }  | 
702  |  |  | 
703  | 0  |     ctx->Xi.u[0] = 0;  | 
704  | 0  |     ctx->Xi.u[1] = 0;  | 
705  |  |  | 
706  | 0  |     (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);  | 
707  | 0  |     ++ctr;  | 
708  | 0  |     if (IS_LITTLE_ENDIAN)  | 
709  |  | #ifdef BSWAP4  | 
710  |  |         ctx->Yi.d[3] = BSWAP4(ctr);  | 
711  |  | #else  | 
712  | 0  |         PUTU32(ctx->Yi.c + 12, ctr);  | 
713  | 0  | #endif  | 
714  | 0  |     else  | 
715  | 0  |         ctx->Yi.d[3] = ctr;  | 
716  | 0  | }  | 
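
The two IV paths above implement the pre-counter block derivation from NIST SP 800-38D:

    Y0 = IV || 0^31 || 1                               if |IV| = 96 bits
    Y0 = GHASH_H(IV || 0^s || [0]_64 || [len(IV)]_64)  otherwise

where 0^s pads the IV to a 16-byte boundary; the len0 <<= 3 step supplies the IV length in bits for the final GHASH block. EK0 = E_K(Y0) is computed and saved before the counter is incremented, since it is needed later to mask the authentication tag.
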
717  |  |  | 
718  |  | int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,  | 
719  |  |                       size_t len)  | 
720  | 0  | { | 
721  | 0  |     size_t i;  | 
722  | 0  |     unsigned int n;  | 
723  | 0  |     u64 alen = ctx->len.u[0];  | 
724  |  |  | 
725  | 0  |     if (ctx->len.u[1])  | 
726  | 0  |         return -2;  | 
727  |  |  | 
728  | 0  |     alen += len;  | 
729  | 0  |     if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))  | 
730  | 0  |         return -1;  | 
731  | 0  |     ctx->len.u[0] = alen;  | 
732  |  |  | 
733  | 0  |     n = ctx->ares;  | 
734  | 0  |     if (n) { | 
735  | 0  |         while (n && len) { | 
736  | 0  |             ctx->Xi.c[n] ^= *(aad++);  | 
737  | 0  |             --len;  | 
738  | 0  |             n = (n + 1) % 16;  | 
739  | 0  |         }  | 
740  | 0  |         if (n == 0)  | 
741  | 0  |             GCM_MUL(ctx);  | 
742  | 0  |         else { | 
743  | 0  |             ctx->ares = n;  | 
744  | 0  |             return 0;  | 
745  | 0  |         }  | 
746  | 0  |     }  | 
747  | 0  | #ifdef GHASH  | 
748  | 0  |     if ((i = (len & (size_t)-16))) { | 
749  | 0  |         GHASH(ctx, aad, i);  | 
750  | 0  |         aad += i;  | 
751  | 0  |         len -= i;  | 
752  | 0  |     }  | 
753  |  | #else  | 
754  |  |     while (len >= 16) { | 
755  |  |         for (i = 0; i < 16; ++i)  | 
756  |  |             ctx->Xi.c[i] ^= aad[i];  | 
757  |  |         GCM_MUL(ctx);  | 
758  |  |         aad += 16;  | 
759  |  |         len -= 16;  | 
760  |  |     }  | 
761  |  | #endif  | 
762  | 0  |     if (len) { | 
763  | 0  |         n = (unsigned int)len;  | 
764  | 0  |         for (i = 0; i < len; ++i)  | 
765  | 0  |             ctx->Xi.c[i] ^= aad[i];  | 
766  | 0  |     }  | 
767  |  |  | 
768  | 0  |     ctx->ares = n;  | 
769  | 0  |     return 0;  | 
770  | 0  | }  | 
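
CRYPTO_gcm128_aad() returns -2 once any payload has been processed (ctx->len.u[1] != 0), because GCM requires all AAD to precede the message, and -1 if the accumulated AAD would exceed 2^61 bytes, matching the specification's bit-length bound. A partial 16-byte block of AAD is buffered via ctx->ares and folded into Xi either by the next AAD call or by the first encrypt/decrypt call.
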
771  |  |  | 
772  |  | int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,  | 
773  |  |                           const unsigned char *in, unsigned char *out,  | 
774  |  |                           size_t len)  | 
775  | 0  | { | 
776  | 0  |     DECLARE_IS_ENDIAN;  | 
777  | 0  |     unsigned int n, ctr, mres;  | 
778  | 0  |     size_t i;  | 
779  | 0  |     u64 mlen = ctx->len.u[1];  | 
780  | 0  |     block128_f block = ctx->block;  | 
781  | 0  |     void *key = ctx->key;  | 
782  |  |  | 
783  | 0  |     mlen += len;  | 
784  | 0  |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))  | 
785  | 0  |         return -1;  | 
786  | 0  |     ctx->len.u[1] = mlen;  | 
787  |  |  | 
788  | 0  |     mres = ctx->mres;  | 
789  |  |  | 
790  | 0  |     if (ctx->ares) { | 
791  |  |         /* First call to encrypt finalizes GHASH(AAD) */  | 
792  | 0  | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)  | 
793  | 0  |         if (len == 0) { | 
794  | 0  |             GCM_MUL(ctx);  | 
795  | 0  |             ctx->ares = 0;  | 
796  | 0  |             return 0;  | 
797  | 0  |         }  | 
798  | 0  |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));  | 
799  | 0  |         ctx->Xi.u[0] = 0;  | 
800  | 0  |         ctx->Xi.u[1] = 0;  | 
801  | 0  |         mres = sizeof(ctx->Xi);  | 
802  |  | #else  | 
803  |  |         GCM_MUL(ctx);  | 
804  |  | #endif  | 
805  | 0  |         ctx->ares = 0;  | 
806  | 0  |     }  | 
807  |  |  | 
808  | 0  |     if (IS_LITTLE_ENDIAN)  | 
809  |  | #ifdef BSWAP4  | 
810  |  |         ctr = BSWAP4(ctx->Yi.d[3]);  | 
811  |  | #else  | 
812  | 0  |         ctr = GETU32(ctx->Yi.c + 12);  | 
813  | 0  | #endif  | 
814  | 0  |     else  | 
815  | 0  |         ctr = ctx->Yi.d[3];  | 
816  |  |  | 
817  | 0  |     n = mres % 16;  | 
818  | 0  | #if !defined(OPENSSL_SMALL_FOOTPRINT)  | 
819  | 0  |     if (16 % sizeof(size_t) == 0) { /* always true actually */ | 
820  | 0  |         do { | 
821  | 0  |             if (n) { | 
822  | 0  | # if defined(GHASH)  | 
823  | 0  |                 while (n && len) { | 
824  | 0  |                     ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];  | 
825  | 0  |                     --len;  | 
826  | 0  |                     n = (n + 1) % 16;  | 
827  | 0  |                 }  | 
828  | 0  |                 if (n == 0) { | 
829  | 0  |                     GHASH(ctx, ctx->Xn, mres);  | 
830  | 0  |                     mres = 0;  | 
831  | 0  |                 } else { | 
832  | 0  |                     ctx->mres = mres;  | 
833  | 0  |                     return 0;  | 
834  | 0  |                 }  | 
835  |  | # else  | 
836  |  |                 while (n && len) { | 
837  |  |                     ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];  | 
838  |  |                     --len;  | 
839  |  |                     n = (n + 1) % 16;  | 
840  |  |                 }  | 
841  |  |                 if (n == 0) { | 
842  |  |                     GCM_MUL(ctx);  | 
843  |  |                     mres = 0;  | 
844  |  |                 } else { | 
845  |  |                     ctx->mres = n;  | 
846  |  |                     return 0;  | 
847  |  |                 }  | 
848  |  | # endif  | 
849  | 0  |             }  | 
850  |  | # if defined(STRICT_ALIGNMENT)  | 
851  |  |             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)  | 
852  |  |                 break;  | 
853  |  | # endif  | 
854  | 0  | # if defined(GHASH)  | 
855  | 0  |             if (len >= 16 && mres) { | 
856  | 0  |                 GHASH(ctx, ctx->Xn, mres);  | 
857  | 0  |                 mres = 0;  | 
858  | 0  |             }  | 
859  | 0  | #  if defined(GHASH_CHUNK)  | 
860  | 0  |             while (len >= GHASH_CHUNK) { | 
861  | 0  |                 size_t j = GHASH_CHUNK;  | 
862  |  |  | 
863  | 0  |                 while (j) { | 
864  | 0  |                     size_t_aX *out_t = (size_t_aX *)out;  | 
865  | 0  |                     const size_t_aX *in_t = (const size_t_aX *)in;  | 
866  |  |  | 
867  | 0  |                     (*block) (ctx->Yi.c, ctx->EKi.c, key);  | 
868  | 0  |                     ++ctr;  | 
869  | 0  |                     if (IS_LITTLE_ENDIAN)  | 
870  |  | #   ifdef BSWAP4  | 
871  |  |                         ctx->Yi.d[3] = BSWAP4(ctr);  | 
872  |  | #   else  | 
873  | 0  |                         PUTU32(ctx->Yi.c + 12, ctr);  | 
874  | 0  | #   endif  | 
875  | 0  |                     else  | 
876  | 0  |                         ctx->Yi.d[3] = ctr;  | 
877  | 0  |                     for (i = 0; i < 16 / sizeof(size_t); ++i)  | 
878  | 0  |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];  | 
879  | 0  |                     out += 16;  | 
880  | 0  |                     in += 16;  | 
881  | 0  |                     j -= 16;  | 
882  | 0  |                 }  | 
883  | 0  |                 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);  | 
884  | 0  |                 len -= GHASH_CHUNK;  | 
885  | 0  |             }  | 
886  | 0  | #  endif  | 
887  | 0  |             if ((i = (len & (size_t)-16))) { | 
888  | 0  |                 size_t j = i;  | 
889  |  |  | 
890  | 0  |                 while (len >= 16) { | 
891  | 0  |                     size_t_aX *out_t = (size_t_aX *)out;  | 
892  | 0  |                     const size_t_aX *in_t = (const size_t_aX *)in;  | 
893  |  |  | 
894  | 0  |                     (*block) (ctx->Yi.c, ctx->EKi.c, key);  | 
895  | 0  |                     ++ctr;  | 
896  | 0  |                     if (IS_LITTLE_ENDIAN)  | 
897  |  | #  ifdef BSWAP4  | 
898  |  |                         ctx->Yi.d[3] = BSWAP4(ctr);  | 
899  |  | #  else  | 
900  | 0  |                         PUTU32(ctx->Yi.c + 12, ctr);  | 
901  | 0  | #  endif  | 
902  | 0  |                     else  | 
903  | 0  |                         ctx->Yi.d[3] = ctr;  | 
904  | 0  |                     for (i = 0; i < 16 / sizeof(size_t); ++i)  | 
905  | 0  |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];  | 
906  | 0  |                     out += 16;  | 
907  | 0  |                     in += 16;  | 
908  | 0  |                     len -= 16;  | 
909  | 0  |                 }  | 
910  | 0  |                 GHASH(ctx, out - j, j);  | 
911  | 0  |             }  | 
912  |  | # else  | 
913  |  |             while (len >= 16) { | 
914  |  |                 size_t *out_t = (size_t *)out;  | 
915  |  |                 const size_t *in_t = (const size_t *)in;  | 
916  |  |  | 
917  |  |                 (*block) (ctx->Yi.c, ctx->EKi.c, key);  | 
918  |  |                 ++ctr;  | 
919  |  |                 if (IS_LITTLE_ENDIAN)  | 
920  |  | #  ifdef BSWAP4  | 
921  |  |                     ctx->Yi.d[3] = BSWAP4(ctr);  | 
922  |  | #  else  | 
923  |  |                     PUTU32(ctx->Yi.c + 12, ctr);  | 
924  |  | #  endif  | 
925  |  |                 else  | 
926  |  |                     ctx->Yi.d[3] = ctr;  | 
927  |  |                 for (i = 0; i < 16 / sizeof(size_t); ++i)  | 
928  |  |                     ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];  | 
929  |  |                 GCM_MUL(ctx);  | 
930  |  |                 out += 16;  | 
931  |  |                 in += 16;  | 
932  |  |                 len -= 16;  | 
933  |  |             }  | 
934  |  | # endif  | 
935  | 0  |             if (len) { | 
936  | 0  |                 (*block) (ctx->Yi.c, ctx->EKi.c, key);  | 
937  | 0  |                 ++ctr;  | 
938  | 0  |                 if (IS_LITTLE_ENDIAN)  | 
939  |  | # ifdef BSWAP4  | 
940  |  |                     ctx->Yi.d[3] = BSWAP4(ctr);  | 
941  |  | # else  | 
942  | 0  |                     PUTU32(ctx->Yi.c + 12, ctr);  | 
943  | 0  | # endif  | 
944  | 0  |                 else  | 
945  | 0  |                     ctx->Yi.d[3] = ctr;  | 
946  | 0  | # if defined(GHASH)  | 
947  | 0  |                 while (len--) { | 
948  | 0  |                     ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];  | 
949  | 0  |                     ++n;  | 
950  | 0  |                 }  | 
951  |  | # else  | 
952  |  |                 while (len--) { | 
953  |  |                     ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];  | 
954  |  |                     ++n;  | 
955  |  |                 }  | 
956  |  |                 mres = n;  | 
957  |  | # endif  | 
958  | 0  |             }  | 
959  |  |  | 
960  | 0  |             ctx->mres = mres;  | 
961  | 0  |             return 0;  | 
962  | 0  |         } while (0);  | 
963  | 0  |     }  | 
964  | 0  | #endif  | 
965  | 0  |     for (i = 0; i < len; ++i) { | 
966  | 0  |         if (n == 0) { | 
967  | 0  |             (*block) (ctx->Yi.c, ctx->EKi.c, key);  | 
968  | 0  |             ++ctr;  | 
969  | 0  |             if (IS_LITTLE_ENDIAN)  | 
970  |  | #ifdef BSWAP4  | 
971  |  |                 ctx->Yi.d[3] = BSWAP4(ctr);  | 
972  |  | #else  | 
973  | 0  |                 PUTU32(ctx->Yi.c + 12, ctr);  | 
974  | 0  | #endif  | 
975  | 0  |             else  | 
976  | 0  |                 ctx->Yi.d[3] = ctr;  | 
977  | 0  |         }  | 
978  | 0  | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)  | 
979  | 0  |         ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];  | 
980  | 0  |         n = (n + 1) % 16;  | 
981  | 0  |         if (mres == sizeof(ctx->Xn)) { | 
982  | 0  |             GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));  | 
983  | 0  |             mres = 0;  | 
984  | 0  |         }  | 
985  |  | #else  | 
986  |  |         ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];  | 
987  |  |         mres = n = (n + 1) % 16;  | 
988  |  |         if (n == 0)  | 
989  |  |             GCM_MUL(ctx);  | 
990  |  | #endif  | 
991  | 0  |     }  | 
992  |  | 
  | 
993  | 0  |     ctx->mres = mres;  | 
994  | 0  |     return 0;  | 
995  | 0  | }  | 
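
CRYPTO_gcm128_encrypt() is fully incremental: ctx->mres records the position within the current keystream block between calls and, when the GHASH macro is available, not-yet-hashed ciphertext accumulates in ctx->Xn so hashing can proceed in larger batches. A message may therefore be supplied in arbitrary pieces, e.g.

    CRYPTO_gcm128_encrypt(&ctx, pt,     ct,     5);   /* opens a block  */
    CRYPTO_gcm128_encrypt(&ctx, pt + 5, ct + 5, 27);  /* continues it   */

which yields the same ciphertext and tag as a single 32-byte call.
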
996  |  |  | 
997  |  | int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,  | 
998  |  |                           const unsigned char *in, unsigned char *out,  | 
999  |  |                           size_t len)  | 
1000  | 0  | { | 
1001  | 0  |     DECLARE_IS_ENDIAN;  | 
1002  | 0  |     unsigned int n, ctr, mres;  | 
1003  | 0  |     size_t i;  | 
1004  | 0  |     u64 mlen = ctx->len.u[1];  | 
1005  | 0  |     block128_f block = ctx->block;  | 
1006  | 0  |     void *key = ctx->key;  | 
1007  |  |  | 
1008  | 0  |     mlen += len;  | 
1009  | 0  |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))  | 
1010  | 0  |         return -1;  | 
1011  | 0  |     ctx->len.u[1] = mlen;  | 
1012  |  |  | 
1013  | 0  |     mres = ctx->mres;  | 
1014  |  |  | 
1015  | 0  |     if (ctx->ares) { | 
1016  |  |         /* First call to decrypt finalizes GHASH(AAD) */  | 
1017  | 0  | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)  | 
1018  | 0  |         if (len == 0) { | 
1019  | 0  |             GCM_MUL(ctx);  | 
1020  | 0  |             ctx->ares = 0;  | 
1021  | 0  |             return 0;  | 
1022  | 0  |         }  | 
1023  | 0  |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));  | 
1024  | 0  |         ctx->Xi.u[0] = 0;  | 
1025  | 0  |         ctx->Xi.u[1] = 0;  | 
1026  | 0  |         mres = sizeof(ctx->Xi);  | 
1027  |  | #else  | 
1028  |  |         GCM_MUL(ctx);  | 
1029  |  | #endif  | 
1030  | 0  |         ctx->ares = 0;  | 
1031  | 0  |     }  | 
1032  |  |  | 
1033  | 0  |     if (IS_LITTLE_ENDIAN)  | 
1034  |  | #ifdef BSWAP4  | 
1035  |  |         ctr = BSWAP4(ctx->Yi.d[3]);  | 
1036  |  | #else  | 
1037  | 0  |         ctr = GETU32(ctx->Yi.c + 12);  | 
1038  | 0  | #endif  | 
1039  | 0  |     else  | 
1040  | 0  |         ctr = ctx->Yi.d[3];  | 
1041  |  |  | 
1042  | 0  |     n = mres % 16;  | 
1043  | 0  | #if !defined(OPENSSL_SMALL_FOOTPRINT)  | 
1044  | 0  |     if (16 % sizeof(size_t) == 0) { /* always true actually */ | 
1045  | 0  |         do { | 
1046  | 0  |             if (n) { | 
1047  | 0  | # if defined(GHASH)  | 
1048  | 0  |                 while (n && len) { | 
1049  | 0  |                     *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];  | 
1050  | 0  |                     --len;  | 
1051  | 0  |                     n = (n + 1) % 16;  | 
1052  | 0  |                 }  | 
1053  | 0  |                 if (n == 0) { | 
1054  | 0  |                     GHASH(ctx, ctx->Xn, mres);  | 
1055  | 0  |                     mres = 0;  | 
1056  | 0  |                 } else { | 
1057  | 0  |                     ctx->mres = mres;  | 
1058  | 0  |                     return 0;  | 
1059  | 0  |                 }  | 
1060  |  | # else  | 
1061  |  |                 while (n && len) { | 
1062  |  |                     u8 c = *(in++);  | 
1063  |  |                     *(out++) = c ^ ctx->EKi.c[n];  | 
1064  |  |                     ctx->Xi.c[n] ^= c;  | 
1065  |  |                     --len;  | 
1066  |  |                     n = (n + 1) % 16;  | 
1067  |  |                 }  | 
1068  |  |                 if (n == 0) { | 
1069  |  |                     GCM_MUL(ctx);  | 
1070  |  |                     mres = 0;  | 
1071  |  |                 } else { | 
1072  |  |                     ctx->mres = n;  | 
1073  |  |                     return 0;  | 
1074  |  |                 }  | 
1075  |  | # endif  | 
1076  | 0  |             }  | 
1077  |  | # if defined(STRICT_ALIGNMENT)  | 
1078  |  |             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)  | 
1079  |  |                 break;  | 
1080  |  | # endif  | 
1081  | 0  | # if defined(GHASH)  | 
1082  | 0  |             if (len >= 16 && mres) { | 
1083  | 0  |                 GHASH(ctx, ctx->Xn, mres);  | 
1084  | 0  |                 mres = 0;  | 
1085  | 0  |             }  | 
1086  | 0  | #  if defined(GHASH_CHUNK)  | 
1087  | 0  |             while (len >= GHASH_CHUNK) { | 
1088  | 0  |                 size_t j = GHASH_CHUNK;  | 
1089  |  |  | 
1090  | 0  |                 GHASH(ctx, in, GHASH_CHUNK);  | 
1091  | 0  |                 while (j) { | 
1092  | 0  |                     size_t_aX *out_t = (size_t_aX *)out;  | 
1093  | 0  |                     const size_t_aX *in_t = (const size_t_aX *)in;  | 
1094  |  |  | 
1095  | 0  |                     (*block) (ctx->Yi.c, ctx->EKi.c, key);  | 
1096  | 0  |                     ++ctr;  | 
1097  | 0  |                     if (IS_LITTLE_ENDIAN)  | 
1098  |  | #   ifdef BSWAP4  | 
1099  |  |                         ctx->Yi.d[3] = BSWAP4(ctr);  | 
1100  |  | #   else  | 
1101  | 0  |                         PUTU32(ctx->Yi.c + 12, ctr);  | 
1102  | 0  | #   endif  | 
1103  | 0  |                     else  | 
1104  | 0  |                         ctx->Yi.d[3] = ctr;  | 
1105  | 0  |                     for (i = 0; i < 16 / sizeof(size_t); ++i)  | 
1106  | 0  |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];  | 
1107  | 0  |                     out += 16;  | 
1108  | 0  |                     in += 16;  | 
1109  | 0  |                     j -= 16;  | 
1110  | 0  |                 }  | 
1111  | 0  |                 len -= GHASH_CHUNK;  | 
1112  | 0  |             }  | 
1113  | 0  | #  endif  | 
1114  | 0  |             if ((i = (len & (size_t)-16))) { | 
1115  | 0  |                 GHASH(ctx, in, i);  | 
1116  | 0  |                 while (len >= 16) { | 
1117  | 0  |                     size_t_aX *out_t = (size_t_aX *)out;  | 
1118  | 0  |                     const size_t_aX *in_t = (const size_t_aX *)in;  | 
1119  |  |  | 
1120  | 0  |                     (*block) (ctx->Yi.c, ctx->EKi.c, key);  | 
1121  | 0  |                     ++ctr;  | 
1122  | 0  |                     if (IS_LITTLE_ENDIAN)  | 
1123  |  | #  ifdef BSWAP4  | 
1124  |  |                         ctx->Yi.d[3] = BSWAP4(ctr);  | 
1125  |  | #  else  | 
1126  | 0  |                         PUTU32(ctx->Yi.c + 12, ctr);  | 
1127  | 0  | #  endif  | 
1128  | 0  |                     else  | 
1129  | 0  |                         ctx->Yi.d[3] = ctr;  | 
1130  | 0  |                     for (i = 0; i < 16 / sizeof(size_t); ++i)  | 
1131  | 0  |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];  | 
1132  | 0  |                     out += 16;  | 
1133  | 0  |                     in += 16;  | 
1134  | 0  |                     len -= 16;  | 
1135  | 0  |                 }  | 
1136  | 0  |             }  | 
1137  |  | # else  | 
1138  |  |             while (len >= 16) { | 
1139  |  |                 size_t *out_t = (size_t *)out;  | 
1140  |  |                 const size_t *in_t = (const size_t *)in;  | 
1141  |  |  | 
1142  |  |                 (*block) (ctx->Yi.c, ctx->EKi.c, key);  | 
1143  |  |                 ++ctr;  | 
1144  |  |                 if (IS_LITTLE_ENDIAN)  | 
1145  |  | #  ifdef BSWAP4  | 
1146  |  |                     ctx->Yi.d[3] = BSWAP4(ctr);  | 
1147  |  | #  else  | 
1148  |  |                     PUTU32(ctx->Yi.c + 12, ctr);  | 
1149  |  | #  endif  | 
1150  |  |                 else  | 
1151  |  |                     ctx->Yi.d[3] = ctr;  | 
1152  |  |                 for (i = 0; i < 16 / sizeof(size_t); ++i) { | 
1153  |  |                     size_t c = in_t[i];  | 
1154  |  |                     out_t[i] = c ^ ctx->EKi.t[i];  | 
1155  |  |                     ctx->Xi.t[i] ^= c;  | 
1156  |  |                 }  | 
1157  |  |                 GCM_MUL(ctx);  | 
1158  |  |                 out += 16;  | 
1159  |  |                 in += 16;  | 
1160  |  |                 len -= 16;  | 
1161  |  |             }  | 
1162  |  | # endif  | 
1163  | 0  |             if (len) { | 
1164  | 0  |                 (*block) (ctx->Yi.c, ctx->EKi.c, key);  | 
1165  | 0  |                 ++ctr;  | 
1166  | 0  |                 if (IS_LITTLE_ENDIAN)  | 
1167  |  | # ifdef BSWAP4  | 
1168  |  |                     ctx->Yi.d[3] = BSWAP4(ctr);  | 
1169  |  | # else  | 
1170  | 0  |                     PUTU32(ctx->Yi.c + 12, ctr);  | 
1171  | 0  | # endif  | 
1172  | 0  |                 else  | 
1173  | 0  |                     ctx->Yi.d[3] = ctr;  | 
1174  | 0  | # if defined(GHASH)  | 
1175  | 0  |                 while (len--) { | 
1176  | 0  |                     out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];  | 
1177  | 0  |                     ++n;  | 
1178  | 0  |                 }  | 
1179  |  | # else  | 
1180  |  |                 while (len--) { | 
1181  |  |                     u8 c = in[n];  | 
1182  |  |                     ctx->Xi.c[n] ^= c;  | 
1183  |  |                     out[n] = c ^ ctx->EKi.c[n];  | 
1184  |  |                     ++n;  | 
1185  |  |                 }  | 
1186  |  |                 mres = n;  | 
1187  |  | # endif  | 
1188  | 0  |             }  | 
1189  |  |  | 
1190  | 0  |             ctx->mres = mres;  | 
1191  | 0  |             return 0;  | 
1192  | 0  |         } while (0);  | 
1193  | 0  |     }  | 
1194  | 0  | #endif  | 
1195  | 0  |     for (i = 0; i < len; ++i) { | 
1196  | 0  |         u8 c;  | 
1197  | 0  |         if (n == 0) { | 
1198  | 0  |             (*block) (ctx->Yi.c, ctx->EKi.c, key);  | 
1199  | 0  |             ++ctr;  | 
1200  | 0  |             if (IS_LITTLE_ENDIAN)  | 
1201  |  | #ifdef BSWAP4  | 
1202  |  |                 ctx->Yi.d[3] = BSWAP4(ctr);  | 
1203  |  | #else  | 
1204  | 0  |                 PUTU32(ctx->Yi.c + 12, ctr);  | 
1205  | 0  | #endif  | 
1206  | 0  |             else  | 
1207  | 0  |                 ctx->Yi.d[3] = ctr;  | 
1208  | 0  |         }  | 
1209  | 0  | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)  | 
1210  | 0  |         out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];  | 
1211  | 0  |         n = (n + 1) % 16;  | 
1212  | 0  |         if (mres == sizeof(ctx->Xn)) { | 
1213  | 0  |             GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));  | 
1214  | 0  |             mres = 0;  | 
1215  | 0  |         }  | 
1216  |  | #else  | 
1217  |  |         c = in[i];  | 
1218  |  |         out[i] = c ^ ctx->EKi.c[n];  | 
1219  |  |         ctx->Xi.c[n] ^= c;  | 
1220  |  |         mres = n = (n + 1) % 16;  | 
1221  |  |         if (n == 0)  | 
1222  |  |             GCM_MUL(ctx);  | 
1223  |  | #endif  | 
1224  | 0  |     }  | 
1225  |  | 
  | 
1226  | 0  |     ctx->mres = mres;  | 
1227  | 0  |     return 0;  | 
1228  | 0  | }  | 
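
CRYPTO_gcm128_decrypt() mirrors the encrypt path with one ordering difference: the bulk loops hash the input before the keystream XOR produces plaintext (e.g. GHASH(ctx, in, GHASH_CHUNK) precedes the XOR loop above), whereas encryption hashes the just-written output. Both sides thus authenticate the ciphertext, as GCM requires, and in-place operation (in == out) remains safe.
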
1229  |  |  | 
1230  |  | int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,  | 
1231  |  |                                 const unsigned char *in, unsigned char *out,  | 
1232  |  |                                 size_t len, ctr128_f stream)  | 
1233  | 0  | { | 
1234  |  | #if defined(OPENSSL_SMALL_FOOTPRINT)  | 
1235  |  |     return CRYPTO_gcm128_encrypt(ctx, in, out, len);  | 
1236  |  | #else  | 
1237  | 0  |     DECLARE_IS_ENDIAN;  | 
1238  | 0  |     unsigned int n, ctr, mres;  | 
1239  | 0  |     size_t i;  | 
1240  | 0  |     u64 mlen = ctx->len.u[1];  | 
1241  | 0  |     void *key = ctx->key;  | 
1242  |  |  | 
1243  | 0  |     mlen += len;  | 
1244  | 0  |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))  | 
1245  | 0  |         return -1;  | 
1246  | 0  |     ctx->len.u[1] = mlen;  | 
1247  |  |  | 
1248  | 0  |     mres = ctx->mres;  | 
1249  |  |  | 
1250  | 0  |     if (ctx->ares) { | 
1251  |  |         /* First call to encrypt finalizes GHASH(AAD) */  | 
1252  | 0  | #if defined(GHASH)  | 
1253  | 0  |         if (len == 0) { | 
1254  | 0  |             GCM_MUL(ctx);  | 
1255  | 0  |             ctx->ares = 0;  | 
1256  | 0  |             return 0;  | 
1257  | 0  |         }  | 
1258  | 0  |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));  | 
1259  | 0  |         ctx->Xi.u[0] = 0;  | 
1260  | 0  |         ctx->Xi.u[1] = 0;  | 
1261  | 0  |         mres = sizeof(ctx->Xi);  | 
1262  |  | #else  | 
1263  |  |         GCM_MUL(ctx);  | 
1264  |  | #endif  | 
1265  | 0  |         ctx->ares = 0;  | 
1266  | 0  |     }  | 
1267  |  |  | 
1268  | 0  |     if (IS_LITTLE_ENDIAN)  | 
1269  |  | # ifdef BSWAP4  | 
1270  |  |         ctr = BSWAP4(ctx->Yi.d[3]);  | 
1271  |  | # else  | 
1272  | 0  |         ctr = GETU32(ctx->Yi.c + 12);  | 
1273  | 0  | # endif  | 
1274  | 0  |     else  | 
1275  | 0  |         ctr = ctx->Yi.d[3];  | 
1276  |  |  | 
1277  | 0  |     n = mres % 16;  | 
1278  | 0  |     if (n) { | 
1279  | 0  | # if defined(GHASH)  | 
1280  | 0  |         while (n && len) { | 
1281  | 0  |             ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];  | 
1282  | 0  |             --len;  | 
1283  | 0  |             n = (n + 1) % 16;  | 
1284  | 0  |         }  | 
1285  | 0  |         if (n == 0) { | 
1286  | 0  |             GHASH(ctx, ctx->Xn, mres);  | 
1287  | 0  |             mres = 0;  | 
1288  | 0  |         } else { | 
1289  | 0  |             ctx->mres = mres;  | 
1290  | 0  |             return 0;  | 
1291  | 0  |         }  | 
1292  |  | # else  | 
1293  |  |         while (n && len) { | 
1294  |  |             ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];  | 
1295  |  |             --len;  | 
1296  |  |             n = (n + 1) % 16;  | 
1297  |  |         }  | 
1298  |  |         if (n == 0) { | 
1299  |  |             GCM_MUL(ctx);  | 
1300  |  |             mres = 0;  | 
1301  |  |         } else { | 
1302  |  |             ctx->mres = n;  | 
1303  |  |             return 0;  | 
1304  |  |         }  | 
1305  |  | # endif  | 
1306  | 0  |     }  | 
1307  | 0  | # if defined(GHASH)  | 
1308  | 0  |     if (len >= 16 && mres) { | 
1309  | 0  |         GHASH(ctx, ctx->Xn, mres);  | 
1310  | 0  |         mres = 0;  | 
1311  | 0  |     }  | 
1312  | 0  | #  if defined(GHASH_CHUNK)  | 
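  |  |     /*  | 
  |  |      * Bulk loop: one |stream| call encrypts GHASH_CHUNK bytes worth of  | 
  |  |      * counter blocks, then one GHASH call hashes the freshly produced  | 
  |  |      * ciphertext, keeping both passes in large cache-friendly spans.  | 
  |  |      */  | 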
1313  | 0  |     while (len >= GHASH_CHUNK) { | 
1314  | 0  |         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);  | 
1315  | 0  |         ctr += GHASH_CHUNK / 16;  | 
1316  | 0  |         if (IS_LITTLE_ENDIAN)  | 
1317  |  | #   ifdef BSWAP4  | 
1318  |  |             ctx->Yi.d[3] = BSWAP4(ctr);  | 
1319  |  | #   else  | 
1320  | 0  |             PUTU32(ctx->Yi.c + 12, ctr);  | 
1321  | 0  | #   endif  | 
1322  | 0  |         else  | 
1323  | 0  |             ctx->Yi.d[3] = ctr;  | 
1324  | 0  |         GHASH(ctx, out, GHASH_CHUNK);  | 
1325  | 0  |         out += GHASH_CHUNK;  | 
1326  | 0  |         in += GHASH_CHUNK;  | 
1327  | 0  |         len -= GHASH_CHUNK;  | 
1328  | 0  |     }  | 
1329  | 0  | #  endif  | 
1330  | 0  | # endif  | 
1331  | 0  |     if ((i = (len & (size_t)-16))) { | 
1332  | 0  |         size_t j = i / 16;  | 
1333  |  |  | 
1334  | 0  |         (*stream) (in, out, j, key, ctx->Yi.c);  | 
1335  | 0  |         ctr += (unsigned int)j;  | 
1336  | 0  |         if (IS_LITTLE_ENDIAN)  | 
1337  |  | # ifdef BSWAP4  | 
1338  |  |             ctx->Yi.d[3] = BSWAP4(ctr);  | 
1339  |  | # else  | 
1340  | 0  |             PUTU32(ctx->Yi.c + 12, ctr);  | 
1341  | 0  | # endif  | 
1342  | 0  |         else  | 
1343  | 0  |             ctx->Yi.d[3] = ctr;  | 
1344  | 0  |         in += i;  | 
1345  | 0  |         len -= i;  | 
1346  | 0  | # if defined(GHASH)  | 
1347  | 0  |         GHASH(ctx, out, i);  | 
1348  | 0  |         out += i;  | 
1349  |  | # else  | 
1350  |  |         while (j--) { | 
1351  |  |             for (i = 0; i < 16; ++i)  | 
1352  |  |                 ctx->Xi.c[i] ^= out[i];  | 
1353  |  |             GCM_MUL(ctx);  | 
1354  |  |             out += 16;  | 
1355  |  |         }  | 
1356  |  | # endif  | 
1357  | 0  |     }  | 
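  |  |     /*  | 
  |  |      * Trailing partial block: generate one more keystream block and  | 
  |  |      * encrypt byte by byte; the leftover ciphertext bytes remain  | 
  |  |      * queued in Xn (or folded into Xi) until more data arrives or  | 
  |  |      * CRYPTO_gcm128_finish() pads them out.  | 
  |  |      */  | 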
1358  | 0  |     if (len) { | 
1359  | 0  |         (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);  | 
1360  | 0  |         ++ctr;  | 
1361  | 0  |         if (IS_LITTLE_ENDIAN)  | 
1362  |  | # ifdef BSWAP4  | 
1363  |  |             ctx->Yi.d[3] = BSWAP4(ctr);  | 
1364  |  | # else  | 
1365  | 0  |             PUTU32(ctx->Yi.c + 12, ctr);  | 
1366  | 0  | # endif  | 
1367  | 0  |         else  | 
1368  | 0  |             ctx->Yi.d[3] = ctr;  | 
1369  | 0  |         while (len--) { | 
1370  | 0  | # if defined(GHASH)  | 
1371  | 0  |             ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];  | 
1372  |  | # else  | 
1373  |  |             ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];  | 
1374  |  | # endif  | 
1375  | 0  |             ++n;  | 
1376  | 0  |         }  | 
1377  | 0  |     }  | 
1378  |  |  | 
1379  | 0  |     ctx->mres = mres;  | 
1380  | 0  |     return 0;  | 
1381  | 0  | #endif  | 
1382  | 0  | }  | 
1383  |  |  | 
1384  |  | int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,  | 
1385  |  |                                 const unsigned char *in, unsigned char *out,  | 
1386  |  |                                 size_t len, ctr128_f stream)  | 
1387  | 0  | { | 
1388  |  | #if defined(OPENSSL_SMALL_FOOTPRINT)  | 
1389  |  |     return CRYPTO_gcm128_decrypt(ctx, in, out, len);  | 
1390  |  | #else  | 
1391  | 0  |     DECLARE_IS_ENDIAN;  | 
1392  | 0  |     unsigned int n, ctr, mres;  | 
1393  | 0  |     size_t i;  | 
1394  | 0  |     u64 mlen = ctx->len.u[1];  | 
1395  | 0  |     void *key = ctx->key;  | 
1396  |  |  | 
1397  | 0  |     mlen += len;  | 
1398  | 0  |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))  | 
1399  | 0  |         return -1;  | 
1400  | 0  |     ctx->len.u[1] = mlen;  | 
1401  |  |  | 
1402  | 0  |     mres = ctx->mres;  | 
1403  |  |  | 
1404  | 0  |     if (ctx->ares) { | 
1405  |  |         /* First call to decrypt finalizes GHASH(AAD) */  | 
1406  | 0  | # if defined(GHASH)  | 
1407  | 0  |         if (len == 0) { | 
1408  | 0  |             GCM_MUL(ctx);  | 
1409  | 0  |             ctx->ares = 0;  | 
1410  | 0  |             return 0;  | 
1411  | 0  |         }  | 
1412  | 0  |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));  | 
1413  | 0  |         ctx->Xi.u[0] = 0;  | 
1414  | 0  |         ctx->Xi.u[1] = 0;  | 
1415  | 0  |         mres = sizeof(ctx->Xi);  | 
1416  |  | # else  | 
1417  |  |         GCM_MUL(ctx);  | 
1418  |  | # endif  | 
1419  | 0  |         ctx->ares = 0;  | 
1420  | 0  |     }  | 
1421  |  |  | 
1422  | 0  |     if (IS_LITTLE_ENDIAN)  | 
1423  |  | # ifdef BSWAP4  | 
1424  |  |         ctr = BSWAP4(ctx->Yi.d[3]);  | 
1425  |  | # else  | 
1426  | 0  |         ctr = GETU32(ctx->Yi.c + 12);  | 
1427  | 0  | # endif  | 
1428  | 0  |     else  | 
1429  | 0  |         ctr = ctx->Yi.d[3];  | 
1430  |  |  | 
1431  | 0  |     n = mres % 16;  | 
1432  | 0  |     if (n) { | 
1433  | 0  | # if defined(GHASH)  | 
1434  | 0  |         while (n && len) { | 
1435  | 0  |             *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];  | 
1436  | 0  |             --len;  | 
1437  | 0  |             n = (n + 1) % 16;  | 
1438  | 0  |         }  | 
1439  | 0  |         if (n == 0) { | 
1440  | 0  |             GHASH(ctx, ctx->Xn, mres);  | 
1441  | 0  |             mres = 0;  | 
1442  | 0  |         } else { | 
1443  | 0  |             ctx->mres = mres;  | 
1444  | 0  |             return 0;  | 
1445  | 0  |         }  | 
1446  |  | # else  | 
1447  |  |         while (n && len) { | 
1448  |  |             u8 c = *(in++);  | 
1449  |  |             *(out++) = c ^ ctx->EKi.c[n];  | 
1450  |  |             ctx->Xi.c[n] ^= c;  | 
1451  |  |             --len;  | 
1452  |  |             n = (n + 1) % 16;  | 
1453  |  |         }  | 
1454  |  |         if (n == 0) { | 
1455  |  |             GCM_MUL(ctx);  | 
1456  |  |             mres = 0;  | 
1457  |  |         } else { | 
1458  |  |             ctx->mres = n;  | 
1459  |  |             return 0;  | 
1460  |  |         }  | 
1461  |  | # endif  | 
1462  | 0  |     }  | 
1463  | 0  | # if defined(GHASH)  | 
1464  | 0  |     if (len >= 16 && mres) { | 
1465  | 0  |         GHASH(ctx, ctx->Xn, mres);  | 
1466  | 0  |         mres = 0;  | 
1467  | 0  |     }  | 
1468  | 0  | #  if defined(GHASH_CHUNK)  | 
1469  | 0  |     while (len >= GHASH_CHUNK) { | 
1470  | 0  |         GHASH(ctx, in, GHASH_CHUNK);  | 
1471  | 0  |         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);  | 
1472  | 0  |         ctr += GHASH_CHUNK / 16;  | 
1473  | 0  |         if (IS_LITTLE_ENDIAN)  | 
1474  |  | #   ifdef BSWAP4  | 
1475  |  |             ctx->Yi.d[3] = BSWAP4(ctr);  | 
1476  |  | #   else  | 
1477  | 0  |             PUTU32(ctx->Yi.c + 12, ctr);  | 
1478  | 0  | #   endif  | 
1479  | 0  |         else  | 
1480  | 0  |             ctx->Yi.d[3] = ctr;  | 
1481  | 0  |         out += GHASH_CHUNK;  | 
1482  | 0  |         in += GHASH_CHUNK;  | 
1483  | 0  |         len -= GHASH_CHUNK;  | 
1484  | 0  |     }  | 
1485  | 0  | #  endif  | 
1486  | 0  | # endif  | 
1487  | 0  |     if ((i = (len & (size_t)-16))) { | 
1488  | 0  |         size_t j = i / 16;  | 
1489  |  |  | 
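  |  |         /*  | 
  |  |          * When decrypting, GHASH runs over the ciphertext, i.e. over  | 
  |  |          * |in|, so hashing precedes the stream call; the non-GHASH  | 
  |  |          * fallback folds the input into Xi block by block, then  | 
  |  |          * rewinds |in| and decrypts.  | 
  |  |          */  | 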
1490  | 0  | # if defined(GHASH)  | 
1491  | 0  |         GHASH(ctx, in, i);  | 
1492  |  | # else  | 
1493  |  |         while (j--) { | 
1494  |  |             size_t k;  | 
1495  |  |             for (k = 0; k < 16; ++k)  | 
1496  |  |                 ctx->Xi.c[k] ^= in[k];  | 
1497  |  |             GCM_MUL(ctx);  | 
1498  |  |             in += 16;  | 
1499  |  |         }  | 
1500  |  |         j = i / 16;  | 
1501  |  |         in -= i;  | 
1502  |  | # endif  | 
1503  | 0  |         (*stream) (in, out, j, key, ctx->Yi.c);  | 
1504  | 0  |         ctr += (unsigned int)j;  | 
1505  | 0  |         if (IS_LITTLE_ENDIAN)  | 
1506  |  | # ifdef BSWAP4  | 
1507  |  |             ctx->Yi.d[3] = BSWAP4(ctr);  | 
1508  |  | # else  | 
1509  | 0  |             PUTU32(ctx->Yi.c + 12, ctr);  | 
1510  | 0  | # endif  | 
1511  | 0  |         else  | 
1512  | 0  |             ctx->Yi.d[3] = ctr;  | 
1513  | 0  |         out += i;  | 
1514  | 0  |         in += i;  | 
1515  | 0  |         len -= i;  | 
1516  | 0  |     }  | 
1517  | 0  |     if (len) { | 
1518  | 0  |         (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);  | 
1519  | 0  |         ++ctr;  | 
1520  | 0  |         if (IS_LITTLE_ENDIAN)  | 
1521  |  | # ifdef BSWAP4  | 
1522  |  |             ctx->Yi.d[3] = BSWAP4(ctr);  | 
1523  |  | # else  | 
1524  | 0  |             PUTU32(ctx->Yi.c + 12, ctr);  | 
1525  | 0  | # endif  | 
1526  | 0  |         else  | 
1527  | 0  |             ctx->Yi.d[3] = ctr;  | 
1528  | 0  |         while (len--) { | 
1529  | 0  | # if defined(GHASH)  | 
1530  | 0  |             out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];  | 
1531  |  | # else  | 
1532  |  |             u8 c = in[n];  | 
1533  |  |             ctx->Xi.c[mres++] ^= c;  | 
1534  |  |             out[n] = c ^ ctx->EKi.c[n];  | 
1535  |  | # endif  | 
1536  | 0  |             ++n;  | 
1537  | 0  |         }  | 
1538  | 0  |     }  | 
1539  |  |  | 
1540  | 0  |     ctx->mres = mres;  | 
1541  | 0  |     return 0;  | 
1542  | 0  | #endif  | 
1543  | 0  | }  | 
1544  |  |  | 
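  |  | /*-  | 
  |  |  * Finalisation: the 128-bit length block (the 64-bit AAD length  | 
  |  |  * followed by the 64-bit ciphertext length, both in bits, hence the  | 
  |  |  * << 3) is folded into the hash, which is then XORed with  | 
  |  |  * EK0 = E(K, Y0) to form the tag.  The caller's tag is compared with  | 
  |  |  * the constant-time CRYPTO_memcmp().  A decrypt-side sketch;  | 
  |  |  * |recv_tag| is a hypothetical buffer holding the received 16-byte  | 
  |  |  * tag:  | 
  |  |  *  | 
  |  |  *     if (CRYPTO_gcm128_finish(&gcm, recv_tag, 16) != 0)  | 
  |  |  *         ...authentication failed: discard the plaintext...  | 
  |  |  */  | 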
1545  |  | int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,  | 
1546  |  |                          size_t len)  | 
1547  | 0  | { | 
1548  | 0  |     DECLARE_IS_ENDIAN;  | 
1549  | 0  |     u64 alen = ctx->len.u[0] << 3;  | 
1550  | 0  |     u64 clen = ctx->len.u[1] << 3;  | 
1551  |  |  | 
1552  | 0  | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)  | 
1553  | 0  |     u128 bitlen;  | 
1554  | 0  |     unsigned int mres = ctx->mres;  | 
1555  |  |  | 
1556  | 0  |     if (mres) { | 
1557  | 0  |         unsigned blocks = (mres + 15) & -16;  | 
1558  |  |  | 
1559  | 0  |         memset(ctx->Xn + mres, 0, blocks - mres);  | 
1560  | 0  |         mres = blocks;  | 
1561  | 0  |         if (mres == sizeof(ctx->Xn)) { | 
1562  | 0  |             GHASH(ctx, ctx->Xn, mres);  | 
1563  | 0  |             mres = 0;  | 
1564  | 0  |         }  | 
1565  | 0  |     } else if (ctx->ares) { | 
1566  | 0  |         GCM_MUL(ctx);  | 
1567  | 0  |     }  | 
1568  |  | #else  | 
1569  |  |     if (ctx->mres || ctx->ares)  | 
1570  |  |         GCM_MUL(ctx);  | 
1571  |  | #endif  | 
1572  |  |  | 
1573  | 0  |     if (IS_LITTLE_ENDIAN) { | 
1574  |  | #ifdef BSWAP8  | 
1575  |  |         alen = BSWAP8(alen);  | 
1576  |  |         clen = BSWAP8(clen);  | 
1577  |  | #else  | 
1578  | 0  |         u8 *p = ctx->len.c;  | 
1579  |  |  | 
1580  | 0  |         ctx->len.u[0] = alen;  | 
1581  | 0  |         ctx->len.u[1] = clen;  | 
1582  |  |  | 
1583  | 0  |         alen = (u64)GETU32(p) << 32 | GETU32(p + 4);  | 
1584  | 0  |         clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);  | 
1585  | 0  | #endif  | 
1586  | 0  |     }  | 
1587  |  |  | 
1588  | 0  | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)  | 
1589  | 0  |     bitlen.hi = alen;  | 
1590  | 0  |     bitlen.lo = clen;  | 
1591  | 0  |     memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));  | 
1592  | 0  |     mres += sizeof(bitlen);  | 
1593  | 0  |     GHASH(ctx, ctx->Xn, mres);  | 
1594  |  | #else  | 
1595  |  |     ctx->Xi.u[0] ^= alen;  | 
1596  |  |     ctx->Xi.u[1] ^= clen;  | 
1597  |  |     GCM_MUL(ctx);  | 
1598  |  | #endif  | 
1599  |  |  | 
1600  | 0  |     ctx->Xi.u[0] ^= ctx->EK0.u[0];  | 
1601  | 0  |     ctx->Xi.u[1] ^= ctx->EK0.u[1];  | 
1602  |  |  | 
1603  | 0  |     if (tag && len <= sizeof(ctx->Xi))  | 
1604  | 0  |         return CRYPTO_memcmp(ctx->Xi.c, tag, len);  | 
1605  | 0  |     else  | 
1606  | 0  |         return -1;  | 
1607  | 0  | }  | 
1608  |  |  | 
1609  |  | void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)  | 
1610  | 0  | { | 
1611  | 0  |     CRYPTO_gcm128_finish(ctx, NULL, 0);  | 
1612  | 0  |     memcpy(tag, ctx->Xi.c,  | 
1613  | 0  |            len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));  | 
1614  | 0  | }  | 
1615  |  |  | 
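  |  | /*  | 
  |  |  * Heap-allocating convenience wrapper around CRYPTO_gcm128_init().  | 
  |  |  * The matching CRYPTO_gcm128_release() scrubs the context, hash key  | 
  |  |  * and all, with OPENSSL_clear_free() before returning the memory.  | 
  |  |  */  | 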
1616  |  | GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)  | 
1617  | 0  | { | 
1618  | 0  |     GCM128_CONTEXT *ret;  | 
1619  |  |  | 
1620  | 0  |     if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)  | 
1621  | 0  |         CRYPTO_gcm128_init(ret, key, block);  | 
1622  |  |  | 
1623  | 0  |     return ret;  | 
1624  | 0  | }  | 
1625  |  |  | 
1626  |  | void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)  | 
1627  | 0  | { | 
1628  | 0  |     OPENSSL_clear_free(ctx, sizeof(*ctx));  | 
1629  | 0  | }  |