/src/openssl111/crypto/modes/gcm128.c
| Line | Count | Source | 
| 1 |  | /* | 
| 2 |  |  * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved. | 
| 3 |  |  * | 
| 4 |  |  * Licensed under the OpenSSL license (the "License").  You may not use | 
| 5 |  |  * this file except in compliance with the License.  You can obtain a copy | 
| 6 |  |  * in the file LICENSE in the source distribution or at | 
| 7 |  |  * https://www.openssl.org/source/license.html | 
| 8 |  |  */ | 
| 9 |  |  | 
| 10 |  | #include <openssl/crypto.h> | 
| 11 |  | #include "modes_local.h" | 
| 12 |  | #include <string.h> | 
| 13 |  |  | 
| 14 |  | #if defined(__GNUC__) && !defined(STRICT_ALIGNMENT) | 
| 15 |  | typedef size_t size_t_aX __attribute((__aligned__(1))); | 
| 16 |  | #else | 
| 17 |  | typedef size_t size_t_aX; | 
| 18 |  | #endif | 
| 19 |  |  | 
| 20 |  | #if defined(BSWAP4) && defined(STRICT_ALIGNMENT) | 
| 21 |  | /* redefine, because alignment is ensured */ | 
| 22 |  | # undef  GETU32 | 
| 23 |  | # define GETU32(p)       BSWAP4(*(const u32 *)(p)) | 
| 24 |  | # undef  PUTU32 | 
| 25 |  | # define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v) | 
| 26 |  | #endif | 
| 27 |  |  | 
| 28 |  | #define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16)) | 
| 29 | 0 | #define REDUCE1BIT(V)   do { \ | 
| 30 | 0 |         if (sizeof(size_t)==8) { \ | 
| 31 | 0 |                 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ | 
| 32 | 0 |                 V.lo  = (V.hi<<63)|(V.lo>>1); \ | 
| 33 | 0 |                 V.hi  = (V.hi>>1 )^T; \ | 
| 34 | 0 |         } \ | 
| 35 | 0 |         else { \ | 
| 36 | 0 |                 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ | 
| 37 | 0 |                 V.lo  = (V.hi<<63)|(V.lo>>1); \ | 
| 38 | 0 |                 V.hi  = (V.hi>>1 )^((u64)T<<32); \ | 
| 39 | 0 |         } \ | 
| 40 | 0 | } while(0) | 
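REDUCE1BIT is one multiply-by-x step in GF(2^128) using GCM's bit-reflected representation: shift the 128-bit value right by one and, if the bit shifted out was set, fold the constant 0xE1 into the top byte, which encodes the field polynomial x^128 + x^7 + x^2 + x + 1. A minimal standalone sketch of the 64-bit branch, with a hypothetical u128 struct standing in for the one from modes_local.h:

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the u128 struct from modes_local.h. */
typedef struct { uint64_t hi, lo; } u128;

/* One REDUCE1BIT step (64-bit branch): V <- V * x in GF(2^128),
 * reduced modulo x^128 + x^7 + x^2 + x + 1.  In the bit-reflected
 * representation this is a 1-bit right shift plus a conditional
 * XOR of 0xE1 into the top byte. */
static void reduce1bit(u128 *V)
{
    uint64_t T = 0xe100000000000000ULL & (0 - (V->lo & 1));

    V->lo = (V->hi << 63) | (V->lo >> 1);
    V->hi = (V->hi >> 1) ^ T;
}

int main(void)
{
    u128 V = { 0x8000000000000000ULL, 0 }; /* field element 1, reflected */

    reduce1bit(&V); /* V is now x   (hi = 0x4000...) */
    reduce1bit(&V); /* V is now x^2 (hi = 0x2000...) */
    printf("%016llx%016llx\n",
           (unsigned long long)V.hi, (unsigned long long)V.lo);
    return 0;
}
```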
| 41 |  |  | 
| 42 |  | /*- | 
| 43 |  |  * Although the permitted values for TABLE_BITS are 8, 4 and 1, it | 
| 44 |  |  * should never be set to 8: 8 is effectively reserved for testing. | 
| 45 |  |  * TABLE_BITS>1 selects the lookup-table-driven implementations referred | 
| 46 |  |  * to as "Shoup's" in the GCM specification; in other words OpenSSL does | 
| 47 |  |  * not cover the whole spectrum of possible table-driven implementations. | 
| 48 |  |  * Why? In the non-"Shoup's" case the memory access pattern is segmented | 
| 49 |  |  * in such a manner that cache-timing information can trivially reveal a | 
| 50 |  |  * fair portion of the intermediate hash value. Given that the ciphertext | 
| 51 |  |  * is always available to an attacker, this could let them deduce the | 
| 52 |  |  * secret parameter H and, if successful, tamper with messages [which is | 
| 53 |  |  * trivial in CTR mode]. In "Shoup's" case that is not as easy, but there | 
| 54 |  |  * is no reason to believe the method is resistant to cache-timing | 
| 55 |  |  * attacks either. The "8-bit" implementation also consumes 16 (sixteen) | 
| 56 |  |  * times more memory: 4KB per individual key plus 1KB shared. On the | 
| 57 |  |  * plus side it should be about twice as fast as the "4-bit" version, | 
| 58 |  |  * and for gcc-generated x86[_64] code the "8-bit" version was observed | 
| 59 |  |  * to run ~75% faster, closer to 100% with commercial compilers... | 
| 60 |  |  * Yet the "4-bit" procedure is preferred, because it is believed to | 
| 61 |  |  * provide a better security/performance balance and adequate all-round | 
| 62 |  |  * performance. "All-round" refers to things like: | 
| 63 |  |  * | 
| 64 |  |  * - shorter setup time effectively improves overall timing for | 
| 65 |  |  *   handling short messages; | 
| 66 |  |  * - a larger table allocation can become unbearable because of VM | 
| 67 |  |  *   subsystem penalties (for example on Windows a large enough free | 
| 68 |  |  *   triggers VM working-set trimming, meaning that a subsequent | 
| 69 |  |  *   malloc would immediately incur working-set expansion); | 
| 70 |  |  * - a larger table has a larger cache footprint, which can affect the | 
| 71 |  |  *   performance of other code paths (not necessarily even in the same | 
| 72 |  |  *   thread in a Hyper-Threading world); | 
| 73 |  |  * | 
| 74 |  |  * A value of 1 is not appropriate, for performance reasons. | 
| 75 |  |  */ | 
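As a quick sanity check on the footprint figures above: each Htable entry is a 16-byte u128, so TABLE_BITS==8 needs 256 * 16 = 4096 bytes per key while TABLE_BITS==4 needs 16 * 16 = 256 bytes, exactly the sixteen-fold difference the comment cites. An illustrative computation (the u128 here is a local stand-in):

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t hi, lo; } u128; /* 16-byte stand-in */

int main(void)
{
    printf("TABLE_BITS==8: %zu bytes per key\n", 256 * sizeof(u128));
    printf("TABLE_BITS==4: %zu bytes per key\n", 16 * sizeof(u128));
    return 0;
}
```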
| 76 |  | #if     TABLE_BITS==8 | 
| 77 |  |  | 
| 78 |  | static void gcm_init_8bit(u128 Htable[256], u64 H[2]) | 
| 79 |  | { | 
| 80 |  |     int i, j; | 
| 81 |  |     u128 V; | 
| 82 |  |  | 
| 83 |  |     Htable[0].hi = 0; | 
| 84 |  |     Htable[0].lo = 0; | 
| 85 |  |     V.hi = H[0]; | 
| 86 |  |     V.lo = H[1]; | 
| 87 |  |  | 
| 88 |  |     for (Htable[128] = V, i = 64; i > 0; i >>= 1) { | 
| 89 |  |         REDUCE1BIT(V); | 
| 90 |  |         Htable[i] = V; | 
| 91 |  |     } | 
| 92 |  |  | 
| 93 |  |     for (i = 2; i < 256; i <<= 1) { | 
| 94 |  |         u128 *Hi = Htable + i, H0 = *Hi; | 
| 95 |  |         for (j = 1; j < i; ++j) { | 
| 96 |  |             Hi[j].hi = H0.hi ^ Htable[j].hi; | 
| 97 |  |             Hi[j].lo = H0.lo ^ Htable[j].lo; | 
| 98 |  |         } | 
| 99 |  |     } | 
| 100 |  | } | 
| 101 |  |  | 
| 102 |  | static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) | 
| 103 |  | { | 
| 104 |  |     u128 Z = { 0, 0 }; | 
| 105 |  |     const u8 *xi = (const u8 *)Xi + 15; | 
| 106 |  |     size_t rem, n = *xi; | 
| 107 |  |     const union { | 
| 108 |  |         long one; | 
| 109 |  |         char little; | 
| 110 |  |     } is_endian = { 1 }; | 
| 111 |  |     static const size_t rem_8bit[256] = { | 
| 112 |  |         PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246), | 
| 113 |  |         PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E), | 
| 114 |  |         PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56), | 
| 115 |  |         PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E), | 
| 116 |  |         PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66), | 
| 117 |  |         PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E), | 
| 118 |  |         PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076), | 
| 119 |  |         PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E), | 
| 120 |  |         PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06), | 
| 121 |  |         PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E), | 
| 122 |  |         PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416), | 
| 123 |  |         PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E), | 
| 124 |  |         PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626), | 
| 125 |  |         PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E), | 
| 126 |  |         PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836), | 
| 127 |  |         PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E), | 
| 128 |  |         PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6), | 
| 129 |  |         PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE), | 
| 130 |  |         PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6), | 
| 131 |  |         PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE), | 
| 132 |  |         PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6), | 
| 133 |  |         PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE), | 
| 134 |  |         PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6), | 
| 135 |  |         PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE), | 
| 136 |  |         PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86), | 
| 137 |  |         PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E), | 
| 138 |  |         PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496), | 
| 139 |  |         PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E), | 
| 140 |  |         PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6), | 
| 141 |  |         PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE), | 
| 142 |  |         PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6), | 
| 143 |  |         PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE), | 
| 144 |  |         PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346), | 
| 145 |  |         PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E), | 
| 146 |  |         PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56), | 
| 147 |  |         PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E), | 
| 148 |  |         PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66), | 
| 149 |  |         PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E), | 
| 150 |  |         PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176), | 
| 151 |  |         PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E), | 
| 152 |  |         PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06), | 
| 153 |  |         PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E), | 
| 154 |  |         PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516), | 
| 155 |  |         PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E), | 
| 156 |  |         PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726), | 
| 157 |  |         PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E), | 
| 158 |  |         PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936), | 
| 159 |  |         PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E), | 
| 160 |  |         PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6), | 
| 161 |  |         PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE), | 
| 162 |  |         PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6), | 
| 163 |  |         PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE), | 
| 164 |  |         PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6), | 
| 165 |  |         PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE), | 
| 166 |  |         PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6), | 
| 167 |  |         PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE), | 
| 168 |  |         PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86), | 
| 169 |  |         PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E), | 
| 170 |  |         PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596), | 
| 171 |  |         PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E), | 
| 172 |  |         PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6), | 
| 173 |  |         PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE), | 
| 174 |  |         PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6), | 
| 175 |  |         PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) | 
| 176 |  |     }; | 
| 177 |  |  | 
| 178 |  |     while (1) { | 
| 179 |  |         Z.hi ^= Htable[n].hi; | 
| 180 |  |         Z.lo ^= Htable[n].lo; | 
| 181 |  |  | 
| 182 |  |         if ((u8 *)Xi == xi) | 
| 183 |  |             break; | 
| 184 |  |  | 
| 185 |  |         n = *(--xi); | 
| 186 |  |  | 
| 187 |  |         rem = (size_t)Z.lo & 0xff; | 
| 188 |  |         Z.lo = (Z.hi << 56) | (Z.lo >> 8); | 
| 189 |  |         Z.hi = (Z.hi >> 8); | 
| 190 |  |         if (sizeof(size_t) == 8) | 
| 191 |  |             Z.hi ^= rem_8bit[rem]; | 
| 192 |  |         else | 
| 193 |  |             Z.hi ^= (u64)rem_8bit[rem] << 32; | 
| 194 |  |     } | 
| 195 |  |  | 
| 196 |  |     if (is_endian.little) { | 
| 197 |  | # ifdef BSWAP8 | 
| 198 |  |         Xi[0] = BSWAP8(Z.hi); | 
| 199 |  |         Xi[1] = BSWAP8(Z.lo); | 
| 200 |  | # else | 
| 201 |  |         u8 *p = (u8 *)Xi; | 
| 202 |  |         u32 v; | 
| 203 |  |         v = (u32)(Z.hi >> 32); | 
| 204 |  |         PUTU32(p, v); | 
| 205 |  |         v = (u32)(Z.hi); | 
| 206 |  |         PUTU32(p + 4, v); | 
| 207 |  |         v = (u32)(Z.lo >> 32); | 
| 208 |  |         PUTU32(p + 8, v); | 
| 209 |  |         v = (u32)(Z.lo); | 
| 210 |  |         PUTU32(p + 12, v); | 
| 211 |  | # endif | 
| 212 |  |     } else { | 
| 213 |  |         Xi[0] = Z.hi; | 
| 214 |  |         Xi[1] = Z.lo; | 
| 215 |  |     } | 
| 216 |  | } | 
| 217 |  |  | 
| 218 |  | # define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) | 
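GCM_MUL is the single multiplication step of GHASH: a 16-byte block has already been XORed into the accumulator Xi, and the macro multiplies Xi by the secret parameter H in GF(2^128). Over the blocks B_1..B_n (AAD, then ciphertext, then the length block) the functions in this file compute the standard recurrence from the GCM specification:

$$X_0 = 0, \qquad X_i = (X_{i-1} \oplus B_i) \cdot H \quad \text{in } \mathrm{GF}(2^{128}),$$

with the field defined modulo $x^{128} + x^7 + x^2 + x + 1$.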
| 219 |  |  | 
| 220 |  | #elif   TABLE_BITS==4 | 
| 221 |  |  | 
| 222 |  | static void gcm_init_4bit(u128 Htable[16], u64 H[2]) | 
| 223 | 0 | { | 
| 224 | 0 |     u128 V; | 
| 225 |  | # if defined(OPENSSL_SMALL_FOOTPRINT) | 
| 226 |  |     int i; | 
| 227 |  | # endif | 
| 228 |  |  | 
| 229 | 0 |     Htable[0].hi = 0; | 
| 230 | 0 |     Htable[0].lo = 0; | 
| 231 | 0 |     V.hi = H[0]; | 
| 232 | 0 |     V.lo = H[1]; | 
| 233 |  |  | 
| 234 |  | # if defined(OPENSSL_SMALL_FOOTPRINT) | 
| 235 |  |     for (Htable[8] = V, i = 4; i > 0; i >>= 1) { | 
| 236 |  |         REDUCE1BIT(V); | 
| 237 |  |         Htable[i] = V; | 
| 238 |  |     } | 
| 239 |  |  | 
| 240 |  |     for (i = 2; i < 16; i <<= 1) { | 
| 241 |  |         u128 *Hi = Htable + i; | 
| 242 |  |         int j; | 
| 243 |  |         for (V = *Hi, j = 1; j < i; ++j) { | 
| 244 |  |             Hi[j].hi = V.hi ^ Htable[j].hi; | 
| 245 |  |             Hi[j].lo = V.lo ^ Htable[j].lo; | 
| 246 |  |         } | 
| 247 |  |     } | 
| 248 |  | # else | 
| 249 | 0 |     Htable[8] = V; | 
| 250 | 0 |     REDUCE1BIT(V); | 
| 251 | 0 |     Htable[4] = V; | 
| 252 | 0 |     REDUCE1BIT(V); | 
| 253 | 0 |     Htable[2] = V; | 
| 254 | 0 |     REDUCE1BIT(V); | 
| 255 | 0 |     Htable[1] = V; | 
| 256 | 0 |     Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo; | 
| 257 | 0 |     V = Htable[4]; | 
| 258 | 0 |     Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo; | 
| 259 | 0 |     Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo; | 
| 260 | 0 |     Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo; | 
| 261 | 0 |     V = Htable[8]; | 
| 262 | 0 |     Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo; | 
| 263 | 0 |     Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo; | 
| 264 | 0 |     Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo; | 
| 265 | 0 |     Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo; | 
| 266 | 0 |     Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo; | 
| 267 | 0 |     Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo; | 
| 268 | 0 |     Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo; | 
| 269 | 0 | # endif | 
| 270 |  | # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm)) | 
| 271 |  |     /* | 
| 272 |  |      * ARM assembler expects specific dword order in Htable. | 
| 273 |  |      */ | 
| 274 |  |     { | 
| 275 |  |         int j; | 
| 276 |  |         const union { | 
| 277 |  |             long one; | 
| 278 |  |             char little; | 
| 279 |  |         } is_endian = { 1 }; | 
| 280 |  |  | 
| 281 |  |         if (is_endian.little) | 
| 282 |  |             for (j = 0; j < 16; ++j) { | 
| 283 |  |                 V = Htable[j]; | 
| 284 |  |                 Htable[j].hi = V.lo; | 
| 285 |  |                 Htable[j].lo = V.hi; | 
| 286 |  |         } else | 
| 287 |  |             for (j = 0; j < 16; ++j) { | 
| 288 |  |                 V = Htable[j]; | 
| 289 |  |                 Htable[j].hi = V.lo << 32 | V.lo >> 32; | 
| 290 |  |                 Htable[j].lo = V.hi << 32 | V.hi >> 32; | 
| 291 |  |             } | 
| 292 |  |     } | 
| 293 |  | # endif | 
| 294 | 0 | } | 
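The unrolled block above is just the doubling-and-XOR construction written out: the powers H, H·x, H·x^2 and H·x^3 land at indices 8, 4, 2 and 1, and every other entry is an XOR of those, so that for a nibble n with bits n_[3]..n_[0]:

$$\mathrm{Htable}[n] = \Big(\bigoplus_{j=0}^{3} n_{[j]}\, x^{\,3-j}\Big) \cdot H$$

This is what lets gcm_gmult_4bit consume Xi four bits at a time with a single table lookup per nibble.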
| 295 |  |  | 
| 296 |  | # ifndef GHASH_ASM | 
| 297 |  | static const size_t rem_4bit[16] = { | 
| 298 |  |     PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), | 
| 299 |  |     PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), | 
| 300 |  |     PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), | 
| 301 |  |     PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) | 
| 302 |  | }; | 
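Each rem_4bit entry is the reduction term for the four bits shifted out of Z below: entry n is the XOR, over the set bits of n, of 0xE100 shifted right by the bit's distance from the top of the nibble, and PACK() then places the 16-bit value in the high bits of a size_t. A small sketch that regenerates the raw 16-bit values, useful for double-checking the table (illustrative only):

```c
#include <stdio.h>

/* Regenerate the 16-bit values behind rem_4bit: entry n is the XOR of
 * 0xE100 >> (3 - i) over every set bit i of the nibble n.  E.g. n = 1
 * gives 0x1C20 and n = 8 gives 0xE100, matching the table above. */
int main(void)
{
    unsigned n, i;

    for (n = 0; n < 16; ++n) {
        unsigned v = 0;

        for (i = 0; i < 4; ++i)
            if (n & (1u << i))
                v ^= 0xE100u >> (3 - i);
        printf("0x%04X%c", v, (n % 4 == 3) ? '\n' : ' ');
    }
    return 0;
}
```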
| 303 |  |  | 
| 304 |  | static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) | 
| 305 |  | { | 
| 306 |  |     u128 Z; | 
| 307 |  |     int cnt = 15; | 
| 308 |  |     size_t rem, nlo, nhi; | 
| 309 |  |     const union { | 
| 310 |  |         long one; | 
| 311 |  |         char little; | 
| 312 |  |     } is_endian = { 1 }; | 
| 313 |  |  | 
| 314 |  |     nlo = ((const u8 *)Xi)[15]; | 
| 315 |  |     nhi = nlo >> 4; | 
| 316 |  |     nlo &= 0xf; | 
| 317 |  |  | 
| 318 |  |     Z.hi = Htable[nlo].hi; | 
| 319 |  |     Z.lo = Htable[nlo].lo; | 
| 320 |  |  | 
| 321 |  |     while (1) { | 
| 322 |  |         rem = (size_t)Z.lo & 0xf; | 
| 323 |  |         Z.lo = (Z.hi << 60) | (Z.lo >> 4); | 
| 324 |  |         Z.hi = (Z.hi >> 4); | 
| 325 |  |         if (sizeof(size_t) == 8) | 
| 326 |  |             Z.hi ^= rem_4bit[rem]; | 
| 327 |  |         else | 
| 328 |  |             Z.hi ^= (u64)rem_4bit[rem] << 32; | 
| 329 |  |  | 
| 330 |  |         Z.hi ^= Htable[nhi].hi; | 
| 331 |  |         Z.lo ^= Htable[nhi].lo; | 
| 332 |  |  | 
| 333 |  |         if (--cnt < 0) | 
| 334 |  |             break; | 
| 335 |  |  | 
| 336 |  |         nlo = ((const u8 *)Xi)[cnt]; | 
| 337 |  |         nhi = nlo >> 4; | 
| 338 |  |         nlo &= 0xf; | 
| 339 |  |  | 
| 340 |  |         rem = (size_t)Z.lo & 0xf; | 
| 341 |  |         Z.lo = (Z.hi << 60) | (Z.lo >> 4); | 
| 342 |  |         Z.hi = (Z.hi >> 4); | 
| 343 |  |         if (sizeof(size_t) == 8) | 
| 344 |  |             Z.hi ^= rem_4bit[rem]; | 
| 345 |  |         else | 
| 346 |  |             Z.hi ^= (u64)rem_4bit[rem] << 32; | 
| 347 |  |  | 
| 348 |  |         Z.hi ^= Htable[nlo].hi; | 
| 349 |  |         Z.lo ^= Htable[nlo].lo; | 
| 350 |  |     } | 
| 351 |  |  | 
| 352 |  |     if (is_endian.little) { | 
| 353 |  | #  ifdef BSWAP8 | 
| 354 |  |         Xi[0] = BSWAP8(Z.hi); | 
| 355 |  |         Xi[1] = BSWAP8(Z.lo); | 
| 356 |  | #  else | 
| 357 |  |         u8 *p = (u8 *)Xi; | 
| 358 |  |         u32 v; | 
| 359 |  |         v = (u32)(Z.hi >> 32); | 
| 360 |  |         PUTU32(p, v); | 
| 361 |  |         v = (u32)(Z.hi); | 
| 362 |  |         PUTU32(p + 4, v); | 
| 363 |  |         v = (u32)(Z.lo >> 32); | 
| 364 |  |         PUTU32(p + 8, v); | 
| 365 |  |         v = (u32)(Z.lo); | 
| 366 |  |         PUTU32(p + 12, v); | 
| 367 |  | #  endif | 
| 368 |  |     } else { | 
| 369 |  |         Xi[0] = Z.hi; | 
| 370 |  |         Xi[1] = Z.lo; | 
| 371 |  |     } | 
| 372 |  | } | 
| 373 |  |  | 
| 374 |  | #  if !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 375 |  | /* | 
| 376 |  |  * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt | 
| 377 |  |  * for details... Compiler-generated code doesn't seem to give any | 
| 378 |  |  * performance improvement, at least not on x86[_64]. It's here mostly | 
| 379 |  |  * as a reference and a placeholder for possible future non-trivial | 
| 380 |  |  * optimization[s]... | 
| 381 |  |  */ | 
| 382 |  | static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], | 
| 383 |  |                            const u8 *inp, size_t len) | 
| 384 |  | { | 
| 385 |  |     u128 Z; | 
| 386 |  |     int cnt; | 
| 387 |  |     size_t rem, nlo, nhi; | 
| 388 |  |     const union { | 
| 389 |  |         long one; | 
| 390 |  |         char little; | 
| 391 |  |     } is_endian = { 1 }; | 
| 392 |  |  | 
| 393 |  | #   if 1 | 
| 394 |  |     do { | 
| 395 |  |         cnt = 15; | 
| 396 |  |         nlo = ((const u8 *)Xi)[15]; | 
| 397 |  |         nlo ^= inp[15]; | 
| 398 |  |         nhi = nlo >> 4; | 
| 399 |  |         nlo &= 0xf; | 
| 400 |  |  | 
| 401 |  |         Z.hi = Htable[nlo].hi; | 
| 402 |  |         Z.lo = Htable[nlo].lo; | 
| 403 |  |  | 
| 404 |  |         while (1) { | 
| 405 |  |             rem = (size_t)Z.lo & 0xf; | 
| 406 |  |             Z.lo = (Z.hi << 60) | (Z.lo >> 4); | 
| 407 |  |             Z.hi = (Z.hi >> 4); | 
| 408 |  |             if (sizeof(size_t) == 8) | 
| 409 |  |                 Z.hi ^= rem_4bit[rem]; | 
| 410 |  |             else | 
| 411 |  |                 Z.hi ^= (u64)rem_4bit[rem] << 32; | 
| 412 |  |  | 
| 413 |  |             Z.hi ^= Htable[nhi].hi; | 
| 414 |  |             Z.lo ^= Htable[nhi].lo; | 
| 415 |  |  | 
| 416 |  |             if (--cnt < 0) | 
| 417 |  |                 break; | 
| 418 |  |  | 
| 419 |  |             nlo = ((const u8 *)Xi)[cnt]; | 
| 420 |  |             nlo ^= inp[cnt]; | 
| 421 |  |             nhi = nlo >> 4; | 
| 422 |  |             nlo &= 0xf; | 
| 423 |  |  | 
| 424 |  |             rem = (size_t)Z.lo & 0xf; | 
| 425 |  |             Z.lo = (Z.hi << 60) | (Z.lo >> 4); | 
| 426 |  |             Z.hi = (Z.hi >> 4); | 
| 427 |  |             if (sizeof(size_t) == 8) | 
| 428 |  |                 Z.hi ^= rem_4bit[rem]; | 
| 429 |  |             else | 
| 430 |  |                 Z.hi ^= (u64)rem_4bit[rem] << 32; | 
| 431 |  |  | 
| 432 |  |             Z.hi ^= Htable[nlo].hi; | 
| 433 |  |             Z.lo ^= Htable[nlo].lo; | 
| 434 |  |         } | 
| 435 |  | #   else | 
| 436 |  |     /* | 
| 437 |  |      * An extra 256+16 bytes per key plus 512 bytes of shared tables | 
| 438 |  |      * [should] give a ~50% improvement... One could have PACK()-ed | 
| 439 |  |      * rem_8bit even here, but the priority is to minimize the cache | 
| 440 |  |      * footprint... | 
| 441 |  |      */ | 
| 442 |  |     u128 Hshr4[16];             /* Htable shifted right by 4 bits */ | 
| 443 |  |     u8 Hshl4[16];               /* Htable shifted left by 4 bits */ | 
| 444 |  |     static const unsigned short rem_8bit[256] = { | 
| 445 |  |         0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, | 
| 446 |  |         0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, | 
| 447 |  |         0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, | 
| 448 |  |         0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, | 
| 449 |  |         0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, | 
| 450 |  |         0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, | 
| 451 |  |         0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, | 
| 452 |  |         0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, | 
| 453 |  |         0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, | 
| 454 |  |         0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, | 
| 455 |  |         0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, | 
| 456 |  |         0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, | 
| 457 |  |         0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, | 
| 458 |  |         0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, | 
| 459 |  |         0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, | 
| 460 |  |         0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, | 
| 461 |  |         0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, | 
| 462 |  |         0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, | 
| 463 |  |         0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, | 
| 464 |  |         0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, | 
| 465 |  |         0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, | 
| 466 |  |         0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, | 
| 467 |  |         0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, | 
| 468 |  |         0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, | 
| 469 |  |         0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, | 
| 470 |  |         0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, | 
| 471 |  |         0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE, | 
| 472 |  |         0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, | 
| 473 |  |         0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, | 
| 474 |  |         0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, | 
| 475 |  |         0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, | 
| 476 |  |         0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE | 
| 477 |  |     }; | 
| 478 |  |     /* | 
| 479 |  |      * This pre-processing phase costs approximately as much time as | 
| 480 |  |      * it saves per loop iteration. In other words, single-block | 
| 481 |  |      * performance is approximately the same as for the straightforward | 
| 482 |  |      * "4-bit" implementation, and beyond one block it only gets faster... | 
| 483 |  |      */ | 
| 484 |  |     for (cnt = 0; cnt < 16; ++cnt) { | 
| 485 |  |         Z.hi = Htable[cnt].hi; | 
| 486 |  |         Z.lo = Htable[cnt].lo; | 
| 487 |  |         Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4); | 
| 488 |  |         Hshr4[cnt].hi = (Z.hi >> 4); | 
| 489 |  |         Hshl4[cnt] = (u8)(Z.lo << 4); | 
| 490 |  |     } | 
| 491 |  |  | 
| 492 |  |     do { | 
| 493 |  |         for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) { | 
| 494 |  |             nlo = ((const u8 *)Xi)[cnt]; | 
| 495 |  |             nlo ^= inp[cnt]; | 
| 496 |  |             nhi = nlo >> 4; | 
| 497 |  |             nlo &= 0xf; | 
| 498 |  |  | 
| 499 |  |             Z.hi ^= Htable[nlo].hi; | 
| 500 |  |             Z.lo ^= Htable[nlo].lo; | 
| 501 |  |  | 
| 502 |  |             rem = (size_t)Z.lo & 0xff; | 
| 503 |  |  | 
| 504 |  |             Z.lo = (Z.hi << 56) | (Z.lo >> 8); | 
| 505 |  |             Z.hi = (Z.hi >> 8); | 
| 506 |  |  | 
| 507 |  |             Z.hi ^= Hshr4[nhi].hi; | 
| 508 |  |             Z.lo ^= Hshr4[nhi].lo; | 
| 509 |  |             Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48; | 
| 510 |  |         } | 
| 511 |  |  | 
| 512 |  |         nlo = ((const u8 *)Xi)[0]; | 
| 513 |  |         nlo ^= inp[0]; | 
| 514 |  |         nhi = nlo >> 4; | 
| 515 |  |         nlo &= 0xf; | 
| 516 |  |  | 
| 517 |  |         Z.hi ^= Htable[nlo].hi; | 
| 518 |  |         Z.lo ^= Htable[nlo].lo; | 
| 519 |  |  | 
| 520 |  |         rem = (size_t)Z.lo & 0xf; | 
| 521 |  |  | 
| 522 |  |         Z.lo = (Z.hi << 60) | (Z.lo >> 4); | 
| 523 |  |         Z.hi = (Z.hi >> 4); | 
| 524 |  |  | 
| 525 |  |         Z.hi ^= Htable[nhi].hi; | 
| 526 |  |         Z.lo ^= Htable[nhi].lo; | 
| 527 |  |         Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48; | 
| 528 |  | #   endif | 
| 529 |  |  | 
| 530 |  |         if (is_endian.little) { | 
| 531 |  | #   ifdef BSWAP8 | 
| 532 |  |             Xi[0] = BSWAP8(Z.hi); | 
| 533 |  |             Xi[1] = BSWAP8(Z.lo); | 
| 534 |  | #   else | 
| 535 |  |             u8 *p = (u8 *)Xi; | 
| 536 |  |             u32 v; | 
| 537 |  |             v = (u32)(Z.hi >> 32); | 
| 538 |  |             PUTU32(p, v); | 
| 539 |  |             v = (u32)(Z.hi); | 
| 540 |  |             PUTU32(p + 4, v); | 
| 541 |  |             v = (u32)(Z.lo >> 32); | 
| 542 |  |             PUTU32(p + 8, v); | 
| 543 |  |             v = (u32)(Z.lo); | 
| 544 |  |             PUTU32(p + 12, v); | 
| 545 |  | #   endif | 
| 546 |  |         } else { | 
| 547 |  |             Xi[0] = Z.hi; | 
| 548 |  |             Xi[1] = Z.lo; | 
| 549 |  |         } | 
| 550 |  |     } while (inp += 16, len -= 16); | 
| 551 |  | } | 
| 552 |  | #  endif | 
| 553 |  | # else | 
| 554 |  | void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]); | 
| 555 |  | void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, | 
| 556 |  |                     size_t len); | 
| 557 |  | # endif | 
| 558 |  |  | 
| 559 |  | # define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) | 
| 560 |  | # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 561 |  | #  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) | 
| 562 |  | /* | 
| 563 |  |  * GHASH_CHUNK is a "stride parameter" whose mission is to mitigate cache | 
| 564 |  |  * thrashing effects. In other words, the idea is to hash data while it is | 
| 565 |  |  * still in the L1 cache after the encryption pass... | 
| 566 |  |  */ | 
| 567 | 0 | #  define GHASH_CHUNK       (3*1024) | 
| 568 |  | # endif | 
| 569 |  |  | 
| 570 |  | #else                           /* TABLE_BITS */ | 
| 571 |  |  | 
| 572 |  | static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) | 
| 573 |  | { | 
| 574 |  |     u128 V, Z = { 0, 0 }; | 
| 575 |  |     long X; | 
| 576 |  |     int i, j; | 
| 577 |  |     const long *xi = (const long *)Xi; | 
| 578 |  |     const union { | 
| 579 |  |         long one; | 
| 580 |  |         char little; | 
| 581 |  |     } is_endian = { 1 }; | 
| 582 |  |  | 
| 583 |  |     V.hi = H[0];                /* H is in host byte order, no byte swapping */ | 
| 584 |  |     V.lo = H[1]; | 
| 585 |  |  | 
| 586 |  |     for (j = 0; j < 16 / sizeof(long); ++j) { | 
| 587 |  |         if (is_endian.little) { | 
| 588 |  |             if (sizeof(long) == 8) { | 
| 589 |  | # ifdef BSWAP8 | 
| 590 |  |                 X = (long)(BSWAP8(xi[j])); | 
| 591 |  | # else | 
| 592 |  |                 const u8 *p = (const u8 *)(xi + j); | 
| 593 |  |                 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4)); | 
| 594 |  | # endif | 
| 595 |  |             } else { | 
| 596 |  |                 const u8 *p = (const u8 *)(xi + j); | 
| 597 |  |                 X = (long)GETU32(p); | 
| 598 |  |             } | 
| 599 |  |         } else | 
| 600 |  |             X = xi[j]; | 
| 601 |  |  | 
| 602 |  |         for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) { | 
| 603 |  |             u64 M = (u64)(X >> (8 * sizeof(long) - 1)); | 
| 604 |  |             Z.hi ^= V.hi & M; | 
| 605 |  |             Z.lo ^= V.lo & M; | 
| 606 |  |  | 
| 607 |  |             REDUCE1BIT(V); | 
| 608 |  |         } | 
| 609 |  |     } | 
| 610 |  |  | 
| 611 |  |     if (is_endian.little) { | 
| 612 |  | # ifdef BSWAP8 | 
| 613 |  |         Xi[0] = BSWAP8(Z.hi); | 
| 614 |  |         Xi[1] = BSWAP8(Z.lo); | 
| 615 |  | # else | 
| 616 |  |         u8 *p = (u8 *)Xi; | 
| 617 |  |         u32 v; | 
| 618 |  |         v = (u32)(Z.hi >> 32); | 
| 619 |  |         PUTU32(p, v); | 
| 620 |  |         v = (u32)(Z.hi); | 
| 621 |  |         PUTU32(p + 4, v); | 
| 622 |  |         v = (u32)(Z.lo >> 32); | 
| 623 |  |         PUTU32(p + 8, v); | 
| 624 |  |         v = (u32)(Z.lo); | 
| 625 |  |         PUTU32(p + 12, v); | 
| 626 |  | # endif | 
| 627 |  |     } else { | 
| 628 |  |         Xi[0] = Z.hi; | 
| 629 |  |         Xi[1] = Z.lo; | 
| 630 |  |     } | 
| 631 |  | } | 
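gcm_gmult_1bit is the schoolbook bit-serial multiply: it walks the 128 bits of Xi, conditionally accumulates the running multiple of H into Z, and advances that multiple with REDUCE1BIT. In effect it computes

$$Z = \bigoplus_{i=0}^{127} X_{[i]} \cdot \big(H \cdot x^{i}\big) \pmod{x^{128} + x^7 + x^2 + x + 1},$$

one term per iteration, which is why the table-driven TABLE_BITS>1 variants above are so much faster.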
| 632 |  |  | 
| 633 |  | # define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) | 
| 634 |  |  | 
| 635 |  | #endif | 
| 636 |  |  | 
| 637 |  | #if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) | 
| 638 |  | # if    !defined(I386_ONLY) && \ | 
| 639 |  |         (defined(__i386)        || defined(__i386__)    || \ | 
| 640 |  |          defined(__x86_64)      || defined(__x86_64__)  || \ | 
| 641 |  |          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64)) | 
| 642 |  | #  define GHASH_ASM_X86_OR_64 | 
| 643 |  | #  define GCM_FUNCREF_4BIT | 
| 644 |  | extern unsigned int OPENSSL_ia32cap_P[]; | 
| 645 |  |  | 
| 646 |  | void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]); | 
| 647 |  | void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); | 
| 648 |  | void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, | 
| 649 |  |                      size_t len); | 
| 650 |  |  | 
| 651 |  | #  if defined(__i386) || defined(__i386__) || defined(_M_IX86) | 
| 652 |  | #   define gcm_init_avx   gcm_init_clmul | 
| 653 |  | #   define gcm_gmult_avx  gcm_gmult_clmul | 
| 654 |  | #   define gcm_ghash_avx  gcm_ghash_clmul | 
| 655 |  | #  else | 
| 656 |  | void gcm_init_avx(u128 Htable[16], const u64 Xi[2]); | 
| 657 |  | void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]); | 
| 658 |  | void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp, | 
| 659 |  |                    size_t len); | 
| 660 |  | #  endif | 
| 661 |  |  | 
| 662 |  | #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86) | 
| 663 |  | #   define GHASH_ASM_X86 | 
| 664 |  | void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]); | 
| 665 |  | void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, | 
| 666 |  |                         size_t len); | 
| 667 |  |  | 
| 668 |  | void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); | 
| 669 |  | void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, | 
| 670 |  |                         size_t len); | 
| 671 |  | #  endif | 
| 672 |  | # elif defined(__arm__) || defined(__arm) || defined(__aarch64__) | 
| 673 |  | #  include "arm_arch.h" | 
| 674 |  | #  if __ARM_MAX_ARCH__>=7 | 
| 675 |  | #   define GHASH_ASM_ARM | 
| 676 |  | #   define GCM_FUNCREF_4BIT | 
| 677 |  | #   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL) | 
| 678 |  | #   if defined(__arm__) || defined(__arm) | 
| 679 |  | #    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON) | 
| 680 |  | #   endif | 
| 681 |  | void gcm_init_neon(u128 Htable[16], const u64 Xi[2]); | 
| 682 |  | void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]); | 
| 683 |  | void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, | 
| 684 |  |                     size_t len); | 
| 685 |  | void gcm_init_v8(u128 Htable[16], const u64 Xi[2]); | 
| 686 |  | void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]); | 
| 687 |  | void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp, | 
| 688 |  |                   size_t len); | 
| 689 |  | #  endif | 
| 690 |  | # elif defined(__sparc__) || defined(__sparc) | 
| 691 |  | #  include "sparc_arch.h" | 
| 692 |  | #  define GHASH_ASM_SPARC | 
| 693 |  | #  define GCM_FUNCREF_4BIT | 
| 694 |  | extern unsigned int OPENSSL_sparcv9cap_P[]; | 
| 695 |  | void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]); | 
| 696 |  | void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]); | 
| 697 |  | void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp, | 
| 698 |  |                     size_t len); | 
| 699 |  | # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) | 
| 700 |  | #  include "ppc_arch.h" | 
| 701 |  | #  define GHASH_ASM_PPC | 
| 702 |  | #  define GCM_FUNCREF_4BIT | 
| 703 |  | void gcm_init_p8(u128 Htable[16], const u64 Xi[2]); | 
| 704 |  | void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]); | 
| 705 |  | void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, | 
| 706 |  |                   size_t len); | 
| 707 |  | # endif | 
| 708 |  | #endif | 
| 709 |  |  | 
| 710 |  | #ifdef GCM_FUNCREF_4BIT | 
| 711 |  | # undef  GCM_MUL | 
| 712 | 0 | # define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) | 
| 713 |  | # ifdef GHASH | 
| 714 |  | #  undef  GHASH | 
| 715 | 0 | #  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) | 
| 716 |  | # endif | 
| 717 |  | #endif | 
| 718 |  |  | 
| 719 |  | void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) | 
| 720 | 0 | { | 
| 721 | 0 |     const union { | 
| 722 | 0 |         long one; | 
| 723 | 0 |         char little; | 
| 724 | 0 |     } is_endian = { 1 }; | 
| 725 |  |  | 
| 726 | 0 |     memset(ctx, 0, sizeof(*ctx)); | 
| 727 | 0 |     ctx->block = block; | 
| 728 | 0 |     ctx->key = key; | 
| 729 |  |  | 
| 730 | 0 |     (*block) (ctx->H.c, ctx->H.c, key); | 
| 731 |  |  | 
| 732 | 0 |     if (is_endian.little) { | 
| 733 |  |         /* H is stored in host byte order */ | 
| 734 |  | #ifdef BSWAP8 | 
| 735 |  |         ctx->H.u[0] = BSWAP8(ctx->H.u[0]); | 
| 736 |  |         ctx->H.u[1] = BSWAP8(ctx->H.u[1]); | 
| 737 |  | #else | 
| 738 | 0 |         u8 *p = ctx->H.c; | 
| 739 | 0 |         u64 hi, lo; | 
| 740 | 0 |         hi = (u64)GETU32(p) << 32 | GETU32(p + 4); | 
| 741 | 0 |         lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); | 
| 742 | 0 |         ctx->H.u[0] = hi; | 
| 743 | 0 |         ctx->H.u[1] = lo; | 
| 744 | 0 | #endif | 
| 745 | 0 |     } | 
| 746 |  | #if     TABLE_BITS==8 | 
| 747 |  |     gcm_init_8bit(ctx->Htable, ctx->H.u); | 
| 748 |  | #elif   TABLE_BITS==4 | 
| 749 | 0 | # if    defined(GHASH) | 
| 750 | 0 | #  define CTX__GHASH(f) (ctx->ghash = (f)) | 
| 751 |  | # else | 
| 752 |  | #  define CTX__GHASH(f) (ctx->ghash = NULL) | 
| 753 |  | # endif | 
| 754 | 0 | # if    defined(GHASH_ASM_X86_OR_64) | 
| 755 | 0 | #  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) | 
| 756 | 0 |     if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */ | 
| 757 | 0 |         if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */ | 
| 758 | 0 |             gcm_init_avx(ctx->Htable, ctx->H.u); | 
| 759 | 0 |             ctx->gmult = gcm_gmult_avx; | 
| 760 | 0 |             CTX__GHASH(gcm_ghash_avx); | 
| 761 | 0 |         } else { | 
| 762 | 0 |             gcm_init_clmul(ctx->Htable, ctx->H.u); | 
| 763 | 0 |             ctx->gmult = gcm_gmult_clmul; | 
| 764 | 0 |             CTX__GHASH(gcm_ghash_clmul); | 
| 765 | 0 |         } | 
| 766 | 0 |         return; | 
| 767 | 0 |     } | 
| 768 | 0 | #  endif | 
| 769 | 0 |     gcm_init_4bit(ctx->Htable, ctx->H.u); | 
| 770 |  | #  if   defined(GHASH_ASM_X86)  /* x86 only */ | 
| 771 |  | #   if  defined(OPENSSL_IA32_SSE2) | 
| 772 |  |     if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */ | 
| 773 |  | #   else | 
| 774 |  |     if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */ | 
| 775 |  | #   endif | 
| 776 |  |         ctx->gmult = gcm_gmult_4bit_mmx; | 
| 777 |  |         CTX__GHASH(gcm_ghash_4bit_mmx); | 
| 778 |  |     } else { | 
| 779 |  |         ctx->gmult = gcm_gmult_4bit_x86; | 
| 780 |  |         CTX__GHASH(gcm_ghash_4bit_x86); | 
| 781 |  |     } | 
| 782 |  | #  else | 
| 783 | 0 |     ctx->gmult = gcm_gmult_4bit; | 
| 784 | 0 |     CTX__GHASH(gcm_ghash_4bit); | 
| 785 | 0 | #  endif | 
| 786 |  | # elif  defined(GHASH_ASM_ARM) | 
| 787 |  | #  ifdef PMULL_CAPABLE | 
| 788 |  |     if (PMULL_CAPABLE) { | 
| 789 |  |         gcm_init_v8(ctx->Htable, ctx->H.u); | 
| 790 |  |         ctx->gmult = gcm_gmult_v8; | 
| 791 |  |         CTX__GHASH(gcm_ghash_v8); | 
| 792 |  |     } else | 
| 793 |  | #  endif | 
| 794 |  | #  ifdef NEON_CAPABLE | 
| 795 |  |     if (NEON_CAPABLE) { | 
| 796 |  |         gcm_init_neon(ctx->Htable, ctx->H.u); | 
| 797 |  |         ctx->gmult = gcm_gmult_neon; | 
| 798 |  |         CTX__GHASH(gcm_ghash_neon); | 
| 799 |  |     } else | 
| 800 |  | #  endif | 
| 801 |  |     { | 
| 802 |  |         gcm_init_4bit(ctx->Htable, ctx->H.u); | 
| 803 |  |         ctx->gmult = gcm_gmult_4bit; | 
| 804 |  |         CTX__GHASH(gcm_ghash_4bit); | 
| 805 |  |     } | 
| 806 |  | # elif  defined(GHASH_ASM_SPARC) | 
| 807 |  |     if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) { | 
| 808 |  |         gcm_init_vis3(ctx->Htable, ctx->H.u); | 
| 809 |  |         ctx->gmult = gcm_gmult_vis3; | 
| 810 |  |         CTX__GHASH(gcm_ghash_vis3); | 
| 811 |  |     } else { | 
| 812 |  |         gcm_init_4bit(ctx->Htable, ctx->H.u); | 
| 813 |  |         ctx->gmult = gcm_gmult_4bit; | 
| 814 |  |         CTX__GHASH(gcm_ghash_4bit); | 
| 815 |  |     } | 
| 816 |  | # elif  defined(GHASH_ASM_PPC) | 
| 817 |  |     if (OPENSSL_ppccap_P & PPC_CRYPTO207) { | 
| 818 |  |         gcm_init_p8(ctx->Htable, ctx->H.u); | 
| 819 |  |         ctx->gmult = gcm_gmult_p8; | 
| 820 |  |         CTX__GHASH(gcm_ghash_p8); | 
| 821 |  |     } else { | 
| 822 |  |         gcm_init_4bit(ctx->Htable, ctx->H.u); | 
| 823 |  |         ctx->gmult = gcm_gmult_4bit; | 
| 824 |  |         CTX__GHASH(gcm_ghash_4bit); | 
| 825 |  |     } | 
| 826 |  | # else | 
| 827 |  |     gcm_init_4bit(ctx->Htable, ctx->H.u); | 
| 828 |  | # endif | 
| 829 | 0 | # undef CTX__GHASH | 
| 830 | 0 | #endif | 
| 831 | 0 | } | 
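For orientation, the low-level GCM128 API built from these routines is normally driven as init, setiv, aad, then encrypt and tag. A minimal sketch using AES as the block cipher (the key and IV bytes are placeholders and error handling is abbreviated):

```c
#include <openssl/aes.h>
#include <openssl/modes.h>

int gcm_encrypt_sketch(const unsigned char *pt, unsigned char *ct,
                       size_t len, unsigned char tag[16])
{
    static const unsigned char key[16] = { 0 }; /* placeholder key */
    static const unsigned char iv[12] = { 0 };  /* placeholder IV  */
    AES_KEY aes;
    GCM128_CONTEXT *gcm;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return -1;
    /* CRYPTO_gcm128_new/init derive H = E_K(0^128) internally. */
    gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt);
    if (gcm == NULL)
        return -1;
    CRYPTO_gcm128_setiv(gcm, iv, sizeof(iv));
    /* Any AAD would go here, via CRYPTO_gcm128_aad(), before plaintext. */
    if (CRYPTO_gcm128_encrypt(gcm, pt, ct, len) != 0) {
        CRYPTO_gcm128_release(gcm);
        return -1;
    }
    CRYPTO_gcm128_tag(gcm, tag, 16);
    CRYPTO_gcm128_release(gcm);
    return 0;
}
```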
| 832 |  |  | 
| 833 |  | void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, | 
| 834 |  |                          size_t len) | 
| 835 | 0 | { | 
| 836 | 0 |     const union { | 
| 837 | 0 |         long one; | 
| 838 | 0 |         char little; | 
| 839 | 0 |     } is_endian = { 1 }; | 
| 840 | 0 |     unsigned int ctr; | 
| 841 | 0 | #ifdef GCM_FUNCREF_4BIT | 
| 842 | 0 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; | 
| 843 | 0 | #endif | 
| 844 |  |  | 
| 845 | 0 |     ctx->len.u[0] = 0;          /* AAD length */ | 
| 846 | 0 |     ctx->len.u[1] = 0;          /* message length */ | 
| 847 | 0 |     ctx->ares = 0; | 
| 848 | 0 |     ctx->mres = 0; | 
| 849 |  |  | 
| 850 | 0 |     if (len == 12) { | 
| 851 | 0 |         memcpy(ctx->Yi.c, iv, 12); | 
| 852 | 0 |         ctx->Yi.c[12] = 0; | 
| 853 | 0 |         ctx->Yi.c[13] = 0; | 
| 854 | 0 |         ctx->Yi.c[14] = 0; | 
| 855 | 0 |         ctx->Yi.c[15] = 1; | 
| 856 | 0 |         ctr = 1; | 
| 857 | 0 |     } else { | 
| 858 | 0 |         size_t i; | 
| 859 | 0 |         u64 len0 = len; | 
| 860 |  |  | 
| 861 |  |         /* Borrow ctx->Xi to calculate initial Yi */ | 
| 862 | 0 |         ctx->Xi.u[0] = 0; | 
| 863 | 0 |         ctx->Xi.u[1] = 0; | 
| 864 |  |  | 
| 865 | 0 |         while (len >= 16) { | 
| 866 | 0 |             for (i = 0; i < 16; ++i) | 
| 867 | 0 |                 ctx->Xi.c[i] ^= iv[i]; | 
| 868 | 0 |             GCM_MUL(ctx); | 
| 869 | 0 |             iv += 16; | 
| 870 | 0 |             len -= 16; | 
| 871 | 0 |         } | 
| 872 | 0 |         if (len) { | 
| 873 | 0 |             for (i = 0; i < len; ++i) | 
| 874 | 0 |                 ctx->Xi.c[i] ^= iv[i]; | 
| 875 | 0 |             GCM_MUL(ctx); | 
| 876 | 0 |         } | 
| 877 | 0 |         len0 <<= 3; | 
| 878 | 0 |         if (is_endian.little) { | 
| 879 |  | #ifdef BSWAP8 | 
| 880 |  |             ctx->Xi.u[1] ^= BSWAP8(len0); | 
| 881 |  | #else | 
| 882 | 0 |             ctx->Xi.c[8] ^= (u8)(len0 >> 56); | 
| 883 | 0 |             ctx->Xi.c[9] ^= (u8)(len0 >> 48); | 
| 884 | 0 |             ctx->Xi.c[10] ^= (u8)(len0 >> 40); | 
| 885 | 0 |             ctx->Xi.c[11] ^= (u8)(len0 >> 32); | 
| 886 | 0 |             ctx->Xi.c[12] ^= (u8)(len0 >> 24); | 
| 887 | 0 |             ctx->Xi.c[13] ^= (u8)(len0 >> 16); | 
| 888 | 0 |             ctx->Xi.c[14] ^= (u8)(len0 >> 8); | 
| 889 | 0 |             ctx->Xi.c[15] ^= (u8)(len0); | 
| 890 | 0 | #endif | 
| 891 | 0 |         } else { | 
| 892 | 0 |             ctx->Xi.u[1] ^= len0; | 
| 893 | 0 |         } | 
| 894 |  |  | 
| 895 | 0 |         GCM_MUL(ctx); | 
| 896 |  |  | 
| 897 | 0 |         if (is_endian.little) | 
| 898 |  | #ifdef BSWAP4 | 
| 899 |  |             ctr = BSWAP4(ctx->Xi.d[3]); | 
| 900 |  | #else | 
| 901 | 0 |             ctr = GETU32(ctx->Xi.c + 12); | 
| 902 | 0 | #endif | 
| 903 | 0 |         else | 
| 904 | 0 |             ctr = ctx->Xi.d[3]; | 
| 905 |  |  | 
| 906 |  |         /* Copy borrowed Xi to Yi */ | 
| 907 | 0 |         ctx->Yi.u[0] = ctx->Xi.u[0]; | 
| 908 | 0 |         ctx->Yi.u[1] = ctx->Xi.u[1]; | 
| 909 | 0 |     } | 
| 910 |  |  | 
| 911 | 0 |     ctx->Xi.u[0] = 0; | 
| 912 | 0 |     ctx->Xi.u[1] = 0; | 
| 913 |  |  | 
| 914 | 0 |     (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key); | 
| 915 | 0 |     ++ctr; | 
| 916 | 0 |     if (is_endian.little) | 
| 917 |  | #ifdef BSWAP4 | 
| 918 |  |         ctx->Yi.d[3] = BSWAP4(ctr); | 
| 919 |  | #else | 
| 920 | 0 |         PUTU32(ctx->Yi.c + 12, ctr); | 
| 921 | 0 | #endif | 
| 922 | 0 |     else | 
| 923 | 0 |         ctx->Yi.d[3] = ctr; | 
| 924 | 0 | } | 
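CRYPTO_gcm128_setiv constructs the pre-counter block (Y_0 here, J_0 in SP 800-38D) exactly as the specification prescribes, which is why the 12-byte case is a plain copy while every other IV length goes through GHASH:

$$Y_0 = \begin{cases} IV \,\|\, 0^{31}1, & \mathrm{len}(IV) = 96 \text{ bits},\\[2pt] \mathrm{GHASH}_H\big(IV \,\|\, 0^{s+64} \,\|\, [\mathrm{len}(IV)]_{64}\big), & \text{otherwise}, \end{cases}$$

where $0^s$ pads the IV to a 16-byte boundary. The keystream then starts from the incremented counter $Y_1$, while $E_K(Y_0)$ is saved in EK0 for the final tag computation.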
| 925 |  |  | 
| 926 |  | int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, | 
| 927 |  |                       size_t len) | 
| 928 | 0 | { | 
| 929 | 0 |     size_t i; | 
| 930 | 0 |     unsigned int n; | 
| 931 | 0 |     u64 alen = ctx->len.u[0]; | 
| 932 | 0 | #ifdef GCM_FUNCREF_4BIT | 
| 933 | 0 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; | 
| 934 | 0 | # ifdef GHASH | 
| 935 | 0 |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], | 
| 936 | 0 |                          const u8 *inp, size_t len) = ctx->ghash; | 
| 937 | 0 | # endif | 
| 938 | 0 | #endif | 
| 939 |  |  | 
| 940 | 0 |     if (ctx->len.u[1]) | 
| 941 | 0 |         return -2; | 
| 942 |  |  | 
| 943 | 0 |     alen += len; | 
| 944 | 0 |     if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len)) | 
| 945 | 0 |         return -1; | 
| 946 | 0 |     ctx->len.u[0] = alen; | 
| 947 |  |  | 
| 948 | 0 |     n = ctx->ares; | 
| 949 | 0 |     if (n) { | 
| 950 | 0 |         while (n && len) { | 
| 951 | 0 |             ctx->Xi.c[n] ^= *(aad++); | 
| 952 | 0 |             --len; | 
| 953 | 0 |             n = (n + 1) % 16; | 
| 954 | 0 |         } | 
| 955 | 0 |         if (n == 0) | 
| 956 | 0 |             GCM_MUL(ctx); | 
| 957 | 0 |         else { | 
| 958 | 0 |             ctx->ares = n; | 
| 959 | 0 |             return 0; | 
| 960 | 0 |         } | 
| 961 | 0 |     } | 
| 962 | 0 | #ifdef GHASH | 
| 963 | 0 |     if ((i = (len & (size_t)-16))) { | 
| 964 | 0 |         GHASH(ctx, aad, i); | 
| 965 | 0 |         aad += i; | 
| 966 | 0 |         len -= i; | 
| 967 | 0 |     } | 
| 968 |  | #else | 
| 969 |  |     while (len >= 16) { | 
| 970 |  |         for (i = 0; i < 16; ++i) | 
| 971 |  |             ctx->Xi.c[i] ^= aad[i]; | 
| 972 |  |         GCM_MUL(ctx); | 
| 973 |  |         aad += 16; | 
| 974 |  |         len -= 16; | 
| 975 |  |     } | 
| 976 |  | #endif | 
| 977 | 0 |     if (len) { | 
| 978 | 0 |         n = (unsigned int)len; | 
| 979 | 0 |         for (i = 0; i < len; ++i) | 
| 980 | 0 |             ctx->Xi.c[i] ^= aad[i]; | 
| 981 | 0 |     } | 
| 982 |  |  | 
| 983 | 0 |     ctx->ares = n; | 
| 984 | 0 |     return 0; | 
| 985 | 0 | } | 
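The bound tested above, and the matching test in the encrypt/decrypt paths below, are the SP 800-38D limits expressed in bytes: at most $2^{64}-1$ bits of AAD and at most $2^{39}-256$ bits of plaintext per invocation, which is what the constants encode:

$$\text{alen} \le 2^{61} \text{ bytes}, \qquad \text{mlen} \le 2^{36} - 32 \text{ bytes}.$$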
| 986 |  |  | 
| 987 |  | int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, | 
| 988 |  |                           const unsigned char *in, unsigned char *out, | 
| 989 |  |                           size_t len) | 
| 990 | 0 | { | 
| 991 | 0 |     const union { | 
| 992 | 0 |         long one; | 
| 993 | 0 |         char little; | 
| 994 | 0 |     } is_endian = { 1 }; | 
| 995 | 0 |     unsigned int n, ctr, mres; | 
| 996 | 0 |     size_t i; | 
| 997 | 0 |     u64 mlen = ctx->len.u[1]; | 
| 998 | 0 |     block128_f block = ctx->block; | 
| 999 | 0 |     void *key = ctx->key; | 
| 1000 | 0 | #ifdef GCM_FUNCREF_4BIT | 
| 1001 | 0 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; | 
| 1002 | 0 | # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1003 | 0 |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], | 
| 1004 | 0 |                          const u8 *inp, size_t len) = ctx->ghash; | 
| 1005 | 0 | # endif | 
| 1006 | 0 | #endif | 
| 1007 |  |  | 
| 1008 | 0 |     mlen += len; | 
| 1009 | 0 |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) | 
| 1010 | 0 |         return -1; | 
| 1011 | 0 |     ctx->len.u[1] = mlen; | 
| 1012 |  |  | 
| 1013 | 0 |     mres = ctx->mres; | 
| 1014 |  |  | 
| 1015 | 0 |     if (ctx->ares) { | 
| 1016 |  |         /* First call to encrypt finalizes GHASH(AAD) */ | 
| 1017 | 0 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1018 | 0 |         if (len == 0) { | 
| 1019 | 0 |             GCM_MUL(ctx); | 
| 1020 | 0 |             ctx->ares = 0; | 
| 1021 | 0 |             return 0; | 
| 1022 | 0 |         } | 
| 1023 | 0 |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); | 
| 1024 | 0 |         ctx->Xi.u[0] = 0; | 
| 1025 | 0 |         ctx->Xi.u[1] = 0; | 
| 1026 | 0 |         mres = sizeof(ctx->Xi); | 
| 1027 |  | #else | 
| 1028 |  |         GCM_MUL(ctx); | 
| 1029 |  | #endif | 
| 1030 | 0 |         ctx->ares = 0; | 
| 1031 | 0 |     } | 
| 1032 |  |  | 
| 1033 | 0 |     if (is_endian.little) | 
| 1034 |  | #ifdef BSWAP4 | 
| 1035 |  |         ctr = BSWAP4(ctx->Yi.d[3]); | 
| 1036 |  | #else | 
| 1037 | 0 |         ctr = GETU32(ctx->Yi.c + 12); | 
| 1038 | 0 | #endif | 
| 1039 | 0 |     else | 
| 1040 | 0 |         ctr = ctx->Yi.d[3]; | 
| 1041 |  |  | 
| 1042 | 0 |     n = mres % 16; | 
| 1043 | 0 | #if !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1044 | 0 |     if (16 % sizeof(size_t) == 0) { /* always true actually */ | 
| 1045 | 0 |         do { | 
| 1046 | 0 |             if (n) { | 
| 1047 | 0 | # if defined(GHASH) | 
| 1048 | 0 |                 while (n && len) { | 
| 1049 | 0 |                     ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; | 
| 1050 | 0 |                     --len; | 
| 1051 | 0 |                     n = (n + 1) % 16; | 
| 1052 | 0 |                 } | 
| 1053 | 0 |                 if (n == 0) { | 
| 1054 | 0 |                     GHASH(ctx, ctx->Xn, mres); | 
| 1055 | 0 |                     mres = 0; | 
| 1056 | 0 |                 } else { | 
| 1057 | 0 |                     ctx->mres = mres; | 
| 1058 | 0 |                     return 0; | 
| 1059 | 0 |                 } | 
| 1060 |  | # else | 
| 1061 |  |                 while (n && len) { | 
| 1062 |  |                     ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; | 
| 1063 |  |                     --len; | 
| 1064 |  |                     n = (n + 1) % 16; | 
| 1065 |  |                 } | 
| 1066 |  |                 if (n == 0) { | 
| 1067 |  |                     GCM_MUL(ctx); | 
| 1068 |  |                     mres = 0; | 
| 1069 |  |                 } else { | 
| 1070 |  |                     ctx->mres = n; | 
| 1071 |  |                     return 0; | 
| 1072 |  |                 } | 
| 1073 |  | # endif | 
| 1074 | 0 |             } | 
| 1075 | 0 | # if defined(STRICT_ALIGNMENT) | 
| 1076 | 0 |             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) | 
| 1077 | 0 |                 break; | 
| 1078 | 0 | # endif | 
| 1079 | 0 | # if defined(GHASH) | 
| 1080 | 0 |             if (len >= 16 && mres) { | 
| 1081 | 0 |                 GHASH(ctx, ctx->Xn, mres); | 
| 1082 | 0 |                 mres = 0; | 
| 1083 | 0 |             } | 
| 1084 | 0 | #  if defined(GHASH_CHUNK) | 
| 1085 | 0 |             while (len >= GHASH_CHUNK) { | 
| 1086 | 0 |                 size_t j = GHASH_CHUNK; | 
| 1087 |  |  | 
| 1088 | 0 |                 while (j) { | 
| 1089 | 0 |                     size_t_aX *out_t = (size_t_aX *)out; | 
| 1090 | 0 |                     const size_t_aX *in_t = (const size_t_aX *)in; | 
| 1091 |  |  | 
| 1092 | 0 |                     (*block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1093 | 0 |                     ++ctr; | 
| 1094 | 0 |                     if (is_endian.little) | 
| 1095 |  | #   ifdef BSWAP4 | 
| 1096 |  |                         ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1097 |  | #   else | 
| 1098 | 0 |                         PUTU32(ctx->Yi.c + 12, ctr); | 
| 1099 | 0 | #   endif | 
| 1100 | 0 |                     else | 
| 1101 | 0 |                         ctx->Yi.d[3] = ctr; | 
| 1102 | 0 |                     for (i = 0; i < 16 / sizeof(size_t); ++i) | 
| 1103 | 0 |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i]; | 
| 1104 | 0 |                     out += 16; | 
| 1105 | 0 |                     in += 16; | 
| 1106 | 0 |                     j -= 16; | 
| 1107 | 0 |                 } | 
| 1108 | 0 |                 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK); | 
| 1109 | 0 |                 len -= GHASH_CHUNK; | 
| 1110 | 0 |             } | 
| 1111 | 0 | #  endif | 
| 1112 | 0 |             if ((i = (len & (size_t)-16))) { | 
| 1113 | 0 |                 size_t j = i; | 
| 1114 |  |  | 
| 1115 | 0 |                 while (len >= 16) { | 
| 1116 | 0 |                     size_t_aX *out_t = (size_t_aX *)out; | 
| 1117 | 0 |                     const size_t_aX *in_t = (const size_t_aX *)in; | 
| 1118 |  |  | 
| 1119 | 0 |                     (*block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1120 | 0 |                     ++ctr; | 
| 1121 | 0 |                     if (is_endian.little) | 
| 1122 |  | #  ifdef BSWAP4 | 
| 1123 |  |                         ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1124 |  | #  else | 
| 1125 | 0 |                         PUTU32(ctx->Yi.c + 12, ctr); | 
| 1126 | 0 | #  endif | 
| 1127 | 0 |                     else | 
| 1128 | 0 |                         ctx->Yi.d[3] = ctr; | 
| 1129 | 0 |                     for (i = 0; i < 16 / sizeof(size_t); ++i) | 
| 1130 | 0 |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i]; | 
| 1131 | 0 |                     out += 16; | 
| 1132 | 0 |                     in += 16; | 
| 1133 | 0 |                     len -= 16; | 
| 1134 | 0 |                 } | 
| 1135 | 0 |                 GHASH(ctx, out - j, j); | 
| 1136 | 0 |             } | 
| 1137 |  | # else | 
| 1138 |  |             while (len >= 16) { | 
| 1139 |  |                 size_t *out_t = (size_t *)out; | 
| 1140 |  |                 const size_t *in_t = (const size_t *)in; | 
| 1141 |  |  | 
| 1142 |  |                 (*block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1143 |  |                 ++ctr; | 
| 1144 |  |                 if (is_endian.little) | 
| 1145 |  | #  ifdef BSWAP4 | 
| 1146 |  |                     ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1147 |  | #  else | 
| 1148 |  |                     PUTU32(ctx->Yi.c + 12, ctr); | 
| 1149 |  | #  endif | 
| 1150 |  |                 else | 
| 1151 |  |                     ctx->Yi.d[3] = ctr; | 
| 1152 |  |                 for (i = 0; i < 16 / sizeof(size_t); ++i) | 
| 1153 |  |                     ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i]; | 
| 1154 |  |                 GCM_MUL(ctx); | 
| 1155 |  |                 out += 16; | 
| 1156 |  |                 in += 16; | 
| 1157 |  |                 len -= 16; | 
| 1158 |  |             } | 
| 1159 |  | # endif | 
| 1160 | 0 |             if (len) { | 
| 1161 | 0 |                 (*block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1162 | 0 |                 ++ctr; | 
| 1163 | 0 |                 if (is_endian.little) | 
| 1164 |  | # ifdef BSWAP4 | 
| 1165 |  |                     ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1166 |  | # else | 
| 1167 | 0 |                     PUTU32(ctx->Yi.c + 12, ctr); | 
| 1168 | 0 | # endif | 
| 1169 | 0 |                 else | 
| 1170 | 0 |                     ctx->Yi.d[3] = ctr; | 
| 1171 | 0 | # if defined(GHASH) | 
| 1172 | 0 |                 while (len--) { | 
| 1173 | 0 |                     ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; | 
| 1174 | 0 |                     ++n; | 
| 1175 | 0 |                 } | 
| 1176 |  | # else | 
| 1177 |  |                 while (len--) { | 
| 1178 |  |                     ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; | 
| 1179 |  |                     ++n; | 
| 1180 |  |                 } | 
| 1181 |  |                 mres = n; | 
| 1182 |  | # endif | 
| 1183 | 0 |             } | 
| 1184 |  |  | 
| 1185 | 0 |             ctx->mres = mres; | 
| 1186 | 0 |             return 0; | 
| 1187 | 0 |         } while (0); | 
| 1188 | 0 |     } | 
| 1189 | 0 | #endif | 
| 1190 | 0 |     for (i = 0; i < len; ++i) { | 
| 1191 | 0 |         if (n == 0) { | 
| 1192 | 0 |             (*block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1193 | 0 |             ++ctr; | 
| 1194 | 0 |             if (is_endian.little) | 
| 1195 |  | #ifdef BSWAP4 | 
| 1196 |  |                 ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1197 |  | #else | 
| 1198 | 0 |                 PUTU32(ctx->Yi.c + 12, ctr); | 
| 1199 | 0 | #endif | 
| 1200 | 0 |             else | 
| 1201 | 0 |                 ctx->Yi.d[3] = ctr; | 
| 1202 | 0 |         } | 
| 1203 | 0 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1204 | 0 |         ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n]; | 
| 1205 | 0 |         n = (n + 1) % 16; | 
| 1206 | 0 |         if (mres == sizeof(ctx->Xn)) { | 
| 1207 | 0 |             GHASH(ctx,ctx->Xn,sizeof(ctx->Xn)); | 
| 1208 | 0 |             mres = 0; | 
| 1209 | 0 |         } | 
| 1210 |  | #else | 
| 1211 |  |         ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; | 
| 1212 |  |         mres = n = (n + 1) % 16; | 
| 1213 |  |         if (n == 0) | 
| 1214 |  |             GCM_MUL(ctx); | 
| 1215 |  | #endif | 
| 1216 | 0 |     } | 
| 1217 |  |  | 
| 1218 | 0 |     ctx->mres = mres; | 
| 1219 | 0 |     return 0; | 
| 1220 | 0 | } | 
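Because the mres/ares bookkeeping carries partial blocks across calls, CRYPTO_gcm128_encrypt may be fed data in arbitrary-sized pieces and produces the same ciphertext and tag as a single call over the whole buffer. A hedged sketch of that streaming pattern (the context is assumed to be set up as in the earlier example):

```c
#include <stddef.h>
#include <openssl/modes.h>

/* Feed plaintext in arbitrary chunks; the residue logic in mres makes
 * this equivalent to one CRYPTO_gcm128_encrypt over the whole buffer. */
static int encrypt_in_chunks(GCM128_CONTEXT *gcm,
                             const unsigned char *pt, unsigned char *ct,
                             size_t len, size_t chunk)
{
    while (len > 0) {
        size_t n = len < chunk ? len : chunk;

        if (CRYPTO_gcm128_encrypt(gcm, pt, ct, n) != 0)
            return -1;
        pt += n;
        ct += n;
        len -= n;
    }
    return 0;
}
```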
| 1221 |  |  | 
| 1222 |  | int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, | 
| 1223 |  |                           const unsigned char *in, unsigned char *out, | 
| 1224 |  |                           size_t len) | 
| 1225 | 0 | { | 
| 1226 | 0 |     const union { | 
| 1227 | 0 |         long one; | 
| 1228 | 0 |         char little; | 
| 1229 | 0 |     } is_endian = { 1 }; | 
| 1230 | 0 |     unsigned int n, ctr, mres; | 
| 1231 | 0 |     size_t i; | 
| 1232 | 0 |     u64 mlen = ctx->len.u[1]; | 
| 1233 | 0 |     block128_f block = ctx->block; | 
| 1234 | 0 |     void *key = ctx->key; | 
| 1235 | 0 | #ifdef GCM_FUNCREF_4BIT | 
| 1236 | 0 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; | 
| 1237 | 0 | # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1238 | 0 |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], | 
| 1239 | 0 |                          const u8 *inp, size_t len) = ctx->ghash; | 
| 1240 | 0 | # endif | 
| 1241 | 0 | #endif | 
| 1242 |  |  | 
| 1243 | 0 |     mlen += len; | 
| 1244 | 0 |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) | 
| 1245 | 0 |         return -1; | 
| 1246 | 0 |     ctx->len.u[1] = mlen; | 
| 1247 |  |  | 
| 1248 | 0 |     mres = ctx->mres; | 
| 1249 |  |  | 
| 1250 | 0 |     if (ctx->ares) { | 
| 1251 |  |         /* First call to decrypt finalizes GHASH(AAD) */ | 
| 1252 | 0 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1253 | 0 |         if (len == 0) { | 
| 1254 | 0 |             GCM_MUL(ctx); | 
| 1255 | 0 |             ctx->ares = 0; | 
| 1256 | 0 |             return 0; | 
| 1257 | 0 |         } | 
| 1258 | 0 |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); | 
| 1259 | 0 |         ctx->Xi.u[0] = 0; | 
| 1260 | 0 |         ctx->Xi.u[1] = 0; | 
| 1261 | 0 |         mres = sizeof(ctx->Xi); | 
| 1262 |  | #else | 
| 1263 |  |         GCM_MUL(ctx); | 
| 1264 |  | #endif | 
| 1265 | 0 |         ctx->ares = 0; | 
| 1266 | 0 |     } | 
| 1267 |  |  | 
| 1268 | 0 |     if (is_endian.little) | 
| 1269 |  | #ifdef BSWAP4 | 
| 1270 |  |         ctr = BSWAP4(ctx->Yi.d[3]); | 
| 1271 |  | #else | 
| 1272 | 0 |         ctr = GETU32(ctx->Yi.c + 12); | 
| 1273 | 0 | #endif | 
| 1274 | 0 |     else | 
| 1275 | 0 |         ctr = ctx->Yi.d[3]; | 
| 1276 |  |  | 
| 1277 | 0 |     n = mres % 16; | 
| 1278 | 0 | #if !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1279 | 0 |     if (16 % sizeof(size_t) == 0) { /* always true actually */ | 
| 1280 | 0 |         do { | 
| 1281 | 0 |             if (n) { | 
| 1282 | 0 | # if defined(GHASH) | 
| 1283 | 0 |                 while (n && len) { | 
| 1284 | 0 |                     *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; | 
| 1285 | 0 |                     --len; | 
| 1286 | 0 |                     n = (n + 1) % 16; | 
| 1287 | 0 |                 } | 
| 1288 | 0 |                 if (n == 0) { | 
| 1289 | 0 |                     GHASH(ctx, ctx->Xn, mres); | 
| 1290 | 0 |                     mres = 0; | 
| 1291 | 0 |                 } else { | 
| 1292 | 0 |                     ctx->mres = mres; | 
| 1293 | 0 |                     return 0; | 
| 1294 | 0 |                 } | 
| 1295 |  | # else | 
| 1296 |  |                 while (n && len) { | 
| 1297 |  |                     u8 c = *(in++); | 
| 1298 |  |                     *(out++) = c ^ ctx->EKi.c[n]; | 
| 1299 |  |                     ctx->Xi.c[n] ^= c; | 
| 1300 |  |                     --len; | 
| 1301 |  |                     n = (n + 1) % 16; | 
| 1302 |  |                 } | 
| 1303 |  |                 if (n == 0) { | 
| 1304 |  |                     GCM_MUL(ctx); | 
| 1305 |  |                     mres = 0; | 
| 1306 |  |                 } else { | 
| 1307 |  |                     ctx->mres = n; | 
| 1308 |  |                     return 0; | 
| 1309 |  |                 } | 
| 1310 |  | # endif | 
| 1311 | 0 |             } | 
| 1312 | 0 | # if defined(STRICT_ALIGNMENT) | 
| 1313 | 0 |             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) | 
| 1314 | 0 |                 break; | 
| 1315 | 0 | # endif | 
| 1316 | 0 | # if defined(GHASH) | 
| 1317 | 0 |             if (len >= 16 && mres) { | 
| 1318 | 0 |                 GHASH(ctx, ctx->Xn, mres); | 
| 1319 | 0 |                 mres = 0; | 
| 1320 | 0 |             } | 
| 1321 | 0 | #  if defined(GHASH_CHUNK) | 
| 1322 | 0 |             while (len >= GHASH_CHUNK) { | 
| 1323 | 0 |                 size_t j = GHASH_CHUNK; | 
| 1324 |  |  | 
| 1325 | 0 |                 GHASH(ctx, in, GHASH_CHUNK); | 
| 1326 | 0 |                 while (j) { | 
| 1327 | 0 |                     size_t_aX *out_t = (size_t_aX *)out; | 
| 1328 | 0 |                     const size_t_aX *in_t = (const size_t_aX *)in; | 
| 1329 |  |  | 
| 1330 | 0 |                     (*block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1331 | 0 |                     ++ctr; | 
| 1332 | 0 |                     if (is_endian.little) | 
| 1333 |  | #   ifdef BSWAP4 | 
| 1334 |  |                         ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1335 |  | #   else | 
| 1336 | 0 |                         PUTU32(ctx->Yi.c + 12, ctr); | 
| 1337 | 0 | #   endif | 
| 1338 | 0 |                     else | 
| 1339 | 0 |                         ctx->Yi.d[3] = ctr; | 
| 1340 | 0 |                     for (i = 0; i < 16 / sizeof(size_t); ++i) | 
| 1341 | 0 |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i]; | 
| 1342 | 0 |                     out += 16; | 
| 1343 | 0 |                     in += 16; | 
| 1344 | 0 |                     j -= 16; | 
| 1345 | 0 |                 } | 
| 1346 | 0 |                 len -= GHASH_CHUNK; | 
| 1347 | 0 |             } | 
| 1348 | 0 | #  endif | 
| 1349 | 0 |             if ((i = (len & (size_t)-16))) { | 
| 1350 | 0 |                 GHASH(ctx, in, i); | 
| 1351 | 0 |                 while (len >= 16) { | 
| 1352 | 0 |                     size_t_aX *out_t = (size_t_aX *)out; | 
| 1353 | 0 |                     const size_t_aX *in_t = (const size_t_aX *)in; | 
| 1354 |  |  | 
| 1355 | 0 |                     (*block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1356 | 0 |                     ++ctr; | 
| 1357 | 0 |                     if (is_endian.little) | 
| 1358 |  | #  ifdef BSWAP4 | 
| 1359 |  |                         ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1360 |  | #  else | 
| 1361 | 0 |                         PUTU32(ctx->Yi.c + 12, ctr); | 
| 1362 | 0 | #  endif | 
| 1363 | 0 |                     else | 
| 1364 | 0 |                         ctx->Yi.d[3] = ctr; | 
| 1365 | 0 |                     for (i = 0; i < 16 / sizeof(size_t); ++i) | 
| 1366 | 0 |                         out_t[i] = in_t[i] ^ ctx->EKi.t[i]; | 
| 1367 | 0 |                     out += 16; | 
| 1368 | 0 |                     in += 16; | 
| 1369 | 0 |                     len -= 16; | 
| 1370 | 0 |                 } | 
| 1371 | 0 |             } | 
| 1372 |  | # else | 
| 1373 |  |             while (len >= 16) { | 
| 1374 |  |                 size_t *out_t = (size_t *)out; | 
| 1375 |  |                 const size_t *in_t = (const size_t *)in; | 
| 1376 |  |  | 
| 1377 |  |                 (*block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1378 |  |                 ++ctr; | 
| 1379 |  |                 if (is_endian.little) | 
| 1380 |  | #  ifdef BSWAP4 | 
| 1381 |  |                     ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1382 |  | #  else | 
| 1383 |  |                     PUTU32(ctx->Yi.c + 12, ctr); | 
| 1384 |  | #  endif | 
| 1385 |  |                 else | 
| 1386 |  |                     ctx->Yi.d[3] = ctr; | 
| 1387 |  |                 for (i = 0; i < 16 / sizeof(size_t); ++i) { | 
| 1388 |  |                     size_t c = in_t[i]; | 
| 1389 |  |                     out_t[i] = c ^ ctx->EKi.t[i]; | 
| 1390 |  |                     ctx->Xi.t[i] ^= c; | 
| 1391 |  |                 } | 
| 1392 |  |                 GCM_MUL(ctx); | 
| 1393 |  |                 out += 16; | 
| 1394 |  |                 in += 16; | 
| 1395 |  |                 len -= 16; | 
| 1396 |  |             } | 
| 1397 |  | # endif | 
| 1398 | 0 |             if (len) { | 
| 1399 | 0 |                 (*block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1400 | 0 |                 ++ctr; | 
| 1401 | 0 |                 if (is_endian.little) | 
| 1402 |  | # ifdef BSWAP4 | 
| 1403 |  |                     ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1404 |  | # else | 
| 1405 | 0 |                     PUTU32(ctx->Yi.c + 12, ctr); | 
| 1406 | 0 | # endif | 
| 1407 | 0 |                 else | 
| 1408 | 0 |                     ctx->Yi.d[3] = ctr; | 
| 1409 | 0 | # if defined(GHASH) | 
| 1410 | 0 |                 while (len--) { | 
| 1411 | 0 |                     out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; | 
| 1412 | 0 |                     ++n; | 
| 1413 | 0 |                 } | 
| 1414 |  | # else | 
| 1415 |  |                 while (len--) { | 
| 1416 |  |                     u8 c = in[n]; | 
| 1417 |  |                     ctx->Xi.c[n] ^= c; | 
| 1418 |  |                     out[n] = c ^ ctx->EKi.c[n]; | 
| 1419 |  |                     ++n; | 
| 1420 |  |                 } | 
| 1421 |  |                 mres = n; | 
| 1422 |  | # endif | 
| 1423 | 0 |             } | 
| 1424 |  |  | 
| 1425 | 0 |             ctx->mres = mres; | 
| 1426 | 0 |             return 0; | 
| 1427 | 0 |         } while (0); | 
| 1428 | 0 |     } | 
| 1429 | 0 | #endif | 
| 1430 | 0 |     for (i = 0; i < len; ++i) { | 
| 1431 | 0 |         u8 c; | 
| 1432 | 0 |         if (n == 0) { | 
| 1433 | 0 |             (*block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1434 | 0 |             ++ctr; | 
| 1435 | 0 |             if (is_endian.little) | 
| 1436 |  | #ifdef BSWAP4 | 
| 1437 |  |                 ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1438 |  | #else | 
| 1439 | 0 |                 PUTU32(ctx->Yi.c + 12, ctr); | 
| 1440 | 0 | #endif | 
| 1441 | 0 |             else | 
| 1442 | 0 |                 ctx->Yi.d[3] = ctr; | 
| 1443 | 0 |         } | 
| 1444 | 0 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1445 | 0 |         out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n]; | 
| 1446 | 0 |         n = (n + 1) % 16; | 
| 1447 | 0 |         if (mres == sizeof(ctx->Xn)) { | 
| 1448 | 0 |             GHASH(ctx, ctx->Xn, sizeof(ctx->Xn)); | 
| 1449 | 0 |             mres = 0; | 
| 1450 | 0 |         } | 
| 1451 |  | #else | 
| 1452 |  |         c = in[i]; | 
| 1453 |  |         out[i] = c ^ ctx->EKi.c[n]; | 
| 1454 |  |         ctx->Xi.c[n] ^= c; | 
| 1455 |  |         mres = n = (n + 1) % 16; | 
| 1456 |  |         if (n == 0) | 
| 1457 |  |             GCM_MUL(ctx); | 
| 1458 |  | #endif | 
| 1459 | 0 |     } | 
| 1460 |  |  | 
| 1461 | 0 |     ctx->mres = mres; | 
| 1462 | 0 |     return 0; | 
| 1463 | 0 | } | 
| 1464 |  |  | 
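Note the ordering that separates decryption from encryption above: GHASH must absorb the ciphertext, so each input byte is captured (`u8 c = in[n]`, or staged into `ctx->Xn`) before the keystream is applied; otherwise in-place operation (`in == out`) would hash plaintext. A reduced sketch of the per-byte step, where `xi`, `eki`, and `n` stand in for `ctx->Xi.c`, `ctx->EKi.c`, and the intra-block offset:

```c
/* Sketch of one byte of GCM decryption: hash the ciphertext first,
 * then strip the keystream. Safe even when input and output alias. */
static unsigned char gcm_dec_byte(unsigned char xi[16],
                                  const unsigned char eki[16],
                                  unsigned int n, unsigned char in)
{
    unsigned char c = in;   /* capture the ciphertext byte... */

    xi[n] ^= c;             /* ...feed it to the running GHASH state */
    return c ^ eki[n];      /* and only then recover the plaintext */
}
```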
| 1465 |  | int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, | 
| 1466 |  |                                 const unsigned char *in, unsigned char *out, | 
| 1467 |  |                                 size_t len, ctr128_f stream) | 
| 1468 | 0 | { | 
| 1469 |  | #if defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1470 |  |     return CRYPTO_gcm128_encrypt(ctx, in, out, len); | 
| 1471 |  | #else | 
| 1472 | 0 |     const union { | 
| 1473 | 0 |         long one; | 
| 1474 | 0 |         char little; | 
| 1475 | 0 |     } is_endian = { 1 }; | 
| 1476 | 0 |     unsigned int n, ctr, mres; | 
| 1477 | 0 |     size_t i; | 
| 1478 | 0 |     u64 mlen = ctx->len.u[1]; | 
| 1479 | 0 |     void *key = ctx->key; | 
| 1480 | 0 | # ifdef GCM_FUNCREF_4BIT | 
| 1481 | 0 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; | 
| 1482 | 0 | #  ifdef GHASH | 
| 1483 | 0 |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], | 
| 1484 | 0 |                          const u8 *inp, size_t len) = ctx->ghash; | 
| 1485 | 0 | #  endif | 
| 1486 | 0 | # endif | 
| 1487 |  |  | 
| 1488 | 0 |     mlen += len; | 
| 1489 | 0 |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) | 
| 1490 | 0 |         return -1; | 
| 1491 | 0 |     ctx->len.u[1] = mlen; | 
| 1492 |  |  | 
| 1493 | 0 |     mres = ctx->mres; | 
| 1494 |  |  | 
| 1495 | 0 |     if (ctx->ares) { | 
| 1496 |  |         /* First call to encrypt finalizes GHASH(AAD) */ | 
| 1497 | 0 | #if defined(GHASH) | 
| 1498 | 0 |         if (len == 0) { | 
| 1499 | 0 |             GCM_MUL(ctx); | 
| 1500 | 0 |             ctx->ares = 0; | 
| 1501 | 0 |             return 0; | 
| 1502 | 0 |         } | 
| 1503 | 0 |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); | 
| 1504 | 0 |         ctx->Xi.u[0] = 0; | 
| 1505 | 0 |         ctx->Xi.u[1] = 0; | 
| 1506 | 0 |         mres = sizeof(ctx->Xi); | 
| 1507 |  | #else | 
| 1508 |  |         GCM_MUL(ctx); | 
| 1509 |  | #endif | 
| 1510 | 0 |         ctx->ares = 0; | 
| 1511 | 0 |     } | 
| 1512 |  |  | 
| 1513 | 0 |     if (is_endian.little) | 
| 1514 |  | # ifdef BSWAP4 | 
| 1515 |  |         ctr = BSWAP4(ctx->Yi.d[3]); | 
| 1516 |  | # else | 
| 1517 | 0 |         ctr = GETU32(ctx->Yi.c + 12); | 
| 1518 | 0 | # endif | 
| 1519 | 0 |     else | 
| 1520 | 0 |         ctr = ctx->Yi.d[3]; | 
| 1521 |  |  | 
| 1522 | 0 |     n = mres % 16; | 
| 1523 | 0 |     if (n) { | 
| 1524 | 0 | # if defined(GHASH) | 
| 1525 | 0 |         while (n && len) { | 
| 1526 | 0 |             ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; | 
| 1527 | 0 |             --len; | 
| 1528 | 0 |             n = (n + 1) % 16; | 
| 1529 | 0 |         } | 
| 1530 | 0 |         if (n == 0) { | 
| 1531 | 0 |             GHASH(ctx, ctx->Xn, mres); | 
| 1532 | 0 |             mres = 0; | 
| 1533 | 0 |         } else { | 
| 1534 | 0 |             ctx->mres = mres; | 
| 1535 | 0 |             return 0; | 
| 1536 | 0 |         } | 
| 1537 |  | # else | 
| 1538 |  |         while (n && len) { | 
| 1539 |  |             ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; | 
| 1540 |  |             --len; | 
| 1541 |  |             n = (n + 1) % 16; | 
| 1542 |  |         } | 
| 1543 |  |         if (n == 0) { | 
| 1544 |  |             GCM_MUL(ctx); | 
| 1545 |  |             mres = 0; | 
| 1546 |  |         } else { | 
| 1547 |  |             ctx->mres = n; | 
| 1548 |  |             return 0; | 
| 1549 |  |         } | 
| 1550 |  | # endif | 
| 1551 | 0 |     } | 
| 1552 | 0 | # if defined(GHASH) | 
| 1553 | 0 |     if (len >= 16 && mres) { | 
| 1554 | 0 |         GHASH(ctx, ctx->Xn, mres); | 
| 1555 | 0 |         mres = 0; | 
| 1556 | 0 |     } | 
| 1557 | 0 | #  if defined(GHASH_CHUNK) | 
| 1558 | 0 |     while (len >= GHASH_CHUNK) { | 
| 1559 | 0 |         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); | 
| 1560 | 0 |         ctr += GHASH_CHUNK / 16; | 
| 1561 | 0 |         if (is_endian.little) | 
| 1562 |  | #   ifdef BSWAP4 | 
| 1563 |  |             ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1564 |  | #   else | 
| 1565 | 0 |             PUTU32(ctx->Yi.c + 12, ctr); | 
| 1566 | 0 | #   endif | 
| 1567 | 0 |         else | 
| 1568 | 0 |             ctx->Yi.d[3] = ctr; | 
| 1569 | 0 |         GHASH(ctx, out, GHASH_CHUNK); | 
| 1570 | 0 |         out += GHASH_CHUNK; | 
| 1571 | 0 |         in += GHASH_CHUNK; | 
| 1572 | 0 |         len -= GHASH_CHUNK; | 
| 1573 | 0 |     } | 
| 1574 | 0 | #  endif | 
| 1575 | 0 | # endif | 
| 1576 | 0 |     if ((i = (len & (size_t)-16))) { | 
| 1577 | 0 |         size_t j = i / 16; | 
| 1578 |  |  | 
| 1579 | 0 |         (*stream) (in, out, j, key, ctx->Yi.c); | 
| 1580 | 0 |         ctr += (unsigned int)j; | 
| 1581 | 0 |         if (is_endian.little) | 
| 1582 |  | # ifdef BSWAP4 | 
| 1583 |  |             ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1584 |  | # else | 
| 1585 | 0 |             PUTU32(ctx->Yi.c + 12, ctr); | 
| 1586 | 0 | # endif | 
| 1587 | 0 |         else | 
| 1588 | 0 |             ctx->Yi.d[3] = ctr; | 
| 1589 | 0 |         in += i; | 
| 1590 | 0 |         len -= i; | 
| 1591 | 0 | # if defined(GHASH) | 
| 1592 | 0 |         GHASH(ctx, out, i); | 
| 1593 | 0 |         out += i; | 
| 1594 |  | # else | 
| 1595 |  |         while (j--) { | 
| 1596 |  |             for (i = 0; i < 16; ++i) | 
| 1597 |  |                 ctx->Xi.c[i] ^= out[i]; | 
| 1598 |  |             GCM_MUL(ctx); | 
| 1599 |  |             out += 16; | 
| 1600 |  |         } | 
| 1601 |  | # endif | 
| 1602 | 0 |     } | 
| 1603 | 0 |     if (len) { | 
| 1604 | 0 |         (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1605 | 0 |         ++ctr; | 
| 1606 | 0 |         if (is_endian.little) | 
| 1607 |  | # ifdef BSWAP4 | 
| 1608 |  |             ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1609 |  | # else | 
| 1610 | 0 |             PUTU32(ctx->Yi.c + 12, ctr); | 
| 1611 | 0 | # endif | 
| 1612 | 0 |         else | 
| 1613 | 0 |             ctx->Yi.d[3] = ctr; | 
| 1614 | 0 |         while (len--) { | 
| 1615 | 0 | # if defined(GHASH) | 
| 1616 | 0 |             ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; | 
| 1617 |  | # else | 
| 1618 |  |             ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n]; | 
| 1619 |  | # endif | 
| 1620 | 0 |             ++n; | 
| 1621 | 0 |         } | 
| 1622 | 0 |     } | 
| 1623 |  |  | 
| 1624 | 0 |     ctx->mres = mres; | 
| 1625 | 0 |     return 0; | 
| 1626 | 0 | #endif | 
| 1627 | 0 | } | 
| 1628 |  |  | 
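CRYPTO_gcm128_encrypt_ctr32 delegates bulk keystream work to the caller-supplied `stream` callback, which encrypts `blocks` consecutive counter blocks starting from `ivec`; the caller then stores the advanced counter itself, as the `ctr += GHASH_CHUNK / 16` and `PUTU32` lines above show. A sketch of a `ctr128_f`-shaped callback built on an assumed block primitive `toy_block` (both names hypothetical):

```c
#include <stddef.h>
#include <string.h>

/* Assumed 16-byte block primitive with block128_f's shape. */
extern void toy_block(const unsigned char in[16], unsigned char out[16],
                      const void *key);

/* ctr128_f-shaped sketch: XOR each input block with E(K, counter),
 * bumping only the low 32 big-endian bits of a local counter copy. */
static void toy_ctr32(const unsigned char *in, unsigned char *out,
                      size_t blocks, const void *key,
                      const unsigned char ivec[16])
{
    unsigned char ctrblk[16], ks[16];
    size_t i;

    memcpy(ctrblk, ivec, 16);
    while (blocks--) {
        toy_block(ctrblk, ks, key);
        for (i = 0; i < 16; ++i)
            out[i] = in[i] ^ ks[i];
        in += 16;
        out += 16;
        for (i = 16; i > 12; --i)       /* 32-bit big-endian increment */
            if (++ctrblk[i - 1] != 0)
                break;
    }
}
```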
| 1629 |  | int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, | 
| 1630 |  |                                 const unsigned char *in, unsigned char *out, | 
| 1631 |  |                                 size_t len, ctr128_f stream) | 
| 1632 | 0 | { | 
| 1633 |  | #if defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1634 |  |     return CRYPTO_gcm128_decrypt(ctx, in, out, len); | 
| 1635 |  | #else | 
| 1636 | 0 |     const union { | 
| 1637 | 0 |         long one; | 
| 1638 | 0 |         char little; | 
| 1639 | 0 |     } is_endian = { 1 }; | 
| 1640 | 0 |     unsigned int n, ctr, mres; | 
| 1641 | 0 |     size_t i; | 
| 1642 | 0 |     u64 mlen = ctx->len.u[1]; | 
| 1643 | 0 |     void *key = ctx->key; | 
| 1644 | 0 | # ifdef GCM_FUNCREF_4BIT | 
| 1645 | 0 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; | 
| 1646 | 0 | #  ifdef GHASH | 
| 1647 | 0 |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], | 
| 1648 | 0 |                          const u8 *inp, size_t len) = ctx->ghash; | 
| 1649 | 0 | #  endif | 
| 1650 | 0 | # endif | 
| 1651 |  |  | 
| 1652 | 0 |     mlen += len; | 
| 1653 | 0 |     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) | 
| 1654 | 0 |         return -1; | 
| 1655 | 0 |     ctx->len.u[1] = mlen; | 
| 1656 |  |  | 
| 1657 | 0 |     mres = ctx->mres; | 
| 1658 |  |  | 
| 1659 | 0 |     if (ctx->ares) { | 
| 1660 |  |         /* First call to decrypt finalizes GHASH(AAD) */ | 
| 1661 | 0 | # if defined(GHASH) | 
| 1662 | 0 |         if (len == 0) { | 
| 1663 | 0 |             GCM_MUL(ctx); | 
| 1664 | 0 |             ctx->ares = 0; | 
| 1665 | 0 |             return 0; | 
| 1666 | 0 |         } | 
| 1667 | 0 |         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); | 
| 1668 | 0 |         ctx->Xi.u[0] = 0; | 
| 1669 | 0 |         ctx->Xi.u[1] = 0; | 
| 1670 | 0 |         mres = sizeof(ctx->Xi); | 
| 1671 |  | # else | 
| 1672 |  |         GCM_MUL(ctx); | 
| 1673 |  | # endif | 
| 1674 | 0 |         ctx->ares = 0; | 
| 1675 | 0 |     } | 
| 1676 |  |  | 
| 1677 | 0 |     if (is_endian.little) | 
| 1678 |  | # ifdef BSWAP4 | 
| 1679 |  |         ctr = BSWAP4(ctx->Yi.d[3]); | 
| 1680 |  | # else | 
| 1681 | 0 |         ctr = GETU32(ctx->Yi.c + 12); | 
| 1682 | 0 | # endif | 
| 1683 | 0 |     else | 
| 1684 | 0 |         ctr = ctx->Yi.d[3]; | 
| 1685 |  |  | 
| 1686 | 0 |     n = mres % 16; | 
| 1687 | 0 |     if (n) { | 
| 1688 | 0 | # if defined(GHASH) | 
| 1689 | 0 |         while (n && len) { | 
| 1690 | 0 |             *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; | 
| 1691 | 0 |             --len; | 
| 1692 | 0 |             n = (n + 1) % 16; | 
| 1693 | 0 |         } | 
| 1694 | 0 |         if (n == 0) { | 
| 1695 | 0 |             GHASH(ctx, ctx->Xn, mres); | 
| 1696 | 0 |             mres = 0; | 
| 1697 | 0 |         } else { | 
| 1698 | 0 |             ctx->mres = mres; | 
| 1699 | 0 |             return 0; | 
| 1700 | 0 |         } | 
| 1701 |  | # else | 
| 1702 |  |         while (n && len) { | 
| 1703 |  |             u8 c = *(in++); | 
| 1704 |  |             *(out++) = c ^ ctx->EKi.c[n]; | 
| 1705 |  |             ctx->Xi.c[n] ^= c; | 
| 1706 |  |             --len; | 
| 1707 |  |             n = (n + 1) % 16; | 
| 1708 |  |         } | 
| 1709 |  |         if (n == 0) { | 
| 1710 |  |             GCM_MUL(ctx); | 
| 1711 |  |             mres = 0; | 
| 1712 |  |         } else { | 
| 1713 |  |             ctx->mres = n; | 
| 1714 |  |             return 0; | 
| 1715 |  |         } | 
| 1716 |  | # endif | 
| 1717 | 0 |     } | 
| 1718 | 0 | # if defined(GHASH) | 
| 1719 | 0 |     if (len >= 16 && mres) { | 
| 1720 | 0 |         GHASH(ctx, ctx->Xn, mres); | 
| 1721 | 0 |         mres = 0; | 
| 1722 | 0 |     } | 
| 1723 | 0 | #  if defined(GHASH_CHUNK) | 
| 1724 | 0 |     while (len >= GHASH_CHUNK) { | 
| 1725 | 0 |         GHASH(ctx, in, GHASH_CHUNK); | 
| 1726 | 0 |         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); | 
| 1727 | 0 |         ctr += GHASH_CHUNK / 16; | 
| 1728 | 0 |         if (is_endian.little) | 
| 1729 |  | #   ifdef BSWAP4 | 
| 1730 |  |             ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1731 |  | #   else | 
| 1732 | 0 |             PUTU32(ctx->Yi.c + 12, ctr); | 
| 1733 | 0 | #   endif | 
| 1734 | 0 |         else | 
| 1735 | 0 |             ctx->Yi.d[3] = ctr; | 
| 1736 | 0 |         out += GHASH_CHUNK; | 
| 1737 | 0 |         in += GHASH_CHUNK; | 
| 1738 | 0 |         len -= GHASH_CHUNK; | 
| 1739 | 0 |     } | 
| 1740 | 0 | #  endif | 
| 1741 | 0 | # endif | 
| 1742 | 0 |     if ((i = (len & (size_t)-16))) { | 
| 1743 | 0 |         size_t j = i / 16; | 
| 1744 |  |  | 
| 1745 | 0 | # if defined(GHASH) | 
| 1746 | 0 |         GHASH(ctx, in, i); | 
| 1747 |  | # else | 
| 1748 |  |         while (j--) { | 
| 1749 |  |             size_t k; | 
| 1750 |  |             for (k = 0; k < 16; ++k) | 
| 1751 |  |                 ctx->Xi.c[k] ^= in[k]; | 
| 1752 |  |             GCM_MUL(ctx); | 
| 1753 |  |             in += 16; | 
| 1754 |  |         } | 
| 1755 |  |         j = i / 16; | 
| 1756 |  |         in -= i; | 
| 1757 |  | # endif | 
| 1758 | 0 |         (*stream) (in, out, j, key, ctx->Yi.c); | 
| 1759 | 0 |         ctr += (unsigned int)j; | 
| 1760 | 0 |         if (is_endian.little) | 
| 1761 |  | # ifdef BSWAP4 | 
| 1762 |  |             ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1763 |  | # else | 
| 1764 | 0 |             PUTU32(ctx->Yi.c + 12, ctr); | 
| 1765 | 0 | # endif | 
| 1766 | 0 |         else | 
| 1767 | 0 |             ctx->Yi.d[3] = ctr; | 
| 1768 | 0 |         out += i; | 
| 1769 | 0 |         in += i; | 
| 1770 | 0 |         len -= i; | 
| 1771 | 0 |     } | 
| 1772 | 0 |     if (len) { | 
| 1773 | 0 |         (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key); | 
| 1774 | 0 |         ++ctr; | 
| 1775 | 0 |         if (is_endian.little) | 
| 1776 |  | # ifdef BSWAP4 | 
| 1777 |  |             ctx->Yi.d[3] = BSWAP4(ctr); | 
| 1778 |  | # else | 
| 1779 | 0 |             PUTU32(ctx->Yi.c + 12, ctr); | 
| 1780 | 0 | # endif | 
| 1781 | 0 |         else | 
| 1782 | 0 |             ctx->Yi.d[3] = ctr; | 
| 1783 | 0 |         while (len--) { | 
| 1784 | 0 | # if defined(GHASH) | 
| 1785 | 0 |             out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; | 
| 1786 |  | # else | 
| 1787 |  |             u8 c = in[n]; | 
| 1788 |  |             ctx->Xi.c[mres++] ^= c; | 
| 1789 |  |             out[n] = c ^ ctx->EKi.c[n]; | 
| 1790 |  | # endif | 
| 1791 | 0 |             ++n; | 
| 1792 | 0 |         } | 
| 1793 | 0 |     } | 
| 1794 |  |  | 
| 1795 | 0 |     ctx->mres = mres; | 
| 1796 | 0 |     return 0; | 
| 1797 | 0 | #endif | 
| 1798 | 0 | } | 
| 1799 |  |  | 
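The decrypt variant flips the bulk-path order relative to encryption: `GHASH(ctx, in, ...)` runs before the `stream` callback, because the ciphertext being hashed is about to be overwritten when the caller decrypts in place. A small usage sketch relying on that property (`toy_ctr32` is the assumed callback from the previous sketch):

```c
#include <openssl/modes.h>

/* Sketch: in-place decryption (ciphertext and plaintext share buf) is
 * safe because the bulk path hashes the ciphertext before the stream
 * callback overwrites it. toy_ctr32 is assumed, as sketched above. */
static int decrypt_in_place(GCM128_CONTEXT *ctx, unsigned char *buf,
                            size_t len)
{
    return CRYPTO_gcm128_decrypt_ctr32(ctx, buf, buf, len, toy_ctr32);
}
```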
| 1800 |  | int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, | 
| 1801 |  |                          size_t len) | 
| 1802 | 0 | { | 
| 1803 | 0 |     const union { | 
| 1804 | 0 |         long one; | 
| 1805 | 0 |         char little; | 
| 1806 | 0 |     } is_endian = { 1 }; | 
| 1807 | 0 |     u64 alen = ctx->len.u[0] << 3; | 
| 1808 | 0 |     u64 clen = ctx->len.u[1] << 3; | 
| 1809 | 0 | #ifdef GCM_FUNCREF_4BIT | 
| 1810 | 0 |     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; | 
| 1811 | 0 | # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1812 | 0 |     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], | 
| 1813 | 0 |                          const u8 *inp, size_t len) = ctx->ghash; | 
| 1814 | 0 | # endif | 
| 1815 | 0 | #endif | 
| 1816 |  |  | 
| 1817 | 0 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1818 | 0 |     u128 bitlen; | 
| 1819 | 0 |     unsigned int mres = ctx->mres; | 
| 1820 |  |  | 
| 1821 | 0 |     if (mres) { | 
| 1822 | 0 |         unsigned blocks = (mres + 15) & -16; | 
| 1823 |  |  | 
| 1824 | 0 |         memset(ctx->Xn + mres, 0, blocks - mres); | 
| 1825 | 0 |         mres = blocks; | 
| 1826 | 0 |         if (mres == sizeof(ctx->Xn)) { | 
| 1827 | 0 |             GHASH(ctx, ctx->Xn, mres); | 
| 1828 | 0 |             mres = 0; | 
| 1829 | 0 |         } | 
| 1830 | 0 |     } else if (ctx->ares) { | 
| 1831 | 0 |         GCM_MUL(ctx); | 
| 1832 | 0 |     } | 
| 1833 |  | #else | 
| 1834 |  |     if (ctx->mres || ctx->ares) | 
| 1835 |  |         GCM_MUL(ctx); | 
| 1836 |  | #endif | 
| 1837 |  |  | 
| 1838 | 0 |     if (is_endian.little) { | 
| 1839 |  | #ifdef BSWAP8 | 
| 1840 |  |         alen = BSWAP8(alen); | 
| 1841 |  |         clen = BSWAP8(clen); | 
| 1842 |  | #else | 
| 1843 | 0 |         u8 *p = ctx->len.c; | 
| 1844 |  |  | 
| 1845 | 0 |         ctx->len.u[0] = alen; | 
| 1846 | 0 |         ctx->len.u[1] = clen; | 
| 1847 |  |  | 
| 1848 | 0 |         alen = (u64)GETU32(p) << 32 | GETU32(p + 4); | 
| 1849 | 0 |         clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); | 
| 1850 | 0 | #endif | 
| 1851 | 0 |     } | 
| 1852 |  |  | 
| 1853 | 0 | #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) | 
| 1854 | 0 |     bitlen.hi = alen; | 
| 1855 | 0 |     bitlen.lo = clen; | 
| 1856 | 0 |     memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen)); | 
| 1857 | 0 |     mres += sizeof(bitlen); | 
| 1858 | 0 |     GHASH(ctx, ctx->Xn, mres); | 
| 1859 |  | #else | 
| 1860 |  |     ctx->Xi.u[0] ^= alen; | 
| 1861 |  |     ctx->Xi.u[1] ^= clen; | 
| 1862 |  |     GCM_MUL(ctx); | 
| 1863 |  | #endif | 
| 1864 |  |  | 
| 1865 | 0 |     ctx->Xi.u[0] ^= ctx->EK0.u[0]; | 
| 1866 | 0 |     ctx->Xi.u[1] ^= ctx->EK0.u[1]; | 
| 1867 |  |  | 
| 1868 | 0 |     if (tag && len <= sizeof(ctx->Xi)) | 
| 1869 | 0 |         return CRYPTO_memcmp(ctx->Xi.c, tag, len); | 
| 1870 | 0 |     else | 
| 1871 | 0 |         return -1; | 
| 1872 | 0 | } | 
| 1873 |  |  | 
| 1874 |  | void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) | 
| 1875 | 0 | { | 
| 1876 | 0 |     CRYPTO_gcm128_finish(ctx, NULL, 0); | 
| 1877 | 0 |     memcpy(tag, ctx->Xi.c, | 
| 1878 | 0 |            len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c)); | 
| 1879 | 0 | } | 
| 1880 |  |  | 
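CRYPTO_gcm128_finish hands back the result of `CRYPTO_memcmp`, so 0 means the tag verified in constant time; CRYPTO_gcm128_tag is the sender-side wrapper that finalizes and copies the tag out. A hedged usage sketch (both contexts are assumed to have processed identical AAD and payload):

```c
#include <openssl/modes.h>

/* Sketch: produce a 16-byte tag on the encrypt context and verify it
 * on the decrypt context; a return of 0 means the tags matched. */
static int tag_roundtrip(GCM128_CONTEXT *enc, GCM128_CONTEXT *dec)
{
    unsigned char tag[16];

    CRYPTO_gcm128_tag(enc, tag, sizeof(tag));
    return CRYPTO_gcm128_finish(dec, tag, sizeof(tag));
}
```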
| 1881 |  | GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) | 
| 1882 | 0 | { | 
| 1883 | 0 |     GCM128_CONTEXT *ret; | 
| 1884 |  |  | 
| 1885 | 0 |     if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL) | 
| 1886 | 0 |         CRYPTO_gcm128_init(ret, key, block); | 
| 1887 |  |  | 
| 1888 | 0 |     return ret; | 
| 1889 | 0 | } | 
| 1890 |  |  | 
| 1891 |  | void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) | 
| 1892 | 0 | { | 
| 1893 | 0 |     OPENSSL_clear_free(ctx, sizeof(*ctx)); | 
| 1894 | 0 | } |
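Putting the pieces together, a minimal seal operation runs allocation (`CRYPTO_gcm128_new`), IV setup, AAD, payload, tag, and release in order. A sketch using AES as the block cipher; error handling is trimmed, and the 12-byte IV length is merely the common choice, not a requirement of the API:

```c
#include <openssl/aes.h>
#include <openssl/modes.h>

/* Sketch of the full life cycle around this file's API. */
static int gcm_seal(const unsigned char key[16], const unsigned char iv[12],
                    const unsigned char *aad, size_t aadlen,
                    const unsigned char *pt, unsigned char *ct, size_t len,
                    unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT *ctx;

    AES_set_encrypt_key(key, 128, &aes);
    if ((ctx = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
        return -1;
    CRYPTO_gcm128_setiv(ctx, iv, 12);
    if (CRYPTO_gcm128_aad(ctx, aad, aadlen) == 0
            && CRYPTO_gcm128_encrypt(ctx, pt, ct, len) == 0) {
        CRYPTO_gcm128_tag(ctx, tag, 16);
        CRYPTO_gcm128_release(ctx);     /* zeroizes, then frees */
        return 0;
    }
    CRYPTO_gcm128_release(ctx);
    return -1;
}
```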