Line | Count | Source
1 | | // gcm.cpp - originally written and placed in the public domain by Wei Dai. |
2 | | // ARM and Aarch64 added by Jeffrey Walton. The ARM carryless |
3 | | // multiply routines are less efficient because they shadow x86. |
4 | | // The precomputed key table integration makes it tricky to use the |
5 | | // more efficient ARMv8 implementation of the multiply and reduce. |
6 | | |
7 | | // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" to generate MASM code |
8 | | |
9 | | #include "pch.h" |
10 | | #include "config.h" |
11 | | |
12 | | #ifndef CRYPTOPP_IMPORTS |
13 | | #ifndef CRYPTOPP_GENERATE_X64_MASM |
14 | | |
15 | | // Visual Studio .Net 2003 compiler crash |
16 | | #if defined(CRYPTOPP_MSC_VERSION) && (CRYPTOPP_MSC_VERSION < 1400) |
17 | | # pragma optimize("", off) |
18 | | #endif |
19 | | |
20 | | #include "gcm.h" |
21 | | #include "cpu.h" |
22 | | |
23 | | #if defined(CRYPTOPP_DISABLE_GCM_ASM) |
24 | | # undef CRYPTOPP_X86_ASM_AVAILABLE |
25 | | # undef CRYPTOPP_X32_ASM_AVAILABLE |
26 | | # undef CRYPTOPP_X64_ASM_AVAILABLE |
27 | | # undef CRYPTOPP_SSE2_ASM_AVAILABLE |
28 | | #endif |
29 | | |
30 | | NAMESPACE_BEGIN(CryptoPP) |
31 | | |
32 | | #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) |
33 | | // Different assemblers accept different mnemonics: 'movd eax, xmm0' vs |
34 | | // 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0' |
35 | | #if defined(CRYPTOPP_DISABLE_MIXED_ASM) |
36 | | // 'movd eax, xmm0' only. REG_WORD() macro not used. Clang path. |
37 | | # define USE_MOVD_REG32 1 |
38 | | #elif defined(__GNUC__) || defined(CRYPTOPP_MSC_VERSION) |
39 | | // 'movd eax, xmm0' or 'movd rax, xmm0'. REG_WORD() macro supplies REG32 or REG64. |
40 | | # define USE_MOVD_REG32_OR_REG64 1 |
41 | | #else |
42 | | // 'mov eax, xmm0' or 'mov rax, xmm0'. REG_WORD() macro supplies REG32 or REG64. |
43 | | # define USE_MOV_REG32_OR_REG64 1 |
44 | | #endif |
45 | | #endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 |
46 | | |
47 | | word16 GCM_Base::s_reductionTable[256]; |
48 | | volatile bool GCM_Base::s_reductionTableInitialized = false; |
49 | | |
50 | | void GCM_Base::GCTR::IncrementCounterBy256() |
51 | 64 | { |
52 | 64 | IncrementCounterByOne(m_counterArray+BlockSize()-4, 3); |
53 | 64 | } |
54 | | |
55 | | static inline void Xor16(byte *a, const byte *b, const byte *c) |
56 | 0 | { |
57 | 0 | CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>())); |
58 | 0 | CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>())); |
59 | 0 | CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>())); |
60 | 0 | ((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0]; |
61 | 0 | ((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1]; |
62 | 0 | } |
63 | | |
64 | | #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE |
65 | | // SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in |
66 | | // a source file with a SSE architecture switch. Also see GH #226 and GH #284. |
67 | | extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c); |
68 | | #endif // SSE2 |
69 | | |
70 | | #if CRYPTOPP_ARM_NEON_AVAILABLE |
71 | | extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c); |
72 | | #endif |
73 | | |
74 | | #if CRYPTOPP_POWER8_AVAILABLE |
75 | | extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c); |
76 | | #endif |
77 | | |
78 | | #if CRYPTOPP_CLMUL_AVAILABLE |
79 | | extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize); |
80 | | extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer); |
81 | | const unsigned int s_cltableSizeInBlocks = 8; |
82 | | extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer); |
83 | | #endif // CRYPTOPP_CLMUL_AVAILABLE |
84 | | |
85 | | #if CRYPTOPP_ARM_PMULL_AVAILABLE |
86 | | extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize); |
87 | | extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer); |
88 | | const unsigned int s_cltableSizeInBlocks = 8; |
89 | | extern void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer); |
90 | | #endif // CRYPTOPP_ARM_PMULL_AVAILABLE |
91 | | |
92 | | #if CRYPTOPP_POWER8_VMULL_AVAILABLE |
93 | | extern void GCM_SetKeyWithoutResync_VMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize); |
94 | | extern size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer); |
95 | | const unsigned int s_cltableSizeInBlocks = 8; |
96 | | extern void GCM_ReverseHashBufferIfNeeded_VMULL(byte *hashBuffer); |
97 | | #endif // CRYPTOPP_POWER8_VMULL_AVAILABLE |
98 | | |
99 | | void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
100 | 97 | { |
101 | 97 | BlockCipher &blockCipher = AccessBlockCipher(); |
102 | 97 | blockCipher.SetKey(userKey, keylength, params); |
103 | | |
104 | | // GCM is only defined for 16-byte block ciphers at the moment. |
105 | | // However, variable blocksize support means we have to defer |
106 | | // blocksize checks to runtime after the key is set. Also see |
107 | | // https://github.com/weidai11/cryptopp/issues/408. |
108 | 97 | const unsigned int blockSize = blockCipher.BlockSize(); |
109 | 97 | CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE); |
110 | 97 | if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE) |
111 | 0 | throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16"); |
112 | | |
113 | 97 | int tableSize, i, j, k; |
114 | | |
115 | 97 | #if CRYPTOPP_CLMUL_AVAILABLE |
116 | 97 | if (HasCLMUL()) |
117 | 84 | { |
118 | | // Avoid "parameter not used" error and suppress Coverity finding |
119 | 84 | (void)params.GetIntValue(Name::TableSize(), tableSize); |
120 | 84 | tableSize = s_cltableSizeInBlocks * blockSize; |
121 | 84 | CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize)); |
122 | 84 | } |
123 | 13 | else |
124 | | #elif CRYPTOPP_ARM_PMULL_AVAILABLE |
125 | | if (HasPMULL()) |
126 | | { |
127 | | // Avoid "parameter not used" error and suppress Coverity finding |
128 | | (void)params.GetIntValue(Name::TableSize(), tableSize); |
129 | | tableSize = s_cltableSizeInBlocks * blockSize; |
130 | | CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize)); |
131 | | } |
132 | | else |
133 | | #elif CRYPTOPP_POWER8_VMULL_AVAILABLE |
134 | | if (HasPMULL()) |
135 | | { |
136 | | // Avoid "parameter not used" error and suppress Coverity finding |
137 | | (void)params.GetIntValue(Name::TableSize(), tableSize); |
138 | | tableSize = s_cltableSizeInBlocks * blockSize; |
139 | | CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize)); |
140 | | } |
141 | | else |
142 | | #endif |
143 | 13 | { |
144 | 13 | if (params.GetIntValue(Name::TableSize(), tableSize)) |
145 | 0 | tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024; |
146 | 13 | else |
147 | 13 | tableSize = (GetTablesOption() == GCM_64K_Tables) ? 64*1024 : 2*1024; |
148 | | |
149 | | //#if defined(CRYPTOPP_MSC_VERSION) && (CRYPTOPP_MSC_VERSION < 1400) |
150 | | // VC 2003 workaround: compiler generates bad code for 64K tables |
151 | | //tableSize = 2*1024; |
152 | | //#endif |
153 | 13 | } |
154 | | |
155 | 97 | m_buffer.resize(3*blockSize + tableSize); |
156 | 97 | byte *mulTable = MulTable(); |
157 | 97 | byte *hashKey = HashKey(); |
158 | 97 | std::memset(hashKey, 0, REQUIRED_BLOCKSIZE); |
159 | 97 | blockCipher.ProcessBlock(hashKey); |
160 | | |
161 | 97 | #if CRYPTOPP_CLMUL_AVAILABLE |
162 | 97 | if (HasCLMUL()) |
163 | 84 | { |
164 | 84 | GCM_SetKeyWithoutResync_CLMUL(hashKey, mulTable, tableSize); |
165 | 84 | return; |
166 | 84 | } |
167 | | #elif CRYPTOPP_ARM_PMULL_AVAILABLE |
168 | | if (HasPMULL()) |
169 | | { |
170 | | GCM_SetKeyWithoutResync_PMULL(hashKey, mulTable, tableSize); |
171 | | return; |
172 | | } |
173 | | #elif CRYPTOPP_POWER8_VMULL_AVAILABLE |
174 | | if (HasPMULL()) |
175 | | { |
176 | | GCM_SetKeyWithoutResync_VMULL(hashKey, mulTable, tableSize); |
177 | | return; |
178 | | } |
179 | | #endif |
180 | | |
181 | 13 | word64 V0, V1; |
182 | 13 | typedef BlockGetAndPut<word64, BigEndian> Block; |
183 | 13 | Block::Get(hashKey)(V0)(V1); |
184 | | |
185 | 13 | if (tableSize == 64*1024) |
186 | 0 | { |
187 | 0 | for (i=0; i<128; i++) |
188 | 0 | { |
189 | 0 | k = i%8; |
190 | 0 | Block::Put(NULLPTR, mulTable+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1); |
191 | |
192 | 0 | int x = (int)V1 & 1; |
193 | 0 | V1 = (V1>>1) | (V0<<63); |
194 | 0 | V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0); |
195 | 0 | } |
196 | |
197 | 0 | for (i=0; i<16; i++) |
198 | 0 | { |
199 | 0 | std::memset(mulTable+i*256*16, 0, 16); |
200 | 0 | #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE |
201 | 0 | if (HasSSE2()) |
202 | 0 | for (j=2; j<=0x80; j*=2) |
203 | 0 | for (k=1; k<j; k++) |
204 | 0 | GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16); |
205 | 0 | else |
206 | | #elif CRYPTOPP_ARM_NEON_AVAILABLE |
207 | | if (HasNEON()) |
208 | | for (j=2; j<=0x80; j*=2) |
209 | | for (k=1; k<j; k++) |
210 | | GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16); |
211 | | else |
212 | | #elif CRYPTOPP_POWER8_AVAILABLE |
213 | | if (HasPower8()) |
214 | | for (j=2; j<=0x80; j*=2) |
215 | | for (k=1; k<j; k++) |
216 | | GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16); |
217 | | else |
218 | | #endif |
219 | 0 | for (j=2; j<=0x80; j*=2) |
220 | 0 | for (k=1; k<j; k++) |
221 | 0 | Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16); |
222 | 0 | } |
223 | 0 | } |
224 | 13 | else |
225 | 13 | { |
226 | 13 | if (!s_reductionTableInitialized) |
227 | 0 | { |
228 | 0 | s_reductionTable[0] = 0; |
229 | 0 | word16 x = 0x01c2; |
230 | 0 | s_reductionTable[1] = ByteReverse(x); |
231 | 0 | for (unsigned int ii=2; ii<=0x80; ii*=2) |
232 | 0 | { |
233 | 0 | x <<= 1; |
234 | 0 | s_reductionTable[ii] = ByteReverse(x); |
235 | 0 | for (unsigned int jj=1; jj<ii; jj++) |
236 | 0 | s_reductionTable[ii+jj] = s_reductionTable[ii] ^ s_reductionTable[jj]; |
237 | 0 | } |
238 | 0 | s_reductionTableInitialized = true; |
239 | 0 | } |
240 | | |
241 | 13 | for (i=0; i<128-24; i++) |
242 | 0 | { |
243 | 0 | k = i%32; |
244 | 0 | if (k < 4) |
245 | 0 | Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1); |
246 | 0 | else if (k < 8) |
247 | 0 | Block::Put(NULLPTR, mulTable+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1); |
248 | |
249 | 0 | int x = (int)V1 & 1; |
250 | 0 | V1 = (V1>>1) | (V0<<63); |
251 | 0 | V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0); |
252 | 0 | } |
253 | | |
254 | 13 | for (i=0; i<4; i++) |
255 | 0 | { |
256 | 0 | std::memset(mulTable+i*256, 0, 16); |
257 | 0 | std::memset(mulTable+1024+i*256, 0, 16); |
258 | 0 | #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE |
259 | 0 | if (HasSSE2()) |
260 | 0 | for (j=2; j<=8; j*=2) |
261 | 0 | for (k=1; k<j; k++) |
262 | 0 | { |
263 | 0 | GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16); |
264 | 0 | GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16); |
265 | 0 | } |
266 | 0 | else |
267 | | #elif CRYPTOPP_ARM_NEON_AVAILABLE |
268 | | if (HasNEON()) |
269 | | for (j=2; j<=8; j*=2) |
270 | | for (k=1; k<j; k++) |
271 | | { |
272 | | GCM_Xor16_NEON(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16); |
273 | | GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16); |
274 | | } |
275 | | else |
276 | | #elif CRYPTOPP_POWER8_AVAILABLE |
277 | | if (HasPower8()) |
278 | | for (j=2; j<=8; j*=2) |
279 | | for (k=1; k<j; k++) |
280 | | { |
281 | | GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16); |
282 | | GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16); |
283 | | } |
284 | | else |
285 | | #endif |
286 | 0 | for (j=2; j<=8; j*=2) |
287 | 0 | for (k=1; k<j; k++) |
288 | 0 | { |
289 | 0 | Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16); |
290 | 0 | Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16); |
291 | 0 | } |
292 | 0 | } |
293 | 13 | } |
294 | 13 | } |
295 | | |
296 | | inline void GCM_Base::ReverseHashBufferIfNeeded() |
297 | 120 | { |
298 | 120 | #if CRYPTOPP_CLMUL_AVAILABLE |
299 | 120 | if (HasCLMUL()) |
300 | 120 | { |
301 | 120 | GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer()); |
302 | 120 | } |
303 | | #elif CRYPTOPP_ARM_PMULL_AVAILABLE |
304 | | if (HasPMULL()) |
305 | | { |
306 | | GCM_ReverseHashBufferIfNeeded_PMULL(HashBuffer()); |
307 | | } |
308 | | #elif CRYPTOPP_POWER8_VMULL_AVAILABLE |
309 | | if (HasPMULL()) |
310 | | { |
311 | | GCM_ReverseHashBufferIfNeeded_VMULL(HashBuffer()); |
312 | | } |
313 | | #endif |
314 | 120 | } |
315 | | |
316 | | void GCM_Base::Resync(const byte *iv, size_t len) |
317 | 78 | { |
318 | 78 | BlockCipher &cipher = AccessBlockCipher(); |
319 | 78 | byte *hashBuffer = HashBuffer(); |
320 | | |
321 | 78 | if (len == 12) |
322 | 0 | { |
323 | 0 | std::memcpy(hashBuffer, iv, len); |
324 | 0 | std::memset(hashBuffer+len, 0, 3); |
325 | 0 | hashBuffer[len+3] = 1; |
326 | 0 | } |
327 | 78 | else |
328 | 78 | { |
329 | 78 | size_t origLen = len; |
330 | 78 | std::memset(hashBuffer, 0, HASH_BLOCKSIZE); |
331 | | |
332 | 78 | if (len >= HASH_BLOCKSIZE) |
333 | 66 | { |
334 | 66 | len = GCM_Base::AuthenticateBlocks(iv, len); |
335 | 66 | iv += (origLen - len); |
336 | 66 | } |
337 | | |
338 | 78 | if (len > 0) |
339 | 29 | { |
340 | 29 | std::memcpy(m_buffer, iv, len); |
341 | 29 | std::memset(m_buffer+len, 0, HASH_BLOCKSIZE-len); |
342 | 29 | GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE); |
343 | 29 | } |
344 | | |
345 | 78 | PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(0)(origLen*8); |
346 | 78 | GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE); |
347 | | |
348 | 78 | ReverseHashBufferIfNeeded(); |
349 | 78 | } |
350 | | |
351 | 78 | if (m_state >= State_IVSet) |
352 | 0 | m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE); |
353 | 78 | else |
354 | 78 | m_ctr.SetCipherWithIV(cipher, hashBuffer); |
355 | | |
356 | 78 | m_ctr.Seek(HASH_BLOCKSIZE); |
357 | | |
358 | 78 | std::memset(hashBuffer, 0, HASH_BLOCKSIZE); |
359 | 78 | } |
360 | | |
361 | | unsigned int GCM_Base::OptimalDataAlignment() const |
362 | 0 | { |
363 | 0 | return |
364 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) |
365 | | HasSSE2() ? 16 : |
366 | | #elif CRYPTOPP_ARM_NEON_AVAILABLE |
367 | | HasNEON() ? 4 : |
368 | | #elif CRYPTOPP_POWER8_AVAILABLE |
369 | | HasPower8() ? 16 : |
370 | | #endif |
371 | 0 | GetBlockCipher().OptimalDataAlignment(); |
372 | 0 | } |
373 | | |
374 | | #if CRYPTOPP_MSC_VERSION |
375 | | # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code |
376 | | #endif |
377 | | |
378 | | #endif // Not CRYPTOPP_GENERATE_X64_MASM |
379 | | |
380 | | #ifdef CRYPTOPP_X64_MASM_AVAILABLE |
381 | | extern "C" { |
382 | | void GCM_AuthenticateBlocks_2K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer, const word16 *reductionTable); |
383 | | void GCM_AuthenticateBlocks_64K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer); |
384 | | } |
385 | | #endif |
386 | | |
387 | | #ifndef CRYPTOPP_GENERATE_X64_MASM |
388 | | |
389 | | size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len) |
390 | 462 | { |
391 | 462 | #if CRYPTOPP_CLMUL_AVAILABLE |
392 | 462 | if (HasCLMUL()) |
393 | 462 | { |
394 | 462 | return GCM_AuthenticateBlocks_CLMUL(data, len, MulTable(), HashBuffer()); |
395 | 462 | } |
396 | | #elif CRYPTOPP_ARM_PMULL_AVAILABLE |
397 | | if (HasPMULL()) |
398 | | { |
399 | | return GCM_AuthenticateBlocks_PMULL(data, len, MulTable(), HashBuffer()); |
400 | | } |
401 | | #elif CRYPTOPP_POWER8_VMULL_AVAILABLE |
402 | | if (HasPMULL()) |
403 | | { |
404 | | return GCM_AuthenticateBlocks_VMULL(data, len, MulTable(), HashBuffer()); |
405 | | } |
406 | | #endif |
407 | | |
408 | 0 | typedef BlockGetAndPut<word64, NativeByteOrder> Block; |
409 | 0 | word64 *hashBuffer = (word64 *)(void *)HashBuffer(); |
410 | 0 | CRYPTOPP_ASSERT(IsAlignedOn(hashBuffer,GetAlignmentOf<word64>())); |
411 | |
412 | 0 | switch (2*(m_buffer.size()>=64*1024) |
413 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) |
414 | | + HasSSE2() |
415 | | //#elif CRYPTOPP_ARM_NEON_AVAILABLE |
416 | | // + HasNEON() |
417 | | #endif |
418 | 0 | ) |
419 | 0 | { |
420 | 0 | case 0: // non-SSE2 and 2K tables |
421 | 0 | { |
422 | 0 | byte *mulTable = MulTable(); |
423 | 0 | word64 x0 = hashBuffer[0], x1 = hashBuffer[1]; |
424 | |
425 | 0 | do |
426 | 0 | { |
427 | 0 | word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1; |
428 | 0 | Block::Get(data)(y0)(y1); |
429 | 0 | x0 ^= y0; |
430 | 0 | x1 ^= y1; |
431 | |
432 | 0 | data += HASH_BLOCKSIZE; |
433 | 0 | len -= HASH_BLOCKSIZE; |
434 | |
435 | 0 | #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8) |
436 | |
437 | 0 | #if (CRYPTOPP_LITTLE_ENDIAN) |
438 | | #if CRYPTOPP_BOOL_SLOW_WORD64 |
439 | | word32 z0 = (word32)x0; |
440 | | word32 z1 = (word32)(x0>>32); |
441 | | word32 z2 = (word32)x1; |
442 | | word32 z3 = (word32)(x1>>32); |
443 | | #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e) |
444 | | #else |
445 | 0 | #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e) |
446 | 0 | #endif |
447 | 0 | #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8) |
448 | 0 | #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8; |
449 | | #else |
450 | | #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e) |
451 | | #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff) |
452 | | #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8; |
453 | | #endif |
454 | |
455 | 0 | #define GF_MUL_32BY128(op, a, b, c) \ |
456 | 0 | a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0); \ |
457 | 0 | a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1); \ |
458 | 0 | b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0); \ |
459 | 0 | b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1); \ |
460 | 0 | c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0); \ |
461 | 0 | c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1); \ |
462 | 0 | d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0); \ |
463 | 0 | d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1); \ |
464 | 0 |
465 | 0 | GF_MUL_32BY128(=, 0, 0, 0) |
466 | 0 | GF_MUL_32BY128(^=, 0, 1, 1) |
467 | 0 | GF_MUL_32BY128(^=, 1, 0, 2) |
468 | 0 | GF_MUL_32BY128(^=, 1, 1, 3) |
469 | |
470 | 0 | word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16; |
471 | 0 | GF_SHIFT_8(d) |
472 | 0 | c0 ^= d0; c1 ^= d1; |
473 | 0 | r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8; |
474 | 0 | GF_SHIFT_8(c) |
475 | 0 | b0 ^= c0; b1 ^= c1; |
476 | 0 | r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)]; |
477 | 0 | GF_SHIFT_8(b) |
478 | 0 | a0 ^= b0; a1 ^= b1; |
479 | 0 | a0 ^= ConditionalByteReverse<word64>(LITTLE_ENDIAN_ORDER, r); |
480 | 0 | x0 = a0; x1 = a1; |
481 | 0 | } |
482 | 0 | while (len >= HASH_BLOCKSIZE); |
483 | |
484 | 0 | hashBuffer[0] = x0; hashBuffer[1] = x1; |
485 | 0 | return len; |
486 | 0 | } |
487 | | |
488 | 0 | case 2: // non-SSE2 and 64K tables |
489 | 0 | { |
490 | 0 | byte *mulTable = MulTable(); |
491 | 0 | word64 x0 = hashBuffer[0], x1 = hashBuffer[1]; |
492 | |
493 | 0 | do |
494 | 0 | { |
495 | 0 | word64 y0, y1, a0, a1; |
496 | 0 | Block::Get(data)(y0)(y1); |
497 | 0 | x0 ^= y0; |
498 | 0 | x1 ^= y1; |
499 | |
500 | 0 | data += HASH_BLOCKSIZE; |
501 | 0 | len -= HASH_BLOCKSIZE; |
502 | |
503 | 0 | #undef READ_TABLE_WORD64_COMMON |
504 | 0 | #undef READ_TABLE_WORD64 |
505 | |
506 | 0 | #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8) |
507 | |
508 | 0 | #if (CRYPTOPP_LITTLE_ENDIAN) |
509 | | #if CRYPTOPP_BOOL_SLOW_WORD64 |
510 | | word32 z0 = (word32)x0; |
511 | | word32 z1 = (word32)(x0>>32); |
512 | | word32 z2 = (word32)x1; |
513 | | word32 z3 = (word32)(x1>>32); |
514 | | #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e) |
515 | | #else |
516 | 0 | #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e) |
517 | 0 | #endif |
518 | | #else |
519 | | #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e) |
520 | | #endif |
521 | |
522 | 0 | #define GF_MUL_8BY128(op, b, c, d) \ |
523 | 0 | a0 op READ_TABLE_WORD64(b, c, d, 0);\ |
524 | 0 | a1 op READ_TABLE_WORD64(b, c, d, 1);\ |
525 | 0 |
526 | 0 | GF_MUL_8BY128(=, 0, 0, 0) |
527 | 0 | GF_MUL_8BY128(^=, 0, 0, 1) |
528 | 0 | GF_MUL_8BY128(^=, 0, 0, 2) |
529 | 0 | GF_MUL_8BY128(^=, 0, 0, 3) |
530 | 0 | GF_MUL_8BY128(^=, 0, 1, 0) |
531 | 0 | GF_MUL_8BY128(^=, 0, 1, 1) |
532 | 0 | GF_MUL_8BY128(^=, 0, 1, 2) |
533 | 0 | GF_MUL_8BY128(^=, 0, 1, 3) |
534 | 0 | GF_MUL_8BY128(^=, 1, 2, 0) |
535 | 0 | GF_MUL_8BY128(^=, 1, 2, 1) |
536 | 0 | GF_MUL_8BY128(^=, 1, 2, 2) |
537 | 0 | GF_MUL_8BY128(^=, 1, 2, 3) |
538 | 0 | GF_MUL_8BY128(^=, 1, 3, 0) |
539 | 0 | GF_MUL_8BY128(^=, 1, 3, 1) |
540 | 0 | GF_MUL_8BY128(^=, 1, 3, 2) |
541 | 0 | GF_MUL_8BY128(^=, 1, 3, 3) |
542 | |
543 | 0 | x0 = a0; x1 = a1; |
544 | 0 | } |
545 | 0 | while (len >= HASH_BLOCKSIZE); |
546 | |
547 | 0 | hashBuffer[0] = x0; hashBuffer[1] = x1; |
548 | 0 | return len; |
549 | 0 | } |
550 | 0 | #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM |
551 | |
552 | | #ifdef CRYPTOPP_X64_MASM_AVAILABLE |
553 | | case 1: // SSE2 and 2K tables |
554 | | GCM_AuthenticateBlocks_2K_SSE2(data, len/16, hashBuffer, s_reductionTable); |
555 | | return len % 16; |
556 | | case 3: // SSE2 and 64K tables |
557 | | GCM_AuthenticateBlocks_64K_SSE2(data, len/16, hashBuffer); |
558 | | return len % 16; |
559 | | #endif |
560 | |
561 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE |
562 | | |
563 | | case 1: // SSE2 and 2K tables |
564 | | { |
565 | | #ifdef __GNUC__ |
566 | | __asm__ __volatile__ |
567 | | ( |
568 | | INTEL_NOPREFIX |
569 | | #elif defined(CRYPTOPP_GENERATE_X64_MASM) |
570 | | ALIGN 8 |
571 | | GCM_AuthenticateBlocks_2K_SSE2 PROC FRAME |
572 | | rex_push_reg rsi |
573 | | push_reg rdi |
574 | | push_reg rbx |
575 | | .endprolog |
576 | | mov rsi, r8 |
577 | | mov r11, r9 |
578 | | #else |
579 | | AS2( mov WORD_REG(cx), data ) |
580 | | AS2( mov WORD_REG(dx), len ) |
581 | | AS2( mov WORD_REG(si), hashBuffer ) |
582 | | AS2( shr WORD_REG(dx), 4 ) |
583 | | #endif |
584 | | |
585 | | #if CRYPTOPP_BOOL_X32 |
586 | | AS1(push rbx) |
587 | | AS1(push rbp) |
588 | | #else |
589 | | AS_PUSH_IF86( bx) |
590 | | AS_PUSH_IF86( bp) |
591 | | #endif |
592 | | |
593 | | #ifdef __GNUC__ |
594 | | AS2( mov AS_REG_7, WORD_REG(di)) |
595 | | #elif CRYPTOPP_BOOL_X86 |
596 | | AS2( lea AS_REG_7, s_reductionTable) |
597 | | #endif |
598 | | |
599 | | AS2( movdqa xmm0, [WORD_REG(si)] ) |
600 | | |
601 | | #define MUL_TABLE_0 WORD_REG(si) + 32 |
602 | | #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024 |
603 | | #define RED_TABLE AS_REG_7 |
604 | | |
605 | | ASL(0) |
606 | | AS2( movdqu xmm4, [WORD_REG(cx)] ) |
607 | | AS2( pxor xmm0, xmm4 ) |
608 | | |
609 | | AS2( movd ebx, xmm0 ) |
610 | | AS2( mov eax, AS_HEX(f0f0f0f0) ) |
611 | | AS2( and eax, ebx ) |
612 | | AS2( shl ebx, 4 ) |
613 | | AS2( and ebx, AS_HEX(f0f0f0f0) ) |
614 | | AS2( movzx edi, ah ) |
615 | | AS2( movdqa xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] ) |
616 | | AS2( movzx edi, al ) |
617 | | AS2( movdqa xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] ) |
618 | | AS2( shr eax, 16 ) |
619 | | AS2( movzx edi, ah ) |
620 | | AS2( movdqa xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] ) |
621 | | AS2( movzx edi, al ) |
622 | | AS2( movdqa xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] ) |
623 | | |
624 | | #define SSE2_MUL_32BITS(i) \ |
625 | | AS2( psrldq xmm0, 4 )\ |
626 | | AS2( movd eax, xmm0 )\ |
627 | | AS2( and eax, AS_HEX(f0f0f0f0) )\ |
628 | | AS2( movzx edi, bh )\ |
629 | | AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\ |
630 | | AS2( movzx edi, bl )\ |
631 | | AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\ |
632 | | AS2( shr ebx, 16 )\ |
633 | | AS2( movzx edi, bh )\ |
634 | | AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\ |
635 | | AS2( movzx edi, bl )\ |
636 | | AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\ |
637 | | AS2( movd ebx, xmm0 )\ |
638 | | AS2( shl ebx, 4 )\ |
639 | | AS2( and ebx, AS_HEX(f0f0f0f0) )\ |
640 | | AS2( movzx edi, ah )\ |
641 | | AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\ |
642 | | AS2( movzx edi, al )\ |
643 | | AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\ |
644 | | AS2( shr eax, 16 )\ |
645 | | AS2( movzx edi, ah )\ |
646 | | AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\ |
647 | | AS2( movzx edi, al )\ |
648 | | AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\ |
649 | | |
650 | | SSE2_MUL_32BITS(1) |
651 | | SSE2_MUL_32BITS(2) |
652 | | SSE2_MUL_32BITS(3) |
653 | | |
654 | | AS2( movzx edi, bh ) |
655 | | AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] ) |
656 | | AS2( movzx edi, bl ) |
657 | | AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] ) |
658 | | AS2( shr ebx, 16 ) |
659 | | AS2( movzx edi, bh ) |
660 | | AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] ) |
661 | | AS2( movzx edi, bl ) |
662 | | AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] ) |
663 | | |
664 | | AS2( movdqa xmm0, xmm3 ) |
665 | | AS2( pslldq xmm3, 1 ) |
666 | | AS2( pxor xmm2, xmm3 ) |
667 | | AS2( movdqa xmm1, xmm2 ) |
668 | | AS2( pslldq xmm2, 1 ) |
669 | | AS2( pxor xmm5, xmm2 ) |
670 | | |
671 | | AS2( psrldq xmm0, 15 ) |
672 | | #if USE_MOVD_REG32 |
673 | | AS2( movd edi, xmm0 ) |
674 | | #elif USE_MOV_REG32_OR_REG64 |
675 | | AS2( mov WORD_REG(di), xmm0 ) |
676 | | #else // GNU Assembler |
677 | | AS2( movd WORD_REG(di), xmm0 ) |
678 | | #endif |
679 | | AS2( movzx eax, WORD PTR [RED_TABLE + WORD_REG(di)*2] ) |
680 | | AS2( shl eax, 8 ) |
681 | | |
682 | | AS2( movdqa xmm0, xmm5 ) |
683 | | AS2( pslldq xmm5, 1 ) |
684 | | AS2( pxor xmm4, xmm5 ) |
685 | | |
686 | | AS2( psrldq xmm1, 15 ) |
687 | | #if USE_MOVD_REG32 |
688 | | AS2( movd edi, xmm1 ) |
689 | | #elif USE_MOV_REG32_OR_REG64 |
690 | | AS2( mov WORD_REG(di), xmm1 ) |
691 | | #else |
692 | | AS2( movd WORD_REG(di), xmm1 ) |
693 | | #endif |
694 | | AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] ) |
695 | | AS2( shl eax, 8 ) |
696 | | |
697 | | AS2( psrldq xmm0, 15 ) |
698 | | #if USE_MOVD_REG32 |
699 | | AS2( movd edi, xmm0 ) |
700 | | #elif USE_MOV_REG32_OR_REG64 |
701 | | AS2( mov WORD_REG(di), xmm0 ) |
702 | | #else |
703 | | AS2( movd WORD_REG(di), xmm0 ) |
704 | | #endif |
705 | | AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] ) |
706 | | |
707 | | AS2( movd xmm0, eax ) |
708 | | AS2( pxor xmm0, xmm4 ) |
709 | | |
710 | | AS2( add WORD_REG(cx), 16 ) |
711 | | AS2( sub WORD_REG(dx), 1 ) |
712 | | // ATT_NOPREFIX |
713 | | ASJ( jnz, 0, b ) |
714 | | INTEL_NOPREFIX |
715 | | AS2( movdqa [WORD_REG(si)], xmm0 ) |
716 | | |
717 | | #if CRYPTOPP_BOOL_X32 |
718 | | AS1(pop rbp) |
719 | | AS1(pop rbx) |
720 | | #else |
721 | | AS_POP_IF86( bp) |
722 | | AS_POP_IF86( bx) |
723 | | #endif |
724 | | |
725 | | #ifdef __GNUC__ |
726 | | ATT_PREFIX |
727 | | : |
728 | | : "c" (data), "d" (len/16), "S" (hashBuffer), "D" (s_reductionTable) |
729 | | : "memory", "cc", "%eax", "%ebx" |
730 | | #if (CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) |
731 | | , PERCENT_REG(AS_REG_7), "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5" |
732 | | #endif |
733 | | ); |
734 | | #elif defined(CRYPTOPP_GENERATE_X64_MASM) |
735 | | pop rbx |
736 | | pop rdi |
737 | | pop rsi |
738 | | ret |
739 | | GCM_AuthenticateBlocks_2K_SSE2 ENDP |
740 | | #endif |
741 | | |
742 | | return len%16; |
743 | | } |
744 | | case 3: // SSE2 and 64K tables |
745 | | { |
746 | | #ifdef __GNUC__ |
747 | | __asm__ __volatile__ |
748 | | ( |
749 | | INTEL_NOPREFIX |
750 | | #elif defined(CRYPTOPP_GENERATE_X64_MASM) |
751 | | ALIGN 8 |
752 | | GCM_AuthenticateBlocks_64K_SSE2 PROC FRAME |
753 | | rex_push_reg rsi |
754 | | push_reg rdi |
755 | | .endprolog |
756 | | mov rsi, r8 |
757 | | #else |
758 | | AS2( mov WORD_REG(cx), data ) |
759 | | AS2( mov WORD_REG(dx), len ) |
760 | | AS2( mov WORD_REG(si), hashBuffer ) |
761 | | AS2( shr WORD_REG(dx), 4 ) |
762 | | #endif |
763 | | |
764 | | AS2( movdqa xmm0, [WORD_REG(si)] ) |
765 | | |
766 | | #undef MUL_TABLE |
767 | | #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16 |
768 | | |
769 | | ASL(1) |
770 | | AS2( movdqu xmm1, [WORD_REG(cx)] ) |
771 | | AS2( pxor xmm1, xmm0 ) |
772 | | AS2( pxor xmm0, xmm0 ) |
773 | | |
774 | | #undef SSE2_MUL_32BITS |
775 | | #define SSE2_MUL_32BITS(i) \ |
776 | | AS2( movd eax, xmm1 )\ |
777 | | AS2( psrldq xmm1, 4 )\ |
778 | | AS2( movzx edi, al )\ |
779 | | AS2( add WORD_REG(di), WORD_REG(di) )\ |
780 | | AS2( pxor xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8] )\ |
781 | | AS2( movzx edi, ah )\ |
782 | | AS2( add WORD_REG(di), WORD_REG(di) )\ |
783 | | AS2( pxor xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8] )\ |
784 | | AS2( shr eax, 16 )\ |
785 | | AS2( movzx edi, al )\ |
786 | | AS2( add WORD_REG(di), WORD_REG(di) )\ |
787 | | AS2( pxor xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8] )\ |
788 | | AS2( movzx edi, ah )\ |
789 | | AS2( add WORD_REG(di), WORD_REG(di) )\ |
790 | | AS2( pxor xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8] )\ |
791 | | |
792 | | SSE2_MUL_32BITS(0) |
793 | | SSE2_MUL_32BITS(1) |
794 | | SSE2_MUL_32BITS(2) |
795 | | SSE2_MUL_32BITS(3) |
796 | | |
797 | | AS2( add WORD_REG(cx), 16 ) |
798 | | AS2( sub WORD_REG(dx), 1 ) |
799 | | // ATT_NOPREFIX |
800 | | ASJ( jnz, 1, b ) |
801 | | INTEL_NOPREFIX |
802 | | AS2( movdqa [WORD_REG(si)], xmm0 ) |
803 | | |
804 | | #ifdef __GNUC__ |
805 | | ATT_PREFIX |
806 | | : |
807 | | : "c" (data), "d" (len/16), "S" (hashBuffer) |
808 | | : "memory", "cc", "%edi", "%eax" |
809 | | #if (CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) |
810 | | , "%xmm0", "%xmm1" |
811 | | #endif |
812 | | ); |
813 | | #elif defined(CRYPTOPP_GENERATE_X64_MASM) |
814 | | pop rdi |
815 | | pop rsi |
816 | | ret |
817 | | GCM_AuthenticateBlocks_64K_SSE2 ENDP |
818 | | #endif |
819 | | |
820 | | return len%16; |
821 | | } |
822 | | #endif |
823 | 0 | #ifndef CRYPTOPP_GENERATE_X64_MASM |
824 | 0 | } |
825 | | |
826 | 0 | return len%16; |
827 | 0 | } |
828 | | |
829 | | void GCM_Base::AuthenticateLastHeaderBlock() |
830 | 120 | { |
831 | 120 | if (m_bufferedDataLength > 0) |
832 | 88 | { |
833 | 88 | std::memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength); |
834 | 88 | m_bufferedDataLength = 0; |
835 | 88 | GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE); |
836 | 88 | } |
837 | 120 | } |
838 | | |
839 | | void GCM_Base::AuthenticateLastConfidentialBlock() |
840 | 42 | { |
841 | 42 | GCM_Base::AuthenticateLastHeaderBlock(); |
842 | 42 | PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(m_totalHeaderLength*8)(m_totalMessageLength*8); |
843 | 42 | GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE); |
844 | 42 | } |
845 | | |
846 | | void GCM_Base::AuthenticateLastFooterBlock(byte *mac, size_t macSize) |
847 | 42 | { |
848 | 42 | m_ctr.Seek(0); |
849 | 42 | ReverseHashBufferIfNeeded(); |
850 | 42 | m_ctr.ProcessData(mac, HashBuffer(), macSize); |
851 | 42 | } |
852 | | |
853 | | NAMESPACE_END |
854 | | |
855 | | #endif // Not CRYPTOPP_GENERATE_X64_MASM |
856 | | #endif |
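
Note: the table-building loops in SetKeyWithoutResync repeatedly halve the hash key H in GHASH's bit-reflected GF(2^128) representation (the V0/V1 update guarded by the 0xe1 reduction constant). A minimal standalone sketch of that single step is shown below for orientation; the function name and types are illustrative and are not part of gcm.cpp.

    #include <cstdint>

    // Illustrative sketch: one "multiply by x" step on a 128-bit field element
    // held as (hi, lo), mirroring the V0/V1 update in the key setup above.
    static void gf128_mul_by_x(uint64_t &hi, uint64_t &lo)
    {
        const bool carry = (lo & 1) != 0;               // bit shifted out of the low word
        lo = (lo >> 1) | (hi << 63);                    // 128-bit logical right shift by one
        hi = (hi >> 1) ^ (carry ? 0xe1ULL << 56 : 0);   // fold in the GHASH reduction polynomial
    }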