Line | Count | Source |
1 | | // sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c |
2 | | |
3 | | // Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey |
4 | | // Walton implemented Intel SHA extensions based on Intel articles and code |
5 | | // by Sean Gulley. Jeffrey Walton implemented ARM SHA-1 and SHA-256 based |
6 | | // on ARM code and code from Johannes Schneiders, Skip Hovsmith and |
7 | | // Barry O'Rourke. Jeffrey Walton and Bill Schmidt implemented Power8 |
8 | | // SHA-256 and SHA-512. All code is in the public domain. |
9 | | |
10 | | // In August 2017 JW reworked the internals to align all the |
11 | | // implementations. Formerly all hashes were software based, IterHashBase |
12 | | // handled endian conversions, and IterHashBase dispatched a single block
13 | | // to SHA{N}::Transform. SHA{N}::Transform then performed the single
14 | | // block hashing. It was repeated for multiple blocks. |
15 | | // |
16 | | // The rework added SHA{N}::HashMultipleBlocks (class) and |
17 | | // SHA{N}_HashMultipleBlocks (free standing). There are also hardware |
18 | | // accelerated variations. Callers enter SHA{N}::HashMultipleBlocks (class) |
19 | | // and the function calls SHA{N}_HashMultipleBlocks (free standing) or |
20 | | // SHA{N}_HashBlock (free standing) as a fallback. |
21 | | // |
22 | | // An added wrinkle is that the hardware is little endian, the C++ code is big
23 | | // endian, and callers use big endian, so SHA{N}_HashMultipleBlocks accepts a
24 | | // ByteOrder for the incoming data arrangement. A hardware based
25 | | // SHA{N}_HashMultipleBlocks can often perform the endian swap more easily by
26 | | // setting an EPI mask; a sketch follows this comment block. The endian swap
27 | | // incurs no penalty on Intel SHA and a 4-instruction penalty on ARM SHA. Under
28 | | // C++ the full software based swap penalty is incurred due to use of ReverseBytes().
29 | | // |
30 | | // In May 2019 JW added Cryptogams ARMv7 and NEON implementations for SHA1, |
31 | | // SHA256 and SHA512. The Cryptogams code closed a performance gap on modern |
32 | | // 32-bit ARM devices. Cryptogams is Andy Polyakov's project used to create |
33 | | // high speed crypto algorithms and share them with other developers. Andy's |
34 | | // code runs 30% to 50% faster than C/C++ code. The Cryptogams code can be |
35 | | // disabled in config_asm.h. An example of integrating Andy's code is at |
36 | | // https://wiki.openssl.org/index.php/Cryptogams_SHA. |
37 | | |
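// A minimal sketch of the "EPI mask" byte swap mentioned above, assuming SSSE3.
// The real masks and the SHA-NI rounds live in sha_simd.cpp; the helper name
// below is illustrative only and is excluded from compilation.
#if 0
#include <tmmintrin.h>   // _mm_shuffle_epi8

// Reverse the bytes of each 32-bit lane, converting four big endian words to
// little endian with one shuffle instead of four ReverseBytes() calls.
static inline __m128i ByteSwap32x4(__m128i value)
{
    const __m128i mask = _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3);
    return _mm_shuffle_epi8(value, mask);
}
#endif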
38 | | // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code |
39 | | |
40 | | #include "pch.h" |
41 | | #include "config.h" |
42 | | |
43 | | #if CRYPTOPP_MSC_VERSION |
44 | | # pragma warning(disable: 4731) |
45 | | #endif |
46 | | |
47 | | #ifndef CRYPTOPP_IMPORTS |
48 | | #ifndef CRYPTOPP_GENERATE_X64_MASM |
49 | | |
50 | | #include "secblock.h" |
51 | | #include "sha.h" |
52 | | #include "misc.h" |
53 | | #include "cpu.h" |
54 | | |
55 | | #if defined(CRYPTOPP_DISABLE_SHA_ASM) |
56 | | # undef CRYPTOPP_X86_ASM_AVAILABLE |
57 | | # undef CRYPTOPP_X32_ASM_AVAILABLE |
58 | | # undef CRYPTOPP_X64_ASM_AVAILABLE |
59 | | # undef CRYPTOPP_SSE2_ASM_AVAILABLE |
60 | | #endif |
61 | | |
62 | | NAMESPACE_BEGIN(CryptoPP) |
63 | | |
64 | | #if CRYPTOPP_SHANI_AVAILABLE |
65 | | extern void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order); |
66 | | extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order); |
67 | | #endif |
68 | | |
69 | | #if CRYPTOGAMS_ARM_SHA1 |
70 | | extern "C" void cryptogams_sha1_block_data_order(word32* state, const word32 *data, size_t blocks); |
71 | | extern "C" void cryptogams_sha1_block_data_order_neon(word32* state, const word32 *data, size_t blocks); |
72 | | #endif |
73 | | |
74 | | #if CRYPTOPP_ARM_SHA1_AVAILABLE |
75 | | extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order); |
76 | | #endif |
77 | | |
78 | | #if CRYPTOPP_ARM_SHA2_AVAILABLE |
79 | | extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order); |
80 | | #endif |
81 | | |
82 | | #if CRYPTOGAMS_ARM_SHA256 |
83 | | extern "C" void cryptogams_sha256_block_data_order(word32* state, const word32 *data, size_t blocks); |
84 | | extern "C" void cryptogams_sha256_block_data_order_neon(word32* state, const word32 *data, size_t blocks); |
85 | | #endif |
86 | | |
87 | | #if CRYPTOPP_ARM_SHA512_AVAILABLE |
88 | | extern void SHA512_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order); |
89 | | #endif |
90 | | |
91 | | #if CRYPTOPP_POWER8_SHA_AVAILABLE |
92 | | extern void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t length, ByteOrder order); |
93 | | extern void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t length, ByteOrder order); |
94 | | #endif |
95 | | |
96 | | #if CRYPTOGAMS_ARM_SHA512 |
97 | | extern "C" void cryptogams_sha512_block_data_order(word64* state, const word64 *data, size_t blocks); |
98 | | extern "C" void cryptogams_sha512_block_data_order_neon(word64* state, const word64 *data, size_t blocks); |
99 | | #endif |
100 | | |
101 | | // We add extern to export the tables to sha_simd.cpp. It also
102 | | // cleared http://github.com/weidai11/cryptopp/issues/502
103 | | extern const word32 SHA256_K[64]; |
104 | | extern const word64 SHA512_K[80]; |
105 | | |
106 | | CRYPTOPP_ALIGN_DATA(16) |
107 | | const word64 SHA512_K[80] = { |
108 | | W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd), |
109 | | W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc), |
110 | | W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019), |
111 | | W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118), |
112 | | W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe), |
113 | | W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2), |
114 | | W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1), |
115 | | W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694), |
116 | | W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3), |
117 | | W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65), |
118 | | W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483), |
119 | | W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5), |
120 | | W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210), |
121 | | W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4), |
122 | | W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725), |
123 | | W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70), |
124 | | W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926), |
125 | | W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df), |
126 | | W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8), |
127 | | W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b), |
128 | | W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001), |
129 | | W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30), |
130 | | W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910), |
131 | | W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8), |
132 | | W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53), |
133 | | W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8), |
134 | | W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb), |
135 | | W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3), |
136 | | W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60), |
137 | | W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec), |
138 | | W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9), |
139 | | W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b), |
140 | | W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207), |
141 | | W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178), |
142 | | W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6), |
143 | | W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b), |
144 | | W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493), |
145 | | W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c), |
146 | | W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a), |
147 | | W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817) |
148 | | }; |
149 | | |
150 | | CRYPTOPP_ALIGN_DATA(16) |
151 | | const word32 SHA256_K[64] = { |
152 | | |
153 | | 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, |
154 | | 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, |
155 | | 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, |
156 | | 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, |
157 | | 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, |
158 | | 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, |
159 | | 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, |
160 | | 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, |
161 | | 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, |
162 | | 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, |
163 | | 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, |
164 | | 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, |
165 | | 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, |
166 | | 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, |
167 | | 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, |
168 | | 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 |
169 | | }; |
170 | | |
171 | | //////////////////////////////// |
172 | | // start of Steve Reid's code // |
173 | | //////////////////////////////// |
174 | | |
175 | | ANONYMOUS_NAMESPACE_BEGIN |
176 | | |
177 | 0 | #define blk0(i) (W[i] = data[i]) |
178 | 0 | #define blk1(i) (W[i&15] = rotlConstant<1>(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15])) |
179 | | |
180 | 0 | #define f1(x,y,z) (z^(x&(y^z))) |
181 | 0 | #define f2(x,y,z) (x^y^z) |
182 | 0 | #define f3(x,y,z) ((x&y)|(z&(x|y))) |
183 | 0 | #define f4(x,y,z) (x^y^z) |
184 | | |
185 | | /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ |
186 | 0 | #define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlConstant<5>(v);w=rotlConstant<30>(w); |
187 | 0 | #define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlConstant<5>(v);w=rotlConstant<30>(w); |
188 | 0 | #define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlConstant<5>(v);w=rotlConstant<30>(w); |
189 | 0 | #define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlConstant<5>(v);w=rotlConstant<30>(w); |
190 | 0 | #define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlConstant<5>(v);w=rotlConstant<30>(w); |
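// The round macros above never move the five working variables; instead each
// call site below rotates which variable is passed as (v,w,x,y,z), so "z" is
// always the slot that receives the new value and "w" the one rotated by 30.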
191 | | |
192 | | void SHA1_HashBlock_CXX(word32 *state, const word32 *data) |
193 | 0 | { |
194 | 0 | CRYPTOPP_ASSERT(state); |
195 | 0 | CRYPTOPP_ASSERT(data); |
196 | |
197 | 0 | word32 W[16]; |
198 | | /* Copy context->state[] to working vars */ |
199 | 0 | word32 a = state[0]; |
200 | 0 | word32 b = state[1]; |
201 | 0 | word32 c = state[2]; |
202 | 0 | word32 d = state[3]; |
203 | 0 | word32 e = state[4]; |
204 | | /* 4 rounds of 20 operations each. Loop unrolled. */ |
205 | 0 | R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); |
206 | 0 | R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); |
207 | 0 | R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); |
208 | 0 | R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); |
209 | 0 | R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); |
210 | 0 | R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); |
211 | 0 | R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); |
212 | 0 | R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); |
213 | 0 | R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); |
214 | 0 | R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); |
215 | 0 | R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); |
216 | 0 | R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); |
217 | 0 | R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); |
218 | 0 | R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); |
219 | 0 | R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); |
220 | 0 | R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); |
221 | 0 | R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); |
222 | 0 | R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); |
223 | 0 | R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); |
224 | 0 | R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); |
225 | | /* Add the working vars back into context.state[] */ |
226 | 0 | state[0] += a; |
227 | 0 | state[1] += b; |
228 | 0 | state[2] += c; |
229 | 0 | state[3] += d; |
230 | 0 | state[4] += e; |
231 | 0 | } |
232 | | |
233 | | #undef blk0 |
234 | | #undef blk1 |
235 | | #undef f1 |
236 | | #undef f2 |
237 | | #undef f3 |
238 | | #undef f4 |
239 | | #undef R1 |
240 | | #undef R2 |
241 | | #undef R3 |
242 | | #undef R4 |
243 | | |
244 | | ANONYMOUS_NAMESPACE_END |
245 | | |
246 | | ////////////////////////////// |
247 | | // end of Steve Reid's code // |
248 | | ////////////////////////////// |
249 | | |
250 | | std::string SHA1::AlgorithmProvider() const |
251 | 0 | { |
252 | 0 | #if CRYPTOPP_SHANI_AVAILABLE |
253 | 0 | if (HasSHA()) |
254 | 0 | return "SHANI"; |
255 | 0 | #endif |
256 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE |
257 | | if (HasSSE2()) |
258 | | return "SSE2"; |
259 | | #endif |
260 | | #if CRYPTOGAMS_ARM_SHA1 |
261 | | # if CRYPTOPP_ARM_NEON_AVAILABLE |
262 | | if (HasNEON()) |
263 | | return "NEON"; |
264 | | else |
265 | | # endif |
266 | | if (HasARMv7()) |
267 | | return "ARMv7"; |
268 | | #endif |
269 | | #if CRYPTOPP_ARM_SHA1_AVAILABLE |
270 | | if (HasSHA1()) |
271 | | return "ARMv8"; |
272 | | #endif |
273 | 0 | return "C++"; |
274 | 0 | } |
275 | | |
276 | | void SHA1::InitState(HashWordType *state) |
277 | 27.8k | { |
278 | 27.8k | state[0] = 0x67452301; |
279 | 27.8k | state[1] = 0xEFCDAB89; |
280 | 27.8k | state[2] = 0x98BADCFE; |
281 | 27.8k | state[3] = 0x10325476; |
282 | 27.8k | state[4] = 0xC3D2E1F0; |
283 | 27.8k | } |
284 | | |
285 | | void SHA1::Transform(word32 *state, const word32 *data) |
286 | 0 | { |
287 | 0 | CRYPTOPP_ASSERT(state); |
288 | 0 | CRYPTOPP_ASSERT(data); |
289 | |
290 | 0 | #if CRYPTOPP_SHANI_AVAILABLE |
291 | 0 | if (HasSHA()) |
292 | 0 | { |
293 | 0 | SHA1_HashMultipleBlocks_SHANI(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER); |
294 | 0 | return; |
295 | 0 | } |
296 | 0 | #endif |
297 | | // Disabled at the moment due to MDC and SEAL failures |
298 | | #if CRYPTOGAMS_ARM_SHA1 && 0 |
299 | | # if CRYPTOPP_ARM_NEON_AVAILABLE |
300 | | if (HasNEON()) |
301 | | { |
302 | | # if defined(CRYPTOPP_LITTLE_ENDIAN) |
303 | | word32 dataBuf[16]; |
304 | | ByteReverse(dataBuf, data, SHA1::BLOCKSIZE); |
305 | | cryptogams_sha1_block_data_order_neon(state, dataBuf, 1); |
306 | | # else |
307 | | cryptogams_sha1_block_data_order_neon(state, data, 1); |
308 | | # endif |
309 | | return; |
310 | | } |
311 | | else |
312 | | # endif |
313 | | if (HasARMv7()) |
314 | | { |
315 | | # if defined(CRYPTOPP_LITTLE_ENDIAN) |
316 | | word32 dataBuf[16]; |
317 | | ByteReverse(dataBuf, data, SHA1::BLOCKSIZE); |
318 | | cryptogams_sha1_block_data_order(state, dataBuf, 1);
319 | | # else |
320 | | cryptogams_sha1_block_data_order(state, data, 1); |
321 | | # endif |
322 | | return; |
323 | | } |
324 | | #endif |
325 | | #if CRYPTOPP_ARM_SHA1_AVAILABLE |
326 | | if (HasSHA1()) |
327 | | { |
328 | | SHA1_HashMultipleBlocks_ARMV8(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER); |
329 | | return; |
330 | | } |
331 | | #endif |
332 | | |
333 | 0 | SHA1_HashBlock_CXX(state, data); |
334 | 0 | } |
335 | | |
336 | | size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length) |
337 | 71.7k | { |
338 | 71.7k | CRYPTOPP_ASSERT(input); |
339 | 71.7k | CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE); |
340 | | |
341 | 71.7k | #if CRYPTOPP_SHANI_AVAILABLE |
342 | 71.7k | if (HasSHA()) |
343 | 71.7k | { |
344 | 71.7k | SHA1_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER); |
345 | 71.7k | return length & (SHA1::BLOCKSIZE - 1); |
346 | 71.7k | } |
347 | 0 | #endif |
348 | | #if CRYPTOGAMS_ARM_SHA1 |
349 | | # if CRYPTOPP_ARM_NEON_AVAILABLE |
350 | | if (HasNEON()) |
351 | | { |
352 | | cryptogams_sha1_block_data_order_neon(m_state, input, length / SHA1::BLOCKSIZE); |
353 | | return length & (SHA1::BLOCKSIZE - 1); |
354 | | } |
355 | | else |
356 | | # endif |
357 | | if (HasARMv7()) |
358 | | { |
359 | | cryptogams_sha1_block_data_order(m_state, input, length / SHA1::BLOCKSIZE); |
360 | | return length & (SHA1::BLOCKSIZE - 1); |
361 | | } |
362 | | #endif |
363 | | #if CRYPTOPP_ARM_SHA1_AVAILABLE |
364 | | if (HasSHA1()) |
365 | | { |
366 | | SHA1_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER); |
367 | | return length & (SHA1::BLOCKSIZE - 1); |
368 | | } |
369 | | #endif |
370 | | |
371 | 0 | const bool noReverse = NativeByteOrderIs(this->GetByteOrder()); |
372 | 0 | word32 *dataBuf = this->DataBuf(); |
373 | 0 | do |
374 | 0 | { |
375 | 0 | if (noReverse) |
376 | 0 | { |
377 | 0 | SHA1_HashBlock_CXX(m_state, input); |
378 | 0 | } |
379 | 0 | else |
380 | 0 | { |
381 | 0 | ByteReverse(dataBuf, input, SHA1::BLOCKSIZE); |
382 | 0 | SHA1_HashBlock_CXX(m_state, dataBuf); |
383 | 0 | } |
384 | |
385 | 0 | input += SHA1::BLOCKSIZE/sizeof(word32); |
386 | 0 | length -= SHA1::BLOCKSIZE; |
387 | 0 | } |
388 | 0 | while (length >= SHA1::BLOCKSIZE); |
389 | 0 | return length; |
390 | 71.7k | } |
391 | | |
392 | | // ************************************************************* |
393 | | |
394 | | ANONYMOUS_NAMESPACE_BEGIN |
395 | | |
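// The a(i)..h(i) macros below index a single T[8] working array. Computing the
// index as (k-i)&7 means advancing the round number i renames the slots rather
// than copying eight variables: the word addressed by a(i) is addressed by
// b(i+1) in the next round, matching the variable rotation in FIPS 180-4.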
396 | 0 | #define a(i) T[(0-i)&7] |
397 | 0 | #define b(i) T[(1-i)&7] |
398 | 0 | #define c(i) T[(2-i)&7] |
399 | 0 | #define d(i) T[(3-i)&7] |
400 | 0 | #define e(i) T[(4-i)&7] |
401 | 0 | #define f(i) T[(5-i)&7] |
402 | 0 | #define g(i) T[(6-i)&7] |
403 | 0 | #define h(i) T[(7-i)&7] |
404 | | |
405 | 0 | #define blk0(i) (W[i] = data[i]) |
406 | 0 | #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15])) |
407 | | |
408 | 0 | #define Ch(x,y,z) (z^(x&(y^z))) |
409 | 0 | #define Maj(x,y,z) (y^((x^y)&(y^z))) |
410 | | |
411 | 0 | #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\ |
412 | 0 | d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)) |
413 | | |
414 | | // for SHA256 |
415 | 0 | #define s0(x) (rotrConstant<7>(x)^rotrConstant<18>(x)^(x>>3)) |
416 | 0 | #define s1(x) (rotrConstant<17>(x)^rotrConstant<19>(x)^(x>>10)) |
417 | 0 | #define S0(x) (rotrConstant<2>(x)^rotrConstant<13>(x)^rotrConstant<22>(x)) |
418 | 0 | #define S1(x) (rotrConstant<6>(x)^rotrConstant<11>(x)^rotrConstant<25>(x)) |
419 | | |
420 | | void SHA256_HashBlock_CXX(word32 *state, const word32 *data) |
421 | 0 | { |
422 | 0 | word32 W[16]={0}, T[8]; |
423 | | /* Copy context->state[] to working vars */ |
424 | 0 | std::memcpy(T, state, sizeof(T)); |
425 | | /* 64 operations, partially loop unrolled */ |
426 | 0 | for (unsigned int j=0; j<64; j+=16) |
427 | 0 | { |
428 | 0 | R( 0); R( 1); R( 2); R( 3); |
429 | 0 | R( 4); R( 5); R( 6); R( 7); |
430 | 0 | R( 8); R( 9); R(10); R(11); |
431 | 0 | R(12); R(13); R(14); R(15); |
432 | 0 | } |
433 | | /* Add the working vars back into context.state[] */ |
434 | 0 | state[0] += a(0); |
435 | 0 | state[1] += b(0); |
436 | 0 | state[2] += c(0); |
437 | 0 | state[3] += d(0); |
438 | 0 | state[4] += e(0); |
439 | 0 | state[5] += f(0); |
440 | 0 | state[6] += g(0); |
441 | 0 | state[7] += h(0); |
442 | 0 | } |
443 | | |
444 | | #undef Ch |
445 | | #undef Maj |
446 | | #undef s0 |
447 | | #undef s1 |
448 | | #undef S0 |
449 | | #undef S1 |
450 | | #undef blk0 |
451 | | #undef blk1 |
452 | | #undef blk2 |
453 | | #undef R |
454 | | |
455 | | #undef a |
456 | | #undef b |
457 | | #undef c |
458 | | #undef d |
459 | | #undef e |
460 | | #undef f |
461 | | #undef g |
462 | | #undef h |
463 | | |
464 | | ANONYMOUS_NAMESPACE_END |
465 | | |
466 | | std::string SHA256_AlgorithmProvider() |
467 | 0 | { |
468 | 0 | #if CRYPTOPP_SHANI_AVAILABLE |
469 | 0 | if (HasSHA()) |
470 | 0 | return "SHANI"; |
471 | 0 | #endif |
472 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE |
473 | | if (HasSSE2()) |
474 | | return "SSE2"; |
475 | | #endif |
476 | | #if CRYPTOGAMS_ARM_SHA256 |
477 | | # if CRYPTOPP_ARM_NEON_AVAILABLE |
478 | | if (HasNEON()) |
479 | | return "NEON"; |
480 | | else |
481 | | # endif |
482 | | if (HasARMv7()) |
483 | | return "ARMv7"; |
484 | | #endif |
485 | | #if CRYPTOPP_ARM_SHA2_AVAILABLE |
486 | | if (HasSHA2()) |
487 | | return "ARMv8"; |
488 | | #endif |
489 | | #if (CRYPTOPP_POWER8_SHA_AVAILABLE) |
490 | | if (HasSHA256()) |
491 | | return "Power8"; |
492 | | #endif |
493 | 0 | return "C++"; |
494 | 0 | } |
495 | | |
496 | | std::string SHA224::AlgorithmProvider() const |
497 | 0 | { |
498 | 0 | return SHA256_AlgorithmProvider(); |
499 | 0 | } |
500 | | |
501 | | void SHA224::InitState(HashWordType *state) |
502 | 11.9k | { |
503 | 11.9k | static const word32 s[8] = { |
504 | 11.9k | 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, |
505 | 11.9k | 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4}; |
506 | 11.9k | std::memcpy(state, s, sizeof(s)); |
507 | 11.9k | } |
508 | | |
509 | | void SHA256::InitState(HashWordType *state) |
510 | 65.4k | { |
511 | 65.4k | static const word32 s[8] = { |
512 | 65.4k | 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, |
513 | 65.4k | 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; |
514 | 65.4k | std::memcpy(state, s, sizeof(s)); |
515 | 65.4k | } |
516 | | #endif // Not CRYPTOPP_GENERATE_X64_MASM |
517 | | |
518 | | #if defined(CRYPTOPP_X86_ASM_AVAILABLE) |
519 | | |
520 | | ANONYMOUS_NAMESPACE_BEGIN |
521 | | |
522 | | void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len) |
523 | | { |
524 | | // Due to the inline asm |
525 | | CRYPTOPP_UNUSED(state); |
526 | | CRYPTOPP_UNUSED(data); |
527 | | CRYPTOPP_UNUSED(len); |
528 | | |
529 | | #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ |
530 | | #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4] |
531 | | #define G(i) H(i+1) |
532 | | #define F(i) H(i+2) |
533 | | #define E(i) H(i+3) |
534 | | #define D(i) H(i+4) |
535 | | #define C(i) H(i+5) |
536 | | #define B(i) H(i+6) |
537 | | #define A(i) H(i+7) |
538 | | #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4 |
539 | | #define Wt_2(i) Wt((i)-2) |
540 | | #define Wt_15(i) Wt((i)-15) |
541 | | #define Wt_7(i) Wt((i)-7) |
542 | | #define K_END [BASE+8*4+16*4+0*WORD_SZ] |
543 | | #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ] |
544 | | #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ] |
545 | | #define DATA_END [BASE+8*4+16*4+3*WORD_SZ] |
546 | | #define Kt(i) WORD_REG(si)+(i)*4 |
547 | | #if CRYPTOPP_BOOL_X86 |
548 | | #define BASE esp+4 |
549 | | #elif defined(__GNUC__) |
550 | | #define BASE r8 |
551 | | #else |
552 | | #define BASE rsp |
553 | | #endif |
554 | | |
555 | | #define RA0(i, edx, edi) \ |
556 | | AS2( add edx, [Kt(i)] )\ |
557 | | AS2( add edx, [Wt(i)] )\ |
558 | | AS2( add edx, H(i) )\ |
559 | | |
560 | | #define RA1(i, edx, edi) |
561 | | |
562 | | #define RB0(i, edx, edi) |
563 | | |
564 | | #define RB1(i, edx, edi) \ |
565 | | AS2( mov AS_REG_7d, [Wt_2(i)] )\ |
566 | | AS2( mov edi, [Wt_15(i)])\ |
567 | | AS2( mov ebx, AS_REG_7d )\ |
568 | | AS2( shr AS_REG_7d, 10 )\ |
569 | | AS2( ror ebx, 17 )\ |
570 | | AS2( xor AS_REG_7d, ebx )\ |
571 | | AS2( ror ebx, 2 )\ |
572 | | AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\ |
573 | | AS2( add ebx, [Wt_7(i)])\ |
574 | | AS2( mov AS_REG_7d, edi )\ |
575 | | AS2( shr AS_REG_7d, 3 )\ |
576 | | AS2( ror edi, 7 )\ |
577 | | AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\ |
578 | | AS2( xor AS_REG_7d, edi )\ |
579 | | AS2( add edx, [Kt(i)])\ |
580 | | AS2( ror edi, 11 )\ |
581 | | AS2( add edx, H(i) )\ |
582 | | AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\ |
583 | | AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) + W_t-16 */\
584 | | AS2( mov [Wt(i)], AS_REG_7d)\ |
585 | | AS2( add edx, AS_REG_7d )\ |
586 | | |
587 | | #define ROUND(i, r, eax, ecx, edi, edx)\ |
588 | | /* in: edi = E */\ |
589 | | /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\ |
590 | | AS2( mov edx, F(i) )\ |
591 | | AS2( xor edx, G(i) )\ |
592 | | AS2( and edx, edi )\ |
593 | | AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\ |
594 | | AS2( mov AS_REG_7d, edi )\ |
595 | | AS2( ror edi, 6 )\ |
596 | | AS2( ror AS_REG_7d, 25 )\ |
597 | | RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\ |
598 | | AS2( xor AS_REG_7d, edi )\ |
599 | | AS2( ror edi, 5 )\ |
600 | | AS2( xor AS_REG_7d, edi )/* S1(E) */\ |
601 | | AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\ |
602 | | RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\ |
603 | | /* in: ecx = A, eax = B^C, edx = T1 */\ |
604 | | /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\ |
605 | | AS2( mov ebx, ecx )\ |
606 | | AS2( xor ecx, B(i) )/* A^B */\ |
607 | | AS2( and eax, ecx )\ |
608 | | AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C)) */\
609 | | AS2( mov AS_REG_7d, ebx )\ |
610 | | AS2( ror ebx, 2 )\ |
611 | | AS2( add eax, edx )/* T1 + Maj(A,B,C) */\ |
612 | | AS2( add edx, D(i) )\ |
613 | | AS2( mov D(i), edx )\ |
614 | | AS2( ror AS_REG_7d, 22 )\ |
615 | | AS2( xor AS_REG_7d, ebx )\ |
616 | | AS2( ror ebx, 11 )\ |
617 | | AS2( xor AS_REG_7d, ebx )\ |
618 | | AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\ |
619 | | AS2( mov H(i), eax )\ |
620 | | |
621 | | // Unroll the use of CRYPTOPP_BOOL_X64 in assembler math. The GAS assembler on X32 (version 2.25) |
622 | | // complains "Error: invalid operands (*ABS* and *UND* sections) for `*` and `-`" |
623 | | #if CRYPTOPP_BOOL_X64 |
624 | | #define SWAP_COPY(i) \ |
625 | | AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\ |
626 | | AS1( bswap WORD_REG(bx))\ |
627 | | AS2( mov [Wt(i*2+1)], WORD_REG(bx)) |
628 | | #else // X86 and X32 |
629 | | #define SWAP_COPY(i) \ |
630 | | AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\ |
631 | | AS1( bswap WORD_REG(bx))\ |
632 | | AS2( mov [Wt(i)], WORD_REG(bx)) |
633 | | #endif |
634 | | |
635 | | #if defined(__GNUC__) |
636 | | #if CRYPTOPP_BOOL_X64 |
637 | | FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace; |
638 | | #endif |
639 | | __asm__ __volatile__ |
640 | | ( |
641 | | #if CRYPTOPP_BOOL_X64 |
642 | | "lea %4, %%r8;" |
643 | | #endif |
644 | | INTEL_NOPREFIX |
645 | | #elif defined(CRYPTOPP_GENERATE_X64_MASM) |
646 | | ALIGN 8 |
647 | | SHA256_HashMultipleBlocks_SSE2 PROC FRAME |
648 | | rex_push_reg rsi |
649 | | push_reg rdi |
650 | | push_reg rbx |
651 | | push_reg rbp |
652 | | alloc_stack(LOCALS_SIZE+8) |
653 | | .endprolog |
654 | | mov rdi, r8 |
655 | | lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4] |
656 | | #endif |
657 | | |
658 | | #if CRYPTOPP_BOOL_X86 |
659 | | #ifndef __GNUC__ |
660 | | AS2( mov edi, [len]) |
661 | | AS2( lea WORD_REG(si), [SHA256_K+48*4]) |
662 | | #endif |
663 | | #if !defined(CRYPTOPP_MSC_VERSION) || (CRYPTOPP_MSC_VERSION < 1400) |
664 | | AS_PUSH_IF86(bx) |
665 | | #endif |
666 | | |
667 | | AS_PUSH_IF86(bp) |
668 | | AS2( mov ebx, esp) |
669 | | AS2( and esp, -16) |
670 | | AS2( sub WORD_REG(sp), LOCALS_SIZE) |
671 | | AS_PUSH_IF86(bx) |
672 | | #endif |
673 | | AS2( mov STATE_SAVE, WORD_REG(cx)) |
674 | | AS2( mov DATA_SAVE, WORD_REG(dx)) |
675 | | AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)]) |
676 | | AS2( mov DATA_END, WORD_REG(ax)) |
677 | | AS2( mov K_END, WORD_REG(si)) |
678 | | |
679 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE |
680 | | #if CRYPTOPP_BOOL_X86 |
681 | | AS2( test edi, 1) |
682 | | ASJ( jnz, 2, f) |
683 | | AS1( dec DWORD PTR K_END) |
684 | | #endif |
685 | | AS2( movdqu xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16]) |
686 | | AS2( movdqu xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16]) |
687 | | #endif |
688 | | |
689 | | #if CRYPTOPP_BOOL_X86 |
690 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE |
691 | | ASJ( jmp, 0, f) |
692 | | #endif |
693 | | ASL(2) // non-SSE2 |
694 | | AS2( mov esi, ecx) |
695 | | AS2( lea edi, A(0)) |
696 | | AS2( mov ecx, 8) |
697 | | ATT_NOPREFIX |
698 | | AS1( rep movsd) |
699 | | INTEL_NOPREFIX |
700 | | AS2( mov esi, K_END) |
701 | | ASJ( jmp, 3, f) |
702 | | #endif |
703 | | |
704 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE |
705 | | ASL(0) |
706 | | AS2( movdqu E(0), xmm1) |
707 | | AS2( movdqu A(0), xmm0) |
708 | | #endif |
709 | | #if CRYPTOPP_BOOL_X86 |
710 | | ASL(3) |
711 | | #endif |
712 | | AS2( sub WORD_REG(si), 48*4) |
713 | | SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3) |
714 | | SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7) |
715 | | #if CRYPTOPP_BOOL_X86 |
716 | | SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11) |
717 | | SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15) |
718 | | #endif |
719 | | AS2( mov edi, E(0)) // E |
720 | | AS2( mov eax, B(0)) // B |
721 | | AS2( xor eax, C(0)) // B^C |
722 | | AS2( mov ecx, A(0)) // A |
723 | | |
724 | | ROUND(0, 0, eax, ecx, edi, edx) |
725 | | ROUND(1, 0, ecx, eax, edx, edi) |
726 | | ROUND(2, 0, eax, ecx, edi, edx) |
727 | | ROUND(3, 0, ecx, eax, edx, edi) |
728 | | ROUND(4, 0, eax, ecx, edi, edx) |
729 | | ROUND(5, 0, ecx, eax, edx, edi) |
730 | | ROUND(6, 0, eax, ecx, edi, edx) |
731 | | ROUND(7, 0, ecx, eax, edx, edi) |
732 | | ROUND(8, 0, eax, ecx, edi, edx) |
733 | | ROUND(9, 0, ecx, eax, edx, edi) |
734 | | ROUND(10, 0, eax, ecx, edi, edx) |
735 | | ROUND(11, 0, ecx, eax, edx, edi) |
736 | | ROUND(12, 0, eax, ecx, edi, edx) |
737 | | ROUND(13, 0, ecx, eax, edx, edi) |
738 | | ROUND(14, 0, eax, ecx, edi, edx) |
739 | | ROUND(15, 0, ecx, eax, edx, edi) |
740 | | |
741 | | ASL(1) |
742 | | AS2(add WORD_REG(si), 4*16) |
743 | | ROUND(0, 1, eax, ecx, edi, edx) |
744 | | ROUND(1, 1, ecx, eax, edx, edi) |
745 | | ROUND(2, 1, eax, ecx, edi, edx) |
746 | | ROUND(3, 1, ecx, eax, edx, edi) |
747 | | ROUND(4, 1, eax, ecx, edi, edx) |
748 | | ROUND(5, 1, ecx, eax, edx, edi) |
749 | | ROUND(6, 1, eax, ecx, edi, edx) |
750 | | ROUND(7, 1, ecx, eax, edx, edi) |
751 | | ROUND(8, 1, eax, ecx, edi, edx) |
752 | | ROUND(9, 1, ecx, eax, edx, edi) |
753 | | ROUND(10, 1, eax, ecx, edi, edx) |
754 | | ROUND(11, 1, ecx, eax, edx, edi) |
755 | | ROUND(12, 1, eax, ecx, edi, edx) |
756 | | ROUND(13, 1, ecx, eax, edx, edi) |
757 | | ROUND(14, 1, eax, ecx, edi, edx) |
758 | | ROUND(15, 1, ecx, eax, edx, edi) |
759 | | AS2( cmp WORD_REG(si), K_END) |
760 | | ATT_NOPREFIX |
761 | | ASJ( jb, 1, b) |
762 | | INTEL_NOPREFIX |
763 | | |
764 | | AS2( mov WORD_REG(dx), DATA_SAVE) |
765 | | AS2( add WORD_REG(dx), 64) |
766 | | AS2( mov AS_REG_7, STATE_SAVE) |
767 | | AS2( mov DATA_SAVE, WORD_REG(dx)) |
768 | | |
769 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE |
770 | | #if CRYPTOPP_BOOL_X86 |
771 | | AS2( test DWORD PTR K_END, 1) |
772 | | ASJ( jz, 4, f) |
773 | | #endif |
774 | | AS2( movdqu xmm1, XMMWORD_PTR [AS_REG_7+1*16]) |
775 | | AS2( movdqu xmm0, XMMWORD_PTR [AS_REG_7+0*16]) |
776 | | AS2( paddd xmm1, E(0)) |
777 | | AS2( paddd xmm0, A(0)) |
778 | | AS2( movdqu [AS_REG_7+1*16], xmm1) |
779 | | AS2( movdqu [AS_REG_7+0*16], xmm0) |
780 | | AS2( cmp WORD_REG(dx), DATA_END) |
781 | | ATT_NOPREFIX |
782 | | ASJ( jb, 0, b) |
783 | | INTEL_NOPREFIX |
784 | | #endif |
785 | | |
786 | | #if CRYPTOPP_BOOL_X86 |
787 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE |
788 | | ASJ( jmp, 5, f) |
789 | | ASL(4) // non-SSE2 |
790 | | #endif |
791 | | AS2( add [AS_REG_7+0*4], ecx) // A |
792 | | AS2( add [AS_REG_7+4*4], edi) // E |
793 | | AS2( mov eax, B(0)) |
794 | | AS2( mov ebx, C(0)) |
795 | | AS2( mov ecx, D(0)) |
796 | | AS2( add [AS_REG_7+1*4], eax) |
797 | | AS2( add [AS_REG_7+2*4], ebx) |
798 | | AS2( add [AS_REG_7+3*4], ecx) |
799 | | AS2( mov eax, F(0)) |
800 | | AS2( mov ebx, G(0)) |
801 | | AS2( mov ecx, H(0)) |
802 | | AS2( add [AS_REG_7+5*4], eax) |
803 | | AS2( add [AS_REG_7+6*4], ebx) |
804 | | AS2( add [AS_REG_7+7*4], ecx) |
805 | | AS2( mov ecx, AS_REG_7d) |
806 | | AS2( cmp WORD_REG(dx), DATA_END) |
807 | | ASJ( jb, 2, b) |
808 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE |
809 | | ASL(5) |
810 | | #endif |
811 | | #endif |
812 | | |
813 | | AS_POP_IF86(sp) |
814 | | AS_POP_IF86(bp) |
815 | | #if !defined(CRYPTOPP_MSC_VERSION) || (CRYPTOPP_MSC_VERSION < 1400) |
816 | | AS_POP_IF86(bx) |
817 | | #endif |
818 | | |
819 | | #ifdef CRYPTOPP_GENERATE_X64_MASM |
820 | | add rsp, LOCALS_SIZE+8 |
821 | | pop rbp |
822 | | pop rbx |
823 | | pop rdi |
824 | | pop rsi |
825 | | ret |
826 | | SHA256_HashMultipleBlocks_SSE2 ENDP |
827 | | #endif |
828 | | |
829 | | #ifdef __GNUC__ |
830 | | ATT_PREFIX |
831 | | : |
832 | | : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len) |
833 | | #if (CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) |
834 | | , "m" (workspace[0]) |
835 | | #endif |
836 | | : "memory", "cc", "%eax" |
837 | | #if (CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) |
838 | | , PERCENT_REG(AS_REG_7), "%rbx", "%r8", "%r10", "%xmm0", "%xmm1" |
839 | | #else |
840 | | , "%ebx" |
841 | | #endif |
842 | | ); |
843 | | #endif |
844 | | } |
845 | | |
846 | | ANONYMOUS_NAMESPACE_END |
847 | | |
848 | | #endif // CRYPTOPP_X86_ASM_AVAILABLE |
849 | | |
850 | | #ifndef CRYPTOPP_GENERATE_X64_MASM |
851 | | |
852 | | #ifdef CRYPTOPP_X64_MASM_AVAILABLE |
853 | | extern "C" { |
854 | | void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len); |
855 | | } |
856 | | #endif |
857 | | |
858 | | std::string SHA256::AlgorithmProvider() const |
859 | 0 | { |
860 | 0 | return SHA256_AlgorithmProvider(); |
861 | 0 | } |
862 | | |
863 | | void SHA256::Transform(word32 *state, const word32 *data) |
864 | 0 | { |
865 | 0 | CRYPTOPP_ASSERT(state); |
866 | 0 | CRYPTOPP_ASSERT(data); |
867 | |
868 | 0 | #if CRYPTOPP_SHANI_AVAILABLE |
869 | 0 | if (HasSHA()) |
870 | 0 | { |
871 | 0 | SHA256_HashMultipleBlocks_SHANI(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER); |
872 | 0 | return; |
873 | 0 | } |
874 | 0 | #endif |
875 | | // Disabled at the moment due to MDC and SEAL failures |
876 | | #if CRYPTOGAMS_ARM_SHA256 && 0 |
877 | | # if CRYPTOPP_ARM_NEON_AVAILABLE |
878 | | if (HasNEON()) |
879 | | { |
880 | | # if defined(CRYPTOPP_LITTLE_ENDIAN) |
881 | | word32 dataBuf[16]; |
882 | | ByteReverse(dataBuf, data, SHA256::BLOCKSIZE); |
883 | | cryptogams_sha256_block_data_order_neon(state, dataBuf, 1); |
884 | | # else |
885 | | cryptogams_sha256_block_data_order_neon(state, data, 1); |
886 | | # endif |
887 | | return; |
888 | | } |
889 | | else |
890 | | # endif |
891 | | if (HasARMv7()) |
892 | | { |
893 | | # if defined(CRYPTOPP_LITTLE_ENDIAN) |
894 | | word32 dataBuf[16]; |
895 | | ByteReverse(dataBuf, data, SHA256::BLOCKSIZE); |
896 | | cryptogams_sha256_block_data_order(state, dataBuf, 1);
897 | | # else |
898 | | cryptogams_sha256_block_data_order(state, data, 1); |
899 | | # endif |
900 | | return; |
901 | | } |
902 | | #endif |
903 | | #if CRYPTOPP_ARM_SHA2_AVAILABLE |
904 | | if (HasSHA2()) |
905 | | { |
906 | | SHA256_HashMultipleBlocks_ARMV8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER); |
907 | | return; |
908 | | } |
909 | | #endif |
910 | | #if CRYPTOPP_POWER8_SHA_AVAILABLE |
911 | | if (HasSHA256()) |
912 | | { |
913 | | SHA256_HashMultipleBlocks_POWER8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER); |
914 | | return; |
915 | | } |
916 | | #endif |
917 | | |
918 | 0 | SHA256_HashBlock_CXX(state, data); |
919 | 0 | } |
920 | | |
921 | | size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length) |
922 | 151k | { |
923 | 151k | CRYPTOPP_ASSERT(input); |
924 | 151k | CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE); |
925 | | |
926 | 151k | #if CRYPTOPP_SHANI_AVAILABLE |
927 | 151k | if (HasSHA()) |
928 | 151k | { |
929 | 151k | SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER); |
930 | 151k | return length & (SHA256::BLOCKSIZE - 1); |
931 | 151k | } |
932 | 0 | #endif |
933 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_X64_MASM_AVAILABLE |
934 | | if (HasSSE2()) |
935 | | { |
936 | | const size_t res = length & (SHA256::BLOCKSIZE - 1); |
937 | | SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res); |
938 | | return res; |
939 | | } |
940 | | #endif |
941 | | #if CRYPTOGAMS_ARM_SHA256 |
942 | | # if CRYPTOPP_ARM_NEON_AVAILABLE |
943 | | if (HasNEON()) |
944 | | { |
945 | | cryptogams_sha256_block_data_order_neon(m_state, input, length / SHA256::BLOCKSIZE); |
946 | | return length & (SHA256::BLOCKSIZE - 1); |
947 | | } |
948 | | else |
949 | | # endif |
950 | | if (HasARMv7()) |
951 | | { |
952 | | cryptogams_sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE); |
953 | | return length & (SHA256::BLOCKSIZE - 1); |
954 | | } |
955 | | #endif |
956 | | #if CRYPTOPP_ARM_SHA2_AVAILABLE |
957 | | if (HasSHA2()) |
958 | | { |
959 | | SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER); |
960 | | return length & (SHA256::BLOCKSIZE - 1); |
961 | | } |
962 | | #endif |
963 | | #if CRYPTOPP_POWER8_SHA_AVAILABLE |
964 | | if (HasSHA256()) |
965 | | { |
966 | | SHA256_HashMultipleBlocks_POWER8(m_state, input, length, BIG_ENDIAN_ORDER); |
967 | | return length & (SHA256::BLOCKSIZE - 1); |
968 | | } |
969 | | #endif |
970 | | |
971 | 0 | const bool noReverse = NativeByteOrderIs(this->GetByteOrder()); |
972 | 0 | word32 *dataBuf = this->DataBuf(); |
973 | 0 | do |
974 | 0 | { |
975 | 0 | if (noReverse) |
976 | 0 | { |
977 | 0 | SHA256_HashBlock_CXX(m_state, input); |
978 | 0 | } |
979 | 0 | else |
980 | 0 | { |
981 | 0 | ByteReverse(dataBuf, input, SHA256::BLOCKSIZE); |
982 | 0 | SHA256_HashBlock_CXX(m_state, dataBuf); |
983 | 0 | } |
984 | |
985 | 0 | input += SHA256::BLOCKSIZE/sizeof(word32); |
986 | 0 | length -= SHA256::BLOCKSIZE; |
987 | 0 | } |
988 | 0 | while (length >= SHA256::BLOCKSIZE); |
989 | 0 | return length; |
990 | 151k | } |
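// A minimal usage sketch (assumed example names, not part of this translation
// unit): callers do not invoke the block functions directly. Update() buffers
// data and hands whole blocks to HashMultipleBlocks() above, and Final() pads
// and processes the remaining partial block.
#if 0
#include "sha.h"
#include <cstddef>

void DigestExample(const CryptoPP::byte* msg, size_t len,
                   CryptoPP::byte digest[CryptoPP::SHA256::DIGESTSIZE])
{
    CryptoPP::SHA256 hash;
    hash.Update(msg, len);   // whole blocks reach SHA256::HashMultipleBlocks
    hash.Final(digest);      // pads, processes the tail, writes the digest
}
#endif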
991 | | |
992 | | size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length) |
993 | 28.0k | { |
994 | 28.0k | CRYPTOPP_ASSERT(input); |
995 | 28.0k | CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE); |
996 | | |
997 | 28.0k | #if CRYPTOPP_SHANI_AVAILABLE |
998 | 28.0k | if (HasSHA()) |
999 | 28.0k | { |
1000 | 28.0k | SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER); |
1001 | 28.0k | return length & (SHA256::BLOCKSIZE - 1); |
1002 | 28.0k | } |
1003 | 0 | #endif |
1004 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_X64_MASM_AVAILABLE |
1005 | | if (HasSSE2()) |
1006 | | { |
1007 | | const size_t res = length & (SHA256::BLOCKSIZE - 1); |
1008 | | SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res); |
1009 | | return res; |
1010 | | } |
1011 | | #endif |
1012 | | #if CRYPTOGAMS_ARM_SHA256 |
1013 | | # if CRYPTOPP_ARM_NEON_AVAILABLE |
1014 | | if (HasNEON()) |
1015 | | { |
1016 | | cryptogams_sha256_block_data_order_neon(m_state, input, length / SHA256::BLOCKSIZE); |
1017 | | return length & (SHA256::BLOCKSIZE - 1); |
1018 | | } |
1019 | | else |
1020 | | # endif |
1021 | | if (HasARMv7()) |
1022 | | { |
1023 | | cryptogams_sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE); |
1024 | | return length & (SHA256::BLOCKSIZE - 1); |
1025 | | } |
1026 | | #endif |
1027 | | #if CRYPTOPP_ARM_SHA2_AVAILABLE |
1028 | | if (HasSHA2()) |
1029 | | { |
1030 | | SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER); |
1031 | | return length & (SHA256::BLOCKSIZE - 1); |
1032 | | } |
1033 | | #endif |
1034 | | #if CRYPTOPP_POWER8_SHA_AVAILABLE |
1035 | | if (HasSHA256()) |
1036 | | { |
1037 | | SHA256_HashMultipleBlocks_POWER8(m_state, input, length, BIG_ENDIAN_ORDER); |
1038 | | return length & (SHA256::BLOCKSIZE - 1); |
1039 | | } |
1040 | | #endif |
1041 | | |
1042 | 0 | const bool noReverse = NativeByteOrderIs(this->GetByteOrder()); |
1043 | 0 | word32 *dataBuf = this->DataBuf(); |
1044 | 0 | do |
1045 | 0 | { |
1046 | 0 | if (noReverse) |
1047 | 0 | { |
1048 | 0 | SHA256_HashBlock_CXX(m_state, input); |
1049 | 0 | } |
1050 | 0 | else |
1051 | 0 | { |
1052 | 0 | ByteReverse(dataBuf, input, SHA256::BLOCKSIZE); |
1053 | 0 | SHA256_HashBlock_CXX(m_state, dataBuf); |
1054 | 0 | } |
1055 | |
1056 | 0 | input += SHA256::BLOCKSIZE/sizeof(word32); |
1057 | 0 | length -= SHA256::BLOCKSIZE; |
1058 | 0 | } |
1059 | 0 | while (length >= SHA256::BLOCKSIZE); |
1060 | 0 | return length; |
1061 | 28.0k | } |
1062 | | |
1063 | | // ************************************************************* |
1064 | | |
1065 | | std::string SHA512_AlgorithmProvider() |
1066 | 0 | { |
1067 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE |
1068 | | if (HasSSE2()) |
1069 | | return "SSE2"; |
1070 | | #endif |
1071 | | #if CRYPTOGAMS_ARM_SHA512 |
1072 | | # if CRYPTOPP_ARM_NEON_AVAILABLE |
1073 | | if (HasNEON()) |
1074 | | return "NEON"; |
1075 | | else |
1076 | | # endif |
1077 | | if (HasARMv7()) |
1078 | | return "ARMv7"; |
1079 | | #endif |
1080 | | #if (CRYPTOPP_POWER8_SHA_AVAILABLE) |
1081 | | if (HasSHA512()) |
1082 | | return "Power8"; |
1083 | | #endif |
1084 | 0 | return "C++"; |
1085 | 0 | } |
1086 | | |
1087 | | std::string SHA384::AlgorithmProvider() const |
1088 | 0 | { |
1089 | 0 | return SHA512_AlgorithmProvider(); |
1090 | 0 | } |
1091 | | |
1092 | | std::string SHA512::AlgorithmProvider() const |
1093 | 0 | { |
1094 | 0 | return SHA512_AlgorithmProvider(); |
1095 | 0 | } |
1096 | | |
1097 | | void SHA384::InitState(HashWordType *state) |
1098 | 12.1k | { |
1099 | 12.1k | const word64 s[8] = { |
1100 | 12.1k | W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507), |
1101 | 12.1k | W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939), |
1102 | 12.1k | W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511), |
1103 | 12.1k | W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)}; |
1104 | 12.1k | std::memcpy(state, s, sizeof(s)); |
1105 | 12.1k | } |
1106 | | |
1107 | | void SHA512::InitState(HashWordType *state) |
1108 | 17.2k | { |
1109 | 17.2k | const word64 s[8] = { |
1110 | 17.2k | W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b), |
1111 | 17.2k | W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1), |
1112 | 17.2k | W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f), |
1113 | 17.2k | W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)}; |
1114 | 17.2k | std::memcpy(state, s, sizeof(s)); |
1115 | 17.2k | } |
1116 | | |
1117 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86) |
1118 | | |
1119 | | ANONYMOUS_NAMESPACE_BEGIN |
1120 | | |
1121 | | // No inlining due to https://github.com/weidai11/cryptopp/issues/684 |
1122 | | // g++ -DNDEBUG -g2 -O3 -pthread -pipe -c sha.cpp |
1123 | | // sha.cpp: Assembler messages: |
1124 | | // sha.cpp:1155: Error: symbol `SHA512_Round' is already defined |
1125 | | // sha.cpp:1155: Error: symbol `SHA512_Round' is already defined |
1126 | | |
1127 | | CRYPTOPP_NOINLINE CRYPTOPP_NAKED |
1128 | | void CRYPTOPP_FASTCALL SHA512_HashBlock_SSE2(word64 *state, const word64 *data) |
1129 | | { |
1130 | | // Due to the inline asm |
1131 | | CRYPTOPP_UNUSED(state); |
1132 | | CRYPTOPP_UNUSED(data); |
1133 | | |
1134 | | #ifdef __GNUC__ |
1135 | | __asm__ __volatile__ |
1136 | | ( |
1137 | | INTEL_NOPREFIX |
1138 | | AS_PUSH_IF86( bx) |
1139 | | AS2( mov ebx, eax) |
1140 | | #else |
1141 | | AS1( push ebx) |
1142 | | AS1( push esi) |
1143 | | AS1( push edi) |
1144 | | AS2( lea ebx, SHA512_K) |
1145 | | #endif |
1146 | | |
1147 | | AS2( mov eax, esp) |
1148 | | AS2( and esp, 0xfffffff0) |
1149 | | AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state |
1150 | | AS_PUSH_IF86( ax) |
1151 | | AS2( xor eax, eax) |
1152 | | |
1153 | | AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying |
1154 | | AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8 |
1155 | | |
1156 | | AS2( movdqu xmm0, [ecx+0*16]) |
1157 | | AS2( movdq2q mm4, xmm0) |
1158 | | AS2( movdqu [edi+0*16], xmm0) |
1159 | | AS2( movdqu xmm0, [ecx+1*16]) |
1160 | | AS2( movdqu [edi+1*16], xmm0) |
1161 | | AS2( movdqu xmm0, [ecx+2*16]) |
1162 | | AS2( movdq2q mm5, xmm0) |
1163 | | AS2( movdqu [edi+2*16], xmm0) |
1164 | | AS2( movdqu xmm0, [ecx+3*16]) |
1165 | | AS2( movdqu [edi+3*16], xmm0) |
1166 | | ASJ( jmp, 0, f) |
1167 | | |
1168 | | #define SSE2_S0_S1(r, a, b, c) \ |
1169 | | AS2( movq mm6, r)\ |
1170 | | AS2( psrlq r, a)\ |
1171 | | AS2( movq mm7, r)\ |
1172 | | AS2( psllq mm6, 64-c)\ |
1173 | | AS2( pxor mm7, mm6)\ |
1174 | | AS2( psrlq r, b-a)\ |
1175 | | AS2( pxor mm7, r)\ |
1176 | | AS2( psllq mm6, c-b)\ |
1177 | | AS2( pxor mm7, mm6)\ |
1178 | | AS2( psrlq r, c-b)\ |
1179 | | AS2( pxor r, mm7)\ |
1180 | | AS2( psllq mm6, b-a)\ |
1181 | | AS2( pxor r, mm6) |
1182 | | |
1183 | | #define SSE2_s0(r, a, b, c) \ |
1184 | | AS2( movdqu xmm6, r)\ |
1185 | | AS2( psrlq r, a)\ |
1186 | | AS2( movdqu xmm7, r)\ |
1187 | | AS2( psllq xmm6, 64-c)\ |
1188 | | AS2( pxor xmm7, xmm6)\ |
1189 | | AS2( psrlq r, b-a)\ |
1190 | | AS2( pxor xmm7, r)\ |
1191 | | AS2( psrlq r, c-b)\ |
1192 | | AS2( pxor r, xmm7)\ |
1193 | | AS2( psllq xmm6, c-a)\ |
1194 | | AS2( pxor r, xmm6) |
1195 | | |
1196 | | #define SSE2_s1(r, a, b, c) \ |
1197 | | AS2( movdqu xmm6, r)\ |
1198 | | AS2( psrlq r, a)\ |
1199 | | AS2( movdqu xmm7, r)\ |
1200 | | AS2( psllq xmm6, 64-c)\ |
1201 | | AS2( pxor xmm7, xmm6)\ |
1202 | | AS2( psrlq r, b-a)\ |
1203 | | AS2( pxor xmm7, r)\ |
1204 | | AS2( psllq xmm6, c-b)\ |
1205 | | AS2( pxor xmm7, xmm6)\ |
1206 | | AS2( psrlq r, c-b)\ |
1207 | | AS2( pxor r, xmm7) |
1208 | | ASL(SHA512_Round) |
1209 | | |
1210 | | // k + w is in mm0, a is in mm4, e is in mm5 |
1211 | | AS2( paddq mm0, [edi+7*8]) // h |
1212 | | AS2( movq mm2, [edi+5*8]) // f |
1213 | | AS2( movq mm3, [edi+6*8]) // g |
1214 | | AS2( pxor mm2, mm3) |
1215 | | AS2( pand mm2, mm5) |
1216 | | SSE2_S0_S1(mm5,14,18,41) |
1217 | | AS2( pxor mm2, mm3) |
1218 | | AS2( paddq mm0, mm2) // h += Ch(e,f,g) |
1219 | | AS2( paddq mm5, mm0) // h += S1(e) |
1220 | | AS2( movq mm2, [edi+1*8]) // b |
1221 | | AS2( movq mm1, mm2) |
1222 | | AS2( por mm2, mm4) |
1223 | | AS2( pand mm2, [edi+2*8]) // c |
1224 | | AS2( pand mm1, mm4) |
1225 | | AS2( por mm1, mm2) |
1226 | | AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c) |
1227 | | AS2( paddq mm5, [edi+3*8]) // e = d + h |
1228 | | AS2( movq [edi+3*8], mm5) |
1229 | | AS2( movq [edi+11*8], mm5) |
1230 | | SSE2_S0_S1(mm4,28,34,39) // S0(a) |
1231 | | AS2( paddq mm4, mm1) // a = temp + S0(a) |
1232 | | AS2( movq [edi-8], mm4) |
1233 | | AS2( movq [edi+7*8], mm4) |
1234 | | AS1( ret) |
1235 | | |
1236 | | // first 16 rounds |
1237 | | ASL(0) |
1238 | | AS2( movq mm0, [edx+eax*8]) |
1239 | | AS2( movq [esi+eax*8], mm0) |
1240 | | AS2( movq [esi+eax*8+16*8], mm0) |
1241 | | AS2( paddq mm0, [ebx+eax*8]) |
1242 | | ASC( call, SHA512_Round) |
1243 | | |
1244 | | AS1( inc eax) |
1245 | | AS2( sub edi, 8) |
1246 | | AS2( test eax, 7) |
1247 | | ASJ( jnz, 0, b) |
1248 | | AS2( add edi, 8*8) |
1249 | | AS2( cmp eax, 16) |
1250 | | ASJ( jne, 0, b) |
1251 | | |
1252 | | // rest of the rounds |
1253 | | AS2( movdqu xmm0, [esi+(16-2)*8]) |
1254 | | ASL(1) |
1255 | | // data expansion, W[i-2] already in xmm0 |
1256 | | AS2( movdqu xmm3, [esi]) |
1257 | | AS2( paddq xmm3, [esi+(16-7)*8]) |
1258 | | AS2( movdqu xmm2, [esi+(16-15)*8]) |
1259 | | SSE2_s1(xmm0, 6, 19, 61) |
1260 | | AS2( paddq xmm0, xmm3) |
1261 | | SSE2_s0(xmm2, 1, 7, 8) |
1262 | | AS2( paddq xmm0, xmm2) |
1263 | | AS2( movdq2q mm0, xmm0) |
1264 | | AS2( movhlps xmm1, xmm0) |
1265 | | AS2( paddq mm0, [ebx+eax*8]) |
1266 | | AS2( movlps [esi], xmm0) |
1267 | | AS2( movlps [esi+8], xmm1) |
1268 | | AS2( movlps [esi+8*16], xmm0) |
1269 | | AS2( movlps [esi+8*17], xmm1) |
1270 | | // 2 rounds |
1271 | | ASC( call, SHA512_Round) |
1272 | | AS2( sub edi, 8) |
1273 | | AS2( movdq2q mm0, xmm1) |
1274 | | AS2( paddq mm0, [ebx+eax*8+8]) |
1275 | | ASC( call, SHA512_Round) |
1276 | | // update indices and loop |
1277 | | AS2( add esi, 16) |
1278 | | AS2( add eax, 2) |
1279 | | AS2( sub edi, 8) |
1280 | | AS2( test eax, 7) |
1281 | | ASJ( jnz, 1, b) |
1282 | | // do housekeeping every 8 rounds |
1283 | | AS2( mov esi, 0xf) |
1284 | | AS2( and esi, eax) |
1285 | | AS2( lea esi, [esp+4+20*8+8+esi*8]) |
1286 | | AS2( add edi, 8*8) |
1287 | | AS2( cmp eax, 80) |
1288 | | ASJ( jne, 1, b) |
1289 | | |
1290 | | #define SSE2_CombineState(i) \ |
1291 | | AS2( movdqu xmm0, [edi+i*16])\ |
1292 | | AS2( paddq xmm0, [ecx+i*16])\ |
1293 | | AS2( movdqu [ecx+i*16], xmm0) |
1294 | | |
1295 | | SSE2_CombineState(0) |
1296 | | SSE2_CombineState(1) |
1297 | | SSE2_CombineState(2) |
1298 | | SSE2_CombineState(3) |
1299 | | |
1300 | | AS_POP_IF86( sp) |
1301 | | AS1( emms) |
1302 | | |
1303 | | #if defined(__GNUC__) |
1304 | | AS_POP_IF86( bx) |
1305 | | ATT_PREFIX |
1306 | | : |
1307 | | : "a" (SHA512_K), "c" (state), "d" (data) |
1308 | | : "%ebx", "%esi", "%edi", "memory", "cc" |
1309 | | #if (CRYPTOPP_BOOL_X64) |
1310 | | , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", |
1311 | | "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7" |
1312 | | #endif |
1313 | | ); |
1314 | | #else |
1315 | | AS1( pop edi) |
1316 | | AS1( pop esi) |
1317 | | AS1( pop ebx) |
1318 | | AS1( ret) |
1319 | | #endif |
1320 | | } |
1321 | | |
1322 | | ANONYMOUS_NAMESPACE_END |
1323 | | |
1324 | | #endif // CRYPTOPP_SSE2_ASM_AVAILABLE |
1325 | | |
1326 | | ANONYMOUS_NAMESPACE_BEGIN |
1327 | | |
1328 | 667k | #define a(i) T[(0-i)&7] |
1329 | 667k | #define b(i) T[(1-i)&7] |
1330 | 667k | #define c(i) T[(2-i)&7] |
1331 | 54.1M | #define d(i) T[(3-i)&7] |
1332 | 667k | #define e(i) T[(4-i)&7] |
1333 | 667k | #define f(i) T[(5-i)&7] |
1334 | 667k | #define g(i) T[(6-i)&7] |
1335 | 160M | #define h(i) T[(7-i)&7] |
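// Same rotating-index scheme as the SHA-256 macros earlier in the file, now
// over 64-bit words with the SHA-512 sigma functions and round constants.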
1336 | | |
1337 | 10.6M | #define blk0(i) (W[i]=data[i]) |
1338 | 42.7M | #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15])) |
1339 | | |
1340 | 53.4M | #define Ch(x,y,z) (z^(x&(y^z))) |
1341 | 53.4M | #define Maj(x,y,z) (y^((x^y)&(y^z))) |
1342 | | |
1343 | 42.7M | #define s0(x) (rotrConstant<1>(x)^rotrConstant<8>(x)^(x>>7)) |
1344 | 42.7M | #define s1(x) (rotrConstant<19>(x)^rotrConstant<61>(x)^(x>>6)) |
1345 | 53.4M | #define S0(x) (rotrConstant<28>(x)^rotrConstant<34>(x)^rotrConstant<39>(x)) |
1346 | 53.4M | #define S1(x) (rotrConstant<14>(x)^rotrConstant<18>(x)^rotrConstant<41>(x)) |
1347 | | |
1348 | 53.4M | #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+\ |
1349 | 53.4M | (j?blk2(i):blk0(i));d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)); |
1350 | | |
1351 | | void SHA512_HashBlock_CXX(word64 *state, const word64 *data) |
1352 | 667k | { |
1353 | 667k | CRYPTOPP_ASSERT(state); |
1354 | 667k | CRYPTOPP_ASSERT(data); |
1355 | | |
1356 | 667k | word64 W[16]={0}, T[8]; |
1357 | | |
1358 | | /* Copy context->state[] to working vars */ |
1359 | 667k | std::memcpy(T, state, sizeof(T)); |
1360 | | |
1361 | | /* 80 operations, partially loop unrolled */ |
1362 | 4.00M | for (unsigned int j=0; j<80; j+=16) |
1363 | 3.33M | { |
1364 | 3.33M | R( 0); R( 1); R( 2); R( 3); |
1365 | 3.33M | R( 4); R( 5); R( 6); R( 7); |
1366 | 3.33M | R( 8); R( 9); R(10); R(11); |
1367 | 3.33M | R(12); R(13); R(14); R(15); |
1368 | 3.33M | } |
1369 | | |
1370 | 667k | state[0] += a(0); |
1371 | 667k | state[1] += b(0); |
1372 | 667k | state[2] += c(0); |
1373 | 667k | state[3] += d(0); |
1374 | 667k | state[4] += e(0); |
1375 | 667k | state[5] += f(0); |
1376 | 667k | state[6] += g(0); |
1377 | 667k | state[7] += h(0); |
1378 | 667k | } |
1379 | | |
1380 | | ANONYMOUS_NAMESPACE_END |
1381 | | |
1382 | | void SHA512::Transform(word64 *state, const word64 *data) |
1383 | 667k | { |
1384 | 667k | CRYPTOPP_ASSERT(state); |
1385 | 667k | CRYPTOPP_ASSERT(data); |
1386 | | |
1387 | | #if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86) |
1388 | | if (HasSSE2()) |
1389 | | { |
1390 | | SHA512_HashBlock_SSE2(state, data); |
1391 | | return; |
1392 | | } |
1393 | | #endif |
1394 | | #if CRYPTOGAMS_ARM_SHA512 |
1395 | | # if CRYPTOPP_ARM_NEON_AVAILABLE |
1396 | | if (HasNEON()) |
1397 | | { |
1398 | | # if (CRYPTOPP_LITTLE_ENDIAN) |
1399 | | word64 dataBuf[16]; |
1400 | | ByteReverse(dataBuf, data, SHA512::BLOCKSIZE); |
1401 | | cryptogams_sha512_block_data_order_neon(state, dataBuf, 1); |
1402 | | # else |
1403 | | cryptogams_sha512_block_data_order_neon(state, data, 1); |
1404 | | # endif |
1405 | | return; |
1406 | | } |
1407 | | else |
1408 | | # endif |
1409 | | if (HasARMv7()) |
1410 | | { |
1411 | | # if (CRYPTOPP_LITTLE_ENDIAN) |
1412 | | word64 dataBuf[16]; |
1413 | | ByteReverse(dataBuf, data, SHA512::BLOCKSIZE); |
1414 | | cryptogams_sha512_block_data_order(state, dataBuf, 1); |
1415 | | # else |
1416 | | cryptogams_sha512_block_data_order(state, data, 1); |
1417 | | # endif |
1418 | | return; |
1419 | | } |
1420 | | #endif |
1421 | | #if CRYPTOPP_POWER8_SHA_AVAILABLE |
1422 | | if (HasSHA512()) |
1423 | | { |
1424 | | SHA512_HashMultipleBlocks_POWER8(state, data, SHA512::BLOCKSIZE, BIG_ENDIAN_ORDER); |
1425 | | return; |
1426 | | } |
1427 | | #endif |
1428 | | |
1429 | 667k | SHA512_HashBlock_CXX(state, data); |
1430 | 667k | } |
1431 | | |
1432 | | #undef Ch |
1433 | | #undef Maj |
1434 | | |
1435 | | #undef s0 |
1436 | | #undef s1 |
1437 | | #undef S0 |
1438 | | #undef S1 |
1439 | | |
1440 | | #undef blk0 |
1441 | | #undef blk1 |
1442 | | #undef blk2 |
1443 | | |
1444 | | #undef R |
1445 | | |
1446 | | #undef a |
1447 | | #undef b |
1448 | | #undef c |
1449 | | #undef d |
1450 | | #undef e |
1451 | | #undef f |
1452 | | #undef g |
1453 | | #undef h |
1454 | | |
1455 | | NAMESPACE_END |
1456 | | |
1457 | | #endif // Not CRYPTOPP_GENERATE_X64_MASM |
1458 | | #endif // Not CRYPTOPP_IMPORTS |