/src/cryptopp/lsh512_avx.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // lsh.cpp - written and placed in the public domain by Jeffrey Walton |
2 | | // Based on the specification and source code provided by |
3 | | // Korea Internet & Security Agency (KISA) website. Also |
4 | | // see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do |
5 | | // and https://seed.kisa.or.kr/kisa/Board/22/detailView.do. |
6 | | |
7 | | // We are hitting some sort of GCC bug in the LSH AVX2 code path. |
8 | | // Clang is OK on the AVX2 code path. We believe it is GCC Issue |
9 | | // 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It |
10 | | // makes using zeroupper a little tricky. |
11 | | |
12 | | #include "pch.h" |
13 | | #include "config.h" |
14 | | |
15 | | #include "lsh.h" |
16 | | #include "misc.h" |
17 | | |
18 | | // Squash MS LNK4221 and libtool warnings |
19 | | extern const char LSH512_AVX_FNAME[] = __FILE__; |
20 | | |
21 | | #if defined(CRYPTOPP_AVX2_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE) |
22 | | |
23 | | #if defined(CRYPTOPP_AVX2_AVAILABLE) |
24 | | # include <emmintrin.h> |
25 | | # include <immintrin.h> |
26 | | #endif |
27 | | |
28 | | #if defined(CRYPTOPP_GCC_COMPATIBLE) |
29 | | # include <x86intrin.h> |
30 | | #endif |
31 | | |
32 | | ANONYMOUS_NAMESPACE_BEGIN |
33 | | |
34 | | /* LSH Constants */ |
35 | | |
36 | | const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256; |
37 | | // const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048; |
38 | | // const unsigned int LSH512_CV_BYTE_LEN = 128; |
39 | | const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64; |
40 | | |
41 | | // const unsigned int MSG_BLK_WORD_LEN = 32; |
42 | | const unsigned int CV_WORD_LEN = 16; |
43 | | const unsigned int CONST_WORD_LEN = 8; |
44 | | // const unsigned int HASH_VAL_MAX_WORD_LEN = 8; |
45 | | const unsigned int NUM_STEPS = 28; |
46 | | |
47 | | const unsigned int ROT_EVEN_ALPHA = 23; |
48 | | const unsigned int ROT_EVEN_BETA = 59; |
49 | | const unsigned int ROT_ODD_ALPHA = 7; |
50 | | const unsigned int ROT_ODD_BETA = 3; |
51 | | |
52 | | const unsigned int LSH_TYPE_512_512 = 0x0010040; |
53 | | const unsigned int LSH_TYPE_512_384 = 0x0010030; |
54 | | const unsigned int LSH_TYPE_512_256 = 0x0010020; |
55 | | const unsigned int LSH_TYPE_512_224 = 0x001001C; |
56 | | |
57 | | // const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384; |
58 | | // const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512; |
59 | | |
60 | | /* Error Code */ |
61 | | |
62 | | const unsigned int LSH_SUCCESS = 0x0; |
63 | | // const unsigned int LSH_ERR_NULL_PTR = 0x2401; |
64 | | // const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402; |
65 | | const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403; |
66 | | const unsigned int LSH_ERR_INVALID_STATE = 0x2404; |
67 | | |
68 | | /* Index into our state array */ |
69 | | |
70 | | const unsigned int AlgorithmType = 80; |
71 | | const unsigned int RemainingBits = 81; |
72 | | |
73 | | NAMESPACE_END |
74 | | |
75 | | NAMESPACE_BEGIN(CryptoPP) |
76 | | NAMESPACE_BEGIN(LSH) |
77 | | |
78 | | // lsh512.cpp |
79 | | extern const word64 LSH512_IV224[CV_WORD_LEN]; |
80 | | extern const word64 LSH512_IV256[CV_WORD_LEN]; |
81 | | extern const word64 LSH512_IV384[CV_WORD_LEN]; |
82 | | extern const word64 LSH512_IV512[CV_WORD_LEN]; |
83 | | extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS]; |
84 | | |
85 | | NAMESPACE_END // LSH |
86 | | NAMESPACE_END // Crypto++ |
87 | | |
88 | | ANONYMOUS_NAMESPACE_BEGIN |
89 | | |
90 | | using CryptoPP::byte; |
91 | | using CryptoPP::word32; |
92 | | using CryptoPP::word64; |
93 | | using CryptoPP::rotlFixed; |
94 | | using CryptoPP::rotlConstant; |
95 | | |
96 | | using CryptoPP::GetBlock; |
97 | | using CryptoPP::LittleEndian; |
98 | | using CryptoPP::ConditionalByteReverse; |
99 | | using CryptoPP::LITTLE_ENDIAN_ORDER; |
100 | | |
101 | | using CryptoPP::LSH::LSH512_IV224; |
102 | | using CryptoPP::LSH::LSH512_IV256; |
103 | | using CryptoPP::LSH::LSH512_IV384; |
104 | | using CryptoPP::LSH::LSH512_IV512; |
105 | | using CryptoPP::LSH::LSH512_StepConstants; |
106 | | |
107 | | typedef byte lsh_u8; |
108 | | typedef word32 lsh_u32; |
109 | | typedef word64 lsh_u64; |
110 | | typedef word32 lsh_uint; |
111 | | typedef word32 lsh_err; |
112 | | typedef word32 lsh_type; |
113 | | |
// Non-owning view over the caller's contiguous word64 state array.
// Layout (in word64 units): [0,8) cv_l, [8,16) cv_r, [16,48) sub_msgs,
// [48,...) last partial message block (accessed as bytes).
struct LSH512_AVX2_Context
{
	LSH512_AVX2_Context(word64* state, word64 algType, word64& remainingBitLength) :
		cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
		last_block(reinterpret_cast<byte*>(state+48)),
		remain_databitlen(remainingBitLength),
		alg_type(static_cast<lsh_type>(algType)) {}

	lsh_u64* cv_l;  // start of our state block
	lsh_u64* cv_r;
	lsh_u64* sub_msgs;
	lsh_u8*  last_block;
	lsh_u64& remain_databitlen;  // reference so updates persist in caller's state
	lsh_type alg_type;
};
129 | | |
// Non-owning view of the four 8-word sub-message areas inside the same
// state array (words [16,48)); aliases LSH512_AVX2_Context::sub_msgs.
struct LSH512_AVX2_Internal
{
	LSH512_AVX2_Internal(word64* state) :
		submsg_e_l(state+16), submsg_e_r(state+24),
		submsg_o_l(state+32), submsg_o_r(state+40) { }

	lsh_u64* submsg_e_l; /* even left sub-message */
	lsh_u64* submsg_e_r; /* even right sub-message */
	lsh_u64* submsg_o_l; /* odd left sub-message */
	lsh_u64* submsg_o_r; /* odd right sub-message */
};
141 | | |
142 | | // Zero the upper 128 bits of all YMM registers on exit. |
143 | | // It avoids AVX state transition penalties when saving state. |
144 | | // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735 |
145 | | // makes using zeroupper a little tricky. |
146 | | |
// RAII guard: zero the upper 128 bits of all YMM registers on scope exit.
// Avoids AVX->SSE state transition penalties when the OS saves state.
// Scoped as a destructor because GCC Issue 82735 makes placing an explicit
// _mm256_zeroupper() call tricky on the AVX2 code path.
struct AVX_Cleanup
{
	~AVX_Cleanup() {
		_mm256_zeroupper();
	}
};
153 | | |
154 | | // const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 }; |
155 | | |
156 | | /* LSH AlgType Macro */ |
157 | | |
158 | 0 | inline bool LSH_IS_LSH512(lsh_uint val) { |
159 | 0 | return (val & 0xf0000) == 0x10000; |
160 | 0 | } |
161 | | |
162 | 17.8k | inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) { |
163 | 17.8k | return val >> 24; |
164 | 17.8k | } |
165 | | |
166 | 17.8k | inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) { |
167 | 17.8k | return val & 0xffff; |
168 | 17.8k | } |
169 | | |
170 | 0 | inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) { |
171 | 0 | return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val); |
172 | 0 | } |
173 | | |
// Interpret v as little-endian: byte-swap only on big-endian targets.
inline lsh_u64 loadLE64(lsh_u64 v) {
	return ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v);
}
177 | | |
// 64-bit rotate-left; rotlFixed handles the r == 0 case without UB.
lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
	return rotlFixed(x, r);
}
181 | | |
182 | | // Original code relied upon unaligned lsh_u64 buffer |
183 | | inline void load_msg_blk(LSH512_AVX2_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN]) |
184 | 209k | { |
185 | 209k | lsh_u64* submsg_e_l = i_state->submsg_e_l; |
186 | 209k | lsh_u64* submsg_e_r = i_state->submsg_e_r; |
187 | 209k | lsh_u64* submsg_o_l = i_state->submsg_o_l; |
188 | 209k | lsh_u64* submsg_o_r = i_state->submsg_o_r; |
189 | | |
190 | 209k | _mm256_storeu_si256(M256_CAST(submsg_e_l+0), |
191 | 209k | _mm256_loadu_si256(CONST_M256_CAST(msgblk+0))); |
192 | 209k | _mm256_storeu_si256(M256_CAST(submsg_e_l+4), |
193 | 209k | _mm256_loadu_si256(CONST_M256_CAST(msgblk+32))); |
194 | | |
195 | 209k | _mm256_storeu_si256(M256_CAST(submsg_e_r+0), |
196 | 209k | _mm256_loadu_si256(CONST_M256_CAST(msgblk+64))); |
197 | 209k | _mm256_storeu_si256(M256_CAST(submsg_e_r+4), |
198 | 209k | _mm256_loadu_si256(CONST_M256_CAST(msgblk+96))); |
199 | | |
200 | 209k | _mm256_storeu_si256(M256_CAST(submsg_o_l+0), |
201 | 209k | _mm256_loadu_si256(CONST_M256_CAST(msgblk+128))); |
202 | 209k | _mm256_storeu_si256(M256_CAST(submsg_o_l+4), |
203 | 209k | _mm256_loadu_si256(CONST_M256_CAST(msgblk+160))); |
204 | | |
205 | 209k | _mm256_storeu_si256(M256_CAST(submsg_o_r+0), |
206 | 209k | _mm256_loadu_si256(CONST_M256_CAST(msgblk+192))); |
207 | 209k | _mm256_storeu_si256(M256_CAST(submsg_o_r+4), |
208 | 209k | _mm256_loadu_si256(CONST_M256_CAST(msgblk+224))); |
209 | 209k | } |
210 | | |
// Message expansion, even step: each even sub-message word becomes
// (odd word) + (permuted even word). The two permute masks apply the
// LSH word permutation to the low and high 4-word halves respectively.
inline void msg_exp_even(LSH512_AVX2_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u64* submsg_e_l = i_state->submsg_e_l;
	lsh_u64* submsg_e_r = i_state->submsg_e_r;
	lsh_u64* submsg_o_l = i_state->submsg_o_l;
	lsh_u64* submsg_o_r = i_state->submsg_o_r;

	_mm256_storeu_si256(M256_CAST(submsg_e_l+0), _mm256_add_epi64(
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
		_mm256_permute4x64_epi64(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
			_MM_SHUFFLE(1,0,2,3))));
	_mm256_storeu_si256(M256_CAST(submsg_e_l+4), _mm256_add_epi64(
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4)),
		_mm256_permute4x64_epi64(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4)),
			_MM_SHUFFLE(2,1,0,3))));

	_mm256_storeu_si256(M256_CAST(submsg_e_r+0), _mm256_add_epi64(
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
		_mm256_permute4x64_epi64(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
			_MM_SHUFFLE(1,0,2,3))));
	_mm256_storeu_si256(M256_CAST(submsg_e_r+4), _mm256_add_epi64(
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4)),
		_mm256_permute4x64_epi64(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4)),
			_MM_SHUFFLE(2,1,0,3))));
}
242 | | |
// Message expansion, odd step: mirror of msg_exp_even with the roles of
// the even and odd sub-messages swapped (odd += permuted odd, seeded by even).
inline void msg_exp_odd(LSH512_AVX2_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u64* submsg_e_l = i_state->submsg_e_l;
	lsh_u64* submsg_e_r = i_state->submsg_e_r;
	lsh_u64* submsg_o_l = i_state->submsg_o_l;
	lsh_u64* submsg_o_r = i_state->submsg_o_r;

	_mm256_storeu_si256(M256_CAST(submsg_o_l+0),
		_mm256_add_epi64(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
			_mm256_permute4x64_epi64(
				_mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
				_MM_SHUFFLE(1,0,2,3))));
	_mm256_storeu_si256(M256_CAST(submsg_o_l+4),
		_mm256_add_epi64(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4)),
			_mm256_permute4x64_epi64(
				_mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4)),
				_MM_SHUFFLE(2,1,0,3))));

	_mm256_storeu_si256(M256_CAST(submsg_o_r+0),
		_mm256_add_epi64(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
			_mm256_permute4x64_epi64(
				_mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
				_MM_SHUFFLE(1,0,2,3))));
	_mm256_storeu_si256(M256_CAST(submsg_o_r+4),
		_mm256_add_epi64(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4)),
			_mm256_permute4x64_epi64(
				_mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4)),
				_MM_SHUFFLE(2,1,0,3))));
}
278 | | |
279 | | inline void load_sc(const lsh_u64** p_const_v, size_t i) |
280 | 5.86M | { |
281 | 5.86M | *p_const_v = &LSH512_StepConstants[i]; |
282 | 5.86M | } |
283 | | |
// XOR the even sub-messages into the left and right chaining variables.
inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_AVX2_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u64* submsg_e_l = i_state->submsg_e_l;
	lsh_u64* submsg_e_r = i_state->submsg_e_r;

	_mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_e_l))));
	_mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_r)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_e_r))));

	_mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4))));
	_mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4))));
}
305 | | |
// XOR the odd sub-messages into the left and right chaining variables.
inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_AVX2_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u64* submsg_o_l = i_state->submsg_o_l;
	lsh_u64* submsg_o_r = i_state->submsg_o_r;

	_mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_l))));
	_mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_r)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_r))));

	_mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4))));
	_mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4))));
}
327 | | |
// Lane-wise 64-bit addition: cv_l[i] += cv_r[i] for all 8 words.
inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
{
	_mm256_storeu_si256(M256_CAST(cv_l), _mm256_add_epi64(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l)),
		_mm256_loadu_si256(CONST_M256_CAST(cv_r))));
	_mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_add_epi64(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
		_mm256_loadu_si256(CONST_M256_CAST(cv_r+4))));
}
337 | | |
// Rotate each of the 8 words left by the compile-time amount R,
// built from a shift-left/shift-right pair (AVX2 has no 64-bit rotate).
// Instantiated with R in {23, 59, 7, 3}, so 0 < R < 64 and 64-R is safe.
template <unsigned int R>
inline void rotate_blk(lsh_u64 cv[8])
{
	_mm256_storeu_si256(M256_CAST(cv), _mm256_or_si256(
		_mm256_slli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv)), R),
		_mm256_srli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv)), 64-R)));
	_mm256_storeu_si256(M256_CAST(cv+4), _mm256_or_si256(
		_mm256_slli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv+4)), R),
		_mm256_srli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv+4)), 64-R)));
}
348 | | |
// XOR the 8-word step-constant group into cv_l.
inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
{
	_mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l)),
		_mm256_loadu_si256(CONST_M256_CAST(const_v))));
	_mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
		_mm256_loadu_si256(CONST_M256_CAST(const_v+4))));
}
// Apply the per-word gamma rotations { 0, 16, 32, 48, 8, 24, 40, 56 } bits
// to cv_r[0..7]. All gamma values are multiples of 8, so each rotation is
// a byte permutation and one _mm256_shuffle_epi8 per 4-word half suffices;
// the set_epi8 masks encode the byte rotation for each 64-bit lane.
inline void rotate_msg_gamma(lsh_u64 cv_r[8])
{
	// g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
	_mm256_storeu_si256(M256_CAST(cv_r+0),
		_mm256_shuffle_epi8(
			_mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
			_mm256_set_epi8(
				/* hi lane */ 9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4,
				/* lo lane */ 13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
	_mm256_storeu_si256(M256_CAST(cv_r+4),
		_mm256_shuffle_epi8(
			_mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
			_mm256_set_epi8(
				/* hi lane */ 8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3,
				/* lo lane */ 12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
}
375 | | |
// LSH word permutation across the 16 chaining-variable words: permute
// each 4-word group in-lane, then swap the four groups so that
// (l_lo, l_hi, r_lo, r_hi) -> (l_hi, r_hi, l_lo, r_lo).
inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
{
	__m256i temp[2];
	_mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_permute4x64_epi64(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l+0)), _MM_SHUFFLE(3,1,0,2)));
	_mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_permute4x64_epi64(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l+4)), _MM_SHUFFLE(3,1,0,2)));
	_mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_permute4x64_epi64(
		_mm256_loadu_si256(CONST_M256_CAST(cv_r+0)), _MM_SHUFFLE(1,2,3,0)));
	_mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_permute4x64_epi64(
		_mm256_loadu_si256(CONST_M256_CAST(cv_r+4)), _MM_SHUFFLE(1,2,3,0)));

	temp[0] = _mm256_loadu_si256(CONST_M256_CAST(cv_l+0));
	temp[1] = _mm256_loadu_si256(CONST_M256_CAST(cv_r+0));

	_mm256_storeu_si256(M256_CAST(cv_l+0),
		_mm256_loadu_si256(CONST_M256_CAST(cv_l+4)));
	_mm256_storeu_si256(M256_CAST(cv_l+4),
		_mm256_loadu_si256(CONST_M256_CAST(cv_r+4)));

	_mm256_storeu_si256(M256_CAST(cv_r+0), temp[0]);
	_mm256_storeu_si256(M256_CAST(cv_r+4), temp[1]);
}
399 | | |
400 | | /* -------------------------------------------------------- * |
401 | | * step function |
402 | | * -------------------------------------------------------- */ |
403 | | |
// One LSH mix step on the chaining variables:
//   cv_l += cv_r; cv_l <<<= Alpha; cv_l ^= SC;
//   cv_r += cv_l; cv_r <<<= Beta;  cv_l += cv_r; cv_r = gamma(cv_r).
// Alpha/Beta are the (even, odd) rotation pairs (23,59) and (7,3).
template <unsigned int Alpha, unsigned int Beta>
inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
{
	add_blk(cv_l, cv_r);
	rotate_blk<Alpha>(cv_l);
	xor_with_const(cv_l, const_v);
	add_blk(cv_r, cv_l);
	rotate_blk<Beta>(cv_r);
	add_blk(cv_l, cv_r);
	rotate_msg_gamma(cv_r);
}
415 | | |
416 | | /* -------------------------------------------------------- * |
417 | | * compression function |
418 | | * -------------------------------------------------------- */ |
419 | | |
// LSH-512 compression function: absorb one 256-byte message block into the
// chaining variables via NUM_STEPS (28) mix steps. Steps alternate even/odd;
// the first even/odd pair uses the raw sub-messages, subsequent pairs expand
// them first, and a final expansion+add follows the loop (the Msgadd of
// step NUM_STEPS in the specification).
inline void compress(LSH512_AVX2_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	// s_state aliases the sub-message area of ctx's state array.
	LSH512_AVX2_Internal s_state(ctx->cv_l);
	LSH512_AVX2_Internal* i_state = &s_state;

	const lsh_u64* const_v = NULL;
	lsh_u64 *cv_l = ctx->cv_l;
	lsh_u64 *cv_r = ctx->cv_r;

	load_msg_blk(i_state, pdMsgBlk);

	msg_add_even(cv_l, cv_r, i_state);
	load_sc(&const_v, 0);
	mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
	word_perm(cv_l, cv_r);

	msg_add_odd(cv_l, cv_r, i_state);
	load_sc(&const_v, 8);
	mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
	word_perm(cv_l, cv_r);

	for (size_t i = 1; i < NUM_STEPS / 2; i++)
	{
		msg_exp_even(i_state);
		msg_add_even(cv_l, cv_r, i_state);
		load_sc(&const_v, 16 * i);
		mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);

		msg_exp_odd(i_state);
		msg_add_odd(cv_l, cv_r, i_state);
		load_sc(&const_v, 16 * i + 8);
		mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);
	}

	msg_exp_even(i_state);
	msg_add_even(cv_l, cv_r, i_state);
}
461 | | |
462 | | /* -------------------------------------------------------- */ |
463 | | |
// Copy a 16-word IV into cv_l/cv_r. Aligned loads from the IV (the IV
// tables are 32-byte aligned); unaligned stores to the caller's state.
inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
{
	// The IV's are 32-byte aligned so we can use aligned loads.
	_mm256_storeu_si256(M256_CAST(cv_l+0),
		_mm256_load_si256(CONST_M256_CAST(iv+0)));
	_mm256_storeu_si256(M256_CAST(cv_l+4),
		_mm256_load_si256(CONST_M256_CAST(iv+4)));

	_mm256_storeu_si256(M256_CAST(cv_r+0),
		_mm256_load_si256(CONST_M256_CAST(iv+8)));
	_mm256_storeu_si256(M256_CAST(cv_r+4),
		_mm256_load_si256(CONST_M256_CAST(iv+12)));
}
477 | | |
// Zero both chaining variables (used when deriving an IV for a
// non-standard digest size in lsh512_init_avx2).
inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
{
	_mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
	_mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_setzero_si256());
	_mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
	_mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_setzero_si256());
}
485 | | |
// Zero the first 16 words of the sub-message area.
// NOTE(review): sub_msgs spans 32 words (state[16,48)); only words
// [0,16) are cleared here — appears intentional upstream, but confirm
// against the reference lsh512.cpp before changing.
inline void zero_submsgs(LSH512_AVX2_Context* ctx)
{
	lsh_u64* sub_msgs = ctx->sub_msgs;

	_mm256_storeu_si256(M256_CAST(sub_msgs+ 0),
		_mm256_setzero_si256());
	_mm256_storeu_si256(M256_CAST(sub_msgs+ 4),
		_mm256_setzero_si256());

	_mm256_storeu_si256(M256_CAST(sub_msgs+ 8),
		_mm256_setzero_si256());
	_mm256_storeu_si256(M256_CAST(sub_msgs+12),
		_mm256_setzero_si256());
}
500 | | |
// Initialize state for LSH-512-224: clear sub-messages, load the 224-bit IV.
inline void init224(LSH512_AVX2_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
}
508 | | |
// Initialize state for LSH-512-256: clear sub-messages, load the 256-bit IV.
inline void init256(LSH512_AVX2_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
}
516 | | |
// Initialize state for LSH-512-384: clear sub-messages, load the 384-bit IV.
inline void init384(LSH512_AVX2_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
}
524 | | |
// Initialize state for LSH-512-512: clear sub-messages, load the 512-bit IV.
inline void init512(LSH512_AVX2_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
}
532 | | |
533 | | /* -------------------------------------------------------- */ |
534 | | |
// Finalization fold: cv_l ^= cv_r. The digest is then read from cv_l.
inline void fin(LSH512_AVX2_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	_mm256_storeu_si256(M256_CAST(ctx->cv_l+0), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+0)),
		_mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+0))));

	_mm256_storeu_si256(M256_CAST(ctx->cv_l+4), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+4)),
		_mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+4))));
}
547 | | |
548 | | /* -------------------------------------------------------- */ |
549 | | |
// Extract the digest from cv_l: copy the whole-byte prefix, then mask the
// final byte when the digest length is not byte-aligned (all standard
// LSH-512 variants are byte-aligned, so the mask branch is normally dead).
inline void get_hash(LSH512_AVX2_Context* ctx, lsh_u8* pbHashVal)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);
	CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

	lsh_uint alg_type = ctx->alg_type;
	lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
	lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);

	// Multiplying by sizeof(lsh_u8) looks odd...
	std::memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
	if (hash_val_bit_len){
		// Keep only the high (8 - bit_len) bits of the last byte.
		pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
	}
}
566 | | |
567 | | /* -------------------------------------------------------- */ |
568 | | |
// Initialize the context for the algorithm type stored in ctx->alg_type.
// Standard variants load a precomputed IV; any other type derives the IV
// by running the step function over a zeroed state (per the LSH spec).
// Returns LSH_SUCCESS. AVX_Cleanup issues vzeroupper on every exit path.
lsh_err lsh512_init_avx2(LSH512_AVX2_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);

	lsh_u32 alg_type = ctx->alg_type;
	const lsh_u64* const_v = NULL;
	ctx->remain_databitlen = 0;

	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
	AVX_Cleanup cleanup;

	switch (alg_type){
	case LSH_TYPE_512_512:
		init512(ctx);
		return LSH_SUCCESS;
	case LSH_TYPE_512_384:
		init384(ctx);
		return LSH_SUCCESS;
	case LSH_TYPE_512_256:
		init256(ctx);
		return LSH_SUCCESS;
	case LSH_TYPE_512_224:
		init224(ctx);
		return LSH_SUCCESS;
	default:
		break;
	}

	// Non-standard digest size: derive the IV from scratch.
	lsh_u64* cv_l = ctx->cv_l;
	lsh_u64* cv_r = ctx->cv_r;

	zero_iv(cv_l, cv_r);
	cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
	cv_l[1] = LSH_GET_HASHBIT(alg_type);

	for (size_t i = 0; i < NUM_STEPS / 2; i++)
	{
		//Mix
		load_sc(&const_v, i * 16);
		mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);

		load_sc(&const_v, i * 16 + 8);
		mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);
	}

	return LSH_SUCCESS;
}
619 | | |
// Absorb databitlen bits of input: top up and flush any buffered partial
// block, compress full 256-byte blocks directly from `data`, then buffer
// the tail in ctx->last_block. This implementation is byte-oriented —
// databitlen must be a multiple of 8 (pos2/remain_msg_bit are fixed at 0
// and their branches are dead, kept to mirror the reference code).
// Returns LSH_SUCCESS, or LSH_ERR_INVALID_STATE if the buffered length is
// corrupt.
lsh_err lsh512_update_avx2(LSH512_AVX2_Context* ctx, const lsh_u8* data, size_t databitlen)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(data != NULLPTR);
	CRYPTOPP_ASSERT(databitlen % 8 == 0);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);

	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
	AVX_Cleanup cleanup;

	if (databitlen == 0){
		return LSH_SUCCESS;
	}

	// We are byte oriented. tail bits will always be 0.
	size_t databytelen = databitlen >> 3;
	// lsh_uint pos2 = databitlen & 0x7;
	const size_t pos2 = 0;

	size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
	// lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
	const size_t remain_msg_bit = 0;

	if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
		return LSH_ERR_INVALID_STATE;
	}
	if (remain_msg_bit > 0){
		return LSH_ERR_INVALID_DATABITLEN;
	}

	// Input fits in the buffer: stash it and return.
	if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
		std::memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
		ctx->remain_databitlen += (lsh_uint)databitlen;
		remain_msg_byte += (lsh_uint)databytelen;
		if (pos2){
			ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
		}
		return LSH_SUCCESS;
	}

	// Complete and compress the buffered partial block first.
	if (remain_msg_byte > 0){
		size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
		std::memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
		compress(ctx, ctx->last_block);
		data += more_byte;
		databytelen -= more_byte;
		remain_msg_byte = 0;
		ctx->remain_databitlen = 0;
	}

	while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
	{
		// This call to compress caused some trouble.
		// The data pointer can become unaligned in the
		// previous block.
		compress(ctx, data);
		data += LSH512_MSG_BLK_BYTE_LEN;
		databytelen -= LSH512_MSG_BLK_BYTE_LEN;
	}

	// Buffer the remaining tail for the next update/final call.
	if (databytelen > 0){
		std::memcpy(ctx->last_block, data, databytelen);
		ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
	}

	if (pos2){
		ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
		ctx->remain_databitlen += pos2;
	}
	return LSH_SUCCESS;
}
691 | | |
// Finalize: append the 0x80 padding byte after the buffered data, zero the
// rest of the block, compress it, fold cv_l ^= cv_r, and emit the digest.
// remain_msg_bit is fixed at 0 (byte-oriented); its branch is dead but kept
// to mirror the reference code. Returns LSH_SUCCESS or
// LSH_ERR_INVALID_STATE.
lsh_err lsh512_final_avx2(LSH512_AVX2_Context* ctx, lsh_u8* hashval)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(hashval != NULLPTR);

	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
	AVX_Cleanup cleanup;

	// We are byte oriented. tail bits will always be 0.
	size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
	// lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
	const size_t remain_msg_bit = 0;

	if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
		return LSH_ERR_INVALID_STATE;
	}

	if (remain_msg_bit){
		ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
	}
	else{
		ctx->last_block[remain_msg_byte] = 0x80;
	}
	std::memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

	compress(ctx, ctx->last_block);

	fin(ctx);
	get_hash(ctx, hashval);

	return LSH_SUCCESS;
}
724 | | |
725 | | ANONYMOUS_NAMESPACE_END |
726 | | |
727 | | NAMESPACE_BEGIN(CryptoPP) |
728 | | |
// Entry point from LSH512_Base: reset the hash state. The algorithm type
// and remaining-bit counter live inside the state array itself
// (indices AlgorithmType/RemainingBits).
extern
void LSH512_Base_Restart_AVX2(word64* state)
{
	state[RemainingBits] = 0;
	LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh512_init_avx2(&ctx);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_avx2 failed");
}
739 | | |
// Entry point from LSH512_Base: absorb `size` bytes of input
// (converted to bits for the byte-oriented core).
extern
void LSH512_Base_Update_AVX2(word64* state, const byte *input, size_t size)
{
	LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh512_update_avx2(&ctx, input, 8*size);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_avx2 failed");
}
749 | | |
// Entry point from LSH512_Base: finalize and write the digest to `hash`.
// The size argument is unused; truncation is handled by the caller.
extern
void LSH512_Base_TruncatedFinal_AVX2(word64* state, byte *hash, size_t)
{
	LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh512_final_avx2(&ctx, hash);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_avx2 failed");
}
759 | | |
760 | | NAMESPACE_END |
761 | | |
762 | | #endif // CRYPTOPP_AVX2_AVAILABLE |