Coverage Report

Created: 2024-11-21 07:03

/src/cryptopp/lsh512_sse.cpp
Line
Count
Source (jump to first uncovered line)
1
// lsh.cpp - written and placed in the public domain by Jeffrey Walton
2
//           Based on the specification and source code provided by
3
//           Korea Internet & Security Agency (KISA) website. Also
4
//           see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5
//           and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6
7
// We are hitting some sort of GCC bug in the LSH AVX2 code path.
8
// Clang is OK on the AVX2 code path. We believe it is GCC Issue
9
// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10
// makes using zeroupper a little tricky.
11
12
#include "pch.h"
13
#include "config.h"
14
15
#include "lsh.h"
16
#include "misc.h"
17
18
// Squash MS LNK4221 and libtool warnings
19
extern const char LSH512_SSE_FNAME[] = __FILE__;
20
21
#if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
22
23
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
24
# include <emmintrin.h>
25
# include <tmmintrin.h>
26
#endif
27
28
#if defined(CRYPTOPP_XOP_AVAILABLE)
29
# include <ammintrin.h>
30
#endif
31
32
#if defined(CRYPTOPP_GCC_COMPATIBLE)
33
# include <x86intrin.h>
34
#endif
35
36
ANONYMOUS_NAMESPACE_BEGIN
37
38
/* LSH Constants */
39
40
const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256;
41
// const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048;
42
// const unsigned int LSH512_CV_BYTE_LEN = 128;
43
const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64;
44
45
// const unsigned int MSG_BLK_WORD_LEN = 32;
46
const unsigned int CV_WORD_LEN = 16;
47
const unsigned int CONST_WORD_LEN = 8;
48
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
49
const unsigned int NUM_STEPS = 28;
50
51
const unsigned int ROT_EVEN_ALPHA = 23;
52
const unsigned int ROT_EVEN_BETA = 59;
53
const unsigned int ROT_ODD_ALPHA = 7;
54
const unsigned int ROT_ODD_BETA = 3;
55
56
const unsigned int LSH_TYPE_512_512 = 0x0010040;
57
const unsigned int LSH_TYPE_512_384 = 0x0010030;
58
const unsigned int LSH_TYPE_512_256 = 0x0010020;
59
const unsigned int LSH_TYPE_512_224 = 0x001001C;
60
61
// const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384;
62
// const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512;
63
64
/* Error Code */
65
66
const unsigned int LSH_SUCCESS = 0x0;
67
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
68
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
69
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
70
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;
71
72
/* Index into our state array */
73
74
const unsigned int AlgorithmType = 80;
75
const unsigned int RemainingBits = 81;
76
77
NAMESPACE_END
78
79
NAMESPACE_BEGIN(CryptoPP)
80
NAMESPACE_BEGIN(LSH)
81
82
// lsh512.cpp
83
extern const word64 LSH512_IV224[CV_WORD_LEN];
84
extern const word64 LSH512_IV256[CV_WORD_LEN];
85
extern const word64 LSH512_IV384[CV_WORD_LEN];
86
extern const word64 LSH512_IV512[CV_WORD_LEN];
87
extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS];
88
89
NAMESPACE_END  // LSH
90
NAMESPACE_END  // Crypto++
91
92
ANONYMOUS_NAMESPACE_BEGIN
93
94
using CryptoPP::byte;
95
using CryptoPP::word32;
96
using CryptoPP::word64;
97
using CryptoPP::rotlFixed;
98
using CryptoPP::rotlConstant;
99
100
using CryptoPP::GetBlock;
101
using CryptoPP::LittleEndian;
102
using CryptoPP::ConditionalByteReverse;
103
using CryptoPP::LITTLE_ENDIAN_ORDER;
104
105
using CryptoPP::LSH::LSH512_IV224;
106
using CryptoPP::LSH::LSH512_IV256;
107
using CryptoPP::LSH::LSH512_IV384;
108
using CryptoPP::LSH::LSH512_IV512;
109
using CryptoPP::LSH::LSH512_StepConstants;
110
111
typedef byte lsh_u8;
112
typedef word32 lsh_u32;
113
typedef word64 lsh_u64;
114
typedef word32 lsh_uint;
115
typedef word32 lsh_err;
116
typedef word32 lsh_type;
117
118
struct LSH512_SSSE3_Context
119
{
120
  LSH512_SSSE3_Context(word64* state, word64 algType, word64& remainingBitLength) :
121
    cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
122
    last_block(reinterpret_cast<byte*>(state+48)),
123
    remain_databitlen(remainingBitLength),
124
0
    alg_type(static_cast<lsh_type>(algType)) {}
125
126
  lsh_u64* cv_l;  // start of our state block
127
  lsh_u64* cv_r;
128
  lsh_u64* sub_msgs;
129
  lsh_u8*  last_block;
130
  lsh_u64& remain_databitlen;
131
  lsh_type alg_type;
132
};
133
134
struct LSH512_SSSE3_Internal
135
{
136
  LSH512_SSSE3_Internal(word64* state) :
137
    submsg_e_l(state+16), submsg_e_r(state+24),
138
0
    submsg_o_l(state+32), submsg_o_r(state+40) { }
139
140
  lsh_u64* submsg_e_l; /* even left sub-message  */
141
  lsh_u64* submsg_e_r; /* even right sub-message */
142
  lsh_u64* submsg_o_l; /* odd left sub-message   */
143
  lsh_u64* submsg_o_r; /* odd right sub-message  */
144
};
145
146
// const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
147
148
/* LSH AlgType Macro */
149
150
0
inline bool LSH_IS_LSH512(lsh_uint val) {
151
0
  return (val & 0xf0000) == 0x10000;
152
0
}
153
154
0
inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
155
0
  return val >> 24;
156
0
}
157
158
0
inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
159
0
  return val & 0xffff;
160
0
}
161
162
0
inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
163
0
  return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
164
0
}
165
166
0
inline lsh_u64 loadLE64(lsh_u64 v) {
167
0
  return ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v);
168
0
}
169
170
0
lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
171
0
  return rotlFixed(x, r);
172
0
}
173
174
// Original code relied upon unaligned lsh_u64 buffer
175
inline void load_msg_blk(LSH512_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN])
176
0
{
177
0
  lsh_u64* submsg_e_l = i_state->submsg_e_l;
178
0
  lsh_u64* submsg_e_r = i_state->submsg_e_r;
179
0
  lsh_u64* submsg_o_l = i_state->submsg_o_l;
180
0
  lsh_u64* submsg_o_r = i_state->submsg_o_r;
181
182
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+0),
183
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
184
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+2),
185
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
186
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+4),
187
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
188
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+6),
189
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));
190
191
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+0),
192
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
193
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+2),
194
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
195
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+4),
196
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
197
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+6),
198
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));
199
200
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+0),
201
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+128)));
202
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+2),
203
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+144)));
204
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+4),
205
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+160)));
206
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+6),
207
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+176)));
208
209
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+0),
210
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+192)));
211
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+2),
212
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+208)));
213
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+4),
214
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+224)));
215
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+6),
216
0
    _mm_loadu_si128(CONST_M128_CAST(msgblk+240)));
217
0
}
218
219
inline void msg_exp_even(LSH512_SSSE3_Internal* i_state)
220
0
{
221
0
  CRYPTOPP_ASSERT(i_state != NULLPTR);
222
223
0
  lsh_u64* submsg_e_l = i_state->submsg_e_l;
224
0
  lsh_u64* submsg_e_r = i_state->submsg_e_r;
225
0
  lsh_u64* submsg_o_l = i_state->submsg_o_l;
226
0
  lsh_u64* submsg_o_r = i_state->submsg_o_r;
227
228
0
  __m128i temp;
229
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_shuffle_epi32(
230
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)), _MM_SHUFFLE(1,0,3,2)));
231
232
0
  temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0));
233
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+0),
234
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)));
235
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+2), temp);
236
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_shuffle_epi32(
237
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)), _MM_SHUFFLE(1,0,3,2)));
238
239
0
  temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4));
240
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_unpacklo_epi64(
241
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
242
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
243
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_unpackhi_epi64(
244
0
    temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
245
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_shuffle_epi32(
246
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)), _MM_SHUFFLE(1,0,3,2)));
247
248
0
  temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0));
249
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+0),
250
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)));
251
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+2), temp);
252
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_shuffle_epi32(
253
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)), _MM_SHUFFLE(1,0,3,2)));
254
255
0
  temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4));
256
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_unpacklo_epi64(
257
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
258
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
259
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_unpackhi_epi64(
260
0
    temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
261
262
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi64(
263
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)),
264
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
265
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_add_epi64(
266
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)),
267
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
268
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi64(
269
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)),
270
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
271
0
  _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_add_epi64(
272
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
273
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
274
275
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi64(
276
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)),
277
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
278
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_add_epi64(
279
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)),
280
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
281
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi64(
282
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)),
283
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
284
0
  _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_add_epi64(
285
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
286
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
287
0
}
288
289
inline void msg_exp_odd(LSH512_SSSE3_Internal* i_state)
290
0
{
291
0
  CRYPTOPP_ASSERT(i_state != NULLPTR);
292
293
0
  lsh_u64* submsg_e_l = i_state->submsg_e_l;
294
0
  lsh_u64* submsg_e_r = i_state->submsg_e_r;
295
0
  lsh_u64* submsg_o_l = i_state->submsg_o_l;
296
0
  lsh_u64* submsg_o_r = i_state->submsg_o_r;
297
298
0
  __m128i temp;
299
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_shuffle_epi32(
300
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)), _MM_SHUFFLE(1,0,3,2)));
301
302
0
  temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0));
303
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+0),
304
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)));
305
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+2), temp);
306
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_shuffle_epi32(
307
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)), _MM_SHUFFLE(1,0,3,2)));
308
309
0
  temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4));
310
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_unpacklo_epi64(
311
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
312
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
313
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_unpackhi_epi64(
314
0
    temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
315
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_shuffle_epi32(
316
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)), _MM_SHUFFLE(1,0,3,2)));
317
318
0
  temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0));
319
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+0),
320
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)));
321
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+2), temp);
322
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_shuffle_epi32(
323
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)), _MM_SHUFFLE(1,0,3,2)));
324
325
0
  temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4));
326
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_unpacklo_epi64(
327
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
328
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
329
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_unpackhi_epi64(
330
0
    temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
331
332
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi64(
333
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)),
334
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0))));
335
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_add_epi64(
336
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)),
337
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
338
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi64(
339
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)),
340
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
341
0
  _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_add_epi64(
342
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
343
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
344
345
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi64(
346
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)),
347
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0))));
348
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_add_epi64(
349
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)),
350
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
351
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi64(
352
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)),
353
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
354
0
  _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_add_epi64(
355
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
356
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
357
0
}
358
359
inline void load_sc(const lsh_u64** p_const_v, size_t i)
360
0
{
361
0
  *p_const_v = &LSH512_StepConstants[i];
362
0
}
363
364
inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
365
0
{
366
0
  CRYPTOPP_ASSERT(i_state != NULLPTR);
367
368
0
  lsh_u64* submsg_e_l = i_state->submsg_e_l;
369
0
  lsh_u64* submsg_e_r = i_state->submsg_e_r;
370
371
0
  _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
372
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l)),
373
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l))));
374
0
  _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
375
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r)),
376
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r))));
377
0
  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
378
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
379
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
380
0
  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
381
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
382
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
383
0
  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
384
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
385
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
386
0
  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
387
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
388
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
389
0
  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
390
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
391
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
392
0
  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
393
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
394
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
395
0
}
396
397
inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
398
0
{
399
0
  CRYPTOPP_ASSERT(i_state != NULLPTR);
400
401
0
  lsh_u64* submsg_o_l = i_state->submsg_o_l;
402
0
  lsh_u64* submsg_o_r = i_state->submsg_o_r;
403
404
0
  _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
405
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l)),
406
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
407
0
  _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
408
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r)),
409
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
410
0
  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
411
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
412
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
413
0
  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
414
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
415
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
416
0
  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
417
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
418
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
419
0
  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
420
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
421
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
422
0
  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
423
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
424
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
425
0
  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
426
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
427
0
    _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
428
0
}
429
430
inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
431
0
{
432
0
  _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi64(
433
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l)),
434
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r))));
435
0
  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_add_epi64(
436
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
437
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
438
0
  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi64(
439
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
440
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
441
0
  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_add_epi64(
442
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
443
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
444
0
}
445
446
template <unsigned int R>
447
inline void rotate_blk(lsh_u64 cv[8])
448
0
{
449
#if defined(CRYPTOPP_XOP_AVAILABLE)
450
  _mm_storeu_si128(M128_CAST(cv),
451
    _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
452
  _mm_storeu_si128(M128_CAST(cv+2),
453
    _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R));
454
  _mm_storeu_si128(M128_CAST(cv+4),
455
    _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
456
  _mm_storeu_si128(M128_CAST(cv+6),
457
    _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R));
458
459
#else
460
0
  _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
461
0
    _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
462
0
    _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), 64-R)));
463
0
  _mm_storeu_si128(M128_CAST(cv+2), _mm_or_si128(
464
0
    _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R),
465
0
    _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), 64-R)));
466
0
  _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
467
0
    _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
468
0
    _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 64-R)));
469
0
  _mm_storeu_si128(M128_CAST(cv+6), _mm_or_si128(
470
0
    _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R),
471
0
    _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), 64-R)));
472
0
#endif
473
0
}
Unexecuted instantiation: lsh512_sse.cpp:void (anonymous namespace)::rotate_blk<23u>(unsigned long*)
Unexecuted instantiation: lsh512_sse.cpp:void (anonymous namespace)::rotate_blk<59u>(unsigned long*)
Unexecuted instantiation: lsh512_sse.cpp:void (anonymous namespace)::rotate_blk<7u>(unsigned long*)
Unexecuted instantiation: lsh512_sse.cpp:void (anonymous namespace)::rotate_blk<3u>(unsigned long*)
474
475
inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
476
0
{
477
0
  _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
478
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l)),
479
0
    _mm_loadu_si128(CONST_M128_CAST(const_v))));
480
0
  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
481
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
482
0
    _mm_loadu_si128(CONST_M128_CAST(const_v+2))));
483
0
  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
484
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
485
0
    _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
486
0
  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
487
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
488
0
    _mm_loadu_si128(CONST_M128_CAST(const_v+6))));
489
0
}
490
491
inline void rotate_msg_gamma(lsh_u64 cv_r[8])
492
0
{
493
  // g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
494
0
  _mm_storeu_si128(M128_CAST(cv_r+0),
495
0
    _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
496
0
      _mm_set_epi8(13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
497
0
  _mm_storeu_si128(M128_CAST(cv_r+2),
498
0
    _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
499
0
      _mm_set_epi8(9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4)));
500
501
0
  _mm_storeu_si128(M128_CAST(cv_r+4),
502
0
    _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
503
0
      _mm_set_epi8(12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
504
0
  _mm_storeu_si128(M128_CAST(cv_r+6),
505
0
    _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
506
0
      _mm_set_epi8(8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3)));
507
0
}
508
509
inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
510
0
{
511
0
  __m128i temp[2];
512
0
  temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
513
0
  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_unpacklo_epi64(
514
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
515
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+0))));
516
0
  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_unpackhi_epi64(
517
0
    temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+2))));
518
519
0
  temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+4));
520
0
  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_unpacklo_epi64(
521
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
522
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+4))));
523
0
  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_unpackhi_epi64(
524
0
    temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+6))));
525
0
  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_shuffle_epi32(
526
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), _MM_SHUFFLE(1,0,3,2)));
527
528
0
  temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+0));
529
0
  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_unpacklo_epi64(
530
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
531
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
532
0
  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_unpackhi_epi64(
533
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), temp[0]));
534
0
  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_shuffle_epi32(
535
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), _MM_SHUFFLE(1,0,3,2)));
536
537
0
  temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+4));
538
0
  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_unpacklo_epi64(
539
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
540
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
541
0
  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_unpackhi_epi64(
542
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), temp[0]));
543
544
0
  temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
545
0
  temp[1] = _mm_loadu_si128(CONST_M128_CAST(cv_l+2));
546
547
0
  _mm_storeu_si128(M128_CAST(cv_l+0),
548
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
549
0
  _mm_storeu_si128(M128_CAST(cv_l+2),
550
0
    _mm_loadu_si128(CONST_M128_CAST(cv_l+6)));
551
0
  _mm_storeu_si128(M128_CAST(cv_l+4),
552
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
553
0
  _mm_storeu_si128(M128_CAST(cv_l+6),
554
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+6)));
555
0
  _mm_storeu_si128(M128_CAST(cv_r+4),
556
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
557
0
  _mm_storeu_si128(M128_CAST(cv_r+6),
558
0
    _mm_loadu_si128(CONST_M128_CAST(cv_r+2)));
559
560
0
  _mm_storeu_si128(M128_CAST(cv_r+0), temp[0]);
561
0
  _mm_storeu_si128(M128_CAST(cv_r+2), temp[1]);
562
0
}
563
564
/* -------------------------------------------------------- *
565
* step function
566
* -------------------------------------------------------- */
567
568
template <unsigned int Alpha, unsigned int Beta>
569
inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
570
0
{
571
0
  add_blk(cv_l, cv_r);
572
0
  rotate_blk<Alpha>(cv_l);
573
0
  xor_with_const(cv_l, const_v);
574
0
  add_blk(cv_r, cv_l);
575
0
  rotate_blk<Beta>(cv_r);
576
0
  add_blk(cv_l, cv_r);
577
0
  rotate_msg_gamma(cv_r);
578
0
}
Unexecuted instantiation: lsh512_sse.cpp:void (anonymous namespace)::mix<23u, 59u>(unsigned long*, unsigned long*, unsigned long const*)
Unexecuted instantiation: lsh512_sse.cpp:void (anonymous namespace)::mix<7u, 3u>(unsigned long*, unsigned long*, unsigned long const*)
579
580
/* -------------------------------------------------------- *
581
* compression function
582
* -------------------------------------------------------- */
583
584
inline void compress(LSH512_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
585
0
{
586
0
  CRYPTOPP_ASSERT(ctx != NULLPTR);
587
588
0
  LSH512_SSSE3_Internal  s_state(ctx->cv_l);
589
0
  LSH512_SSSE3_Internal* i_state = &s_state;
590
591
0
  const lsh_u64* const_v = NULL;
592
0
  lsh_u64 *cv_l = ctx->cv_l;
593
0
  lsh_u64 *cv_r = ctx->cv_r;
594
595
0
  load_msg_blk(i_state, pdMsgBlk);
596
597
0
  msg_add_even(cv_l, cv_r, i_state);
598
0
  load_sc(&const_v, 0);
599
0
  mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
600
0
  word_perm(cv_l, cv_r);
601
602
0
  msg_add_odd(cv_l, cv_r, i_state);
603
0
  load_sc(&const_v, 8);
604
0
  mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
605
0
  word_perm(cv_l, cv_r);
606
607
0
  for (size_t i = 1; i < NUM_STEPS / 2; i++)
608
0
  {
609
0
    msg_exp_even(i_state);
610
0
    msg_add_even(cv_l, cv_r, i_state);
611
0
    load_sc(&const_v, 16 * i);
612
0
    mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
613
0
    word_perm(cv_l, cv_r);
614
615
0
    msg_exp_odd(i_state);
616
0
    msg_add_odd(cv_l, cv_r, i_state);
617
0
    load_sc(&const_v, 16 * i + 8);
618
0
    mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
619
0
    word_perm(cv_l, cv_r);
620
0
  }
621
622
0
  msg_exp_even(i_state);
623
0
  msg_add_even(cv_l, cv_r, i_state);
624
0
}
625
626
/* -------------------------------------------------------- */
627
628
inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
629
0
{
630
  // The IV's are 32-byte aligned so we can use aligned loads.
631
0
  _mm_storeu_si128(M128_CAST(cv_l+0),
632
0
    _mm_load_si128(CONST_M128_CAST(iv+0)));
633
0
  _mm_storeu_si128(M128_CAST(cv_l+2),
634
0
    _mm_load_si128(CONST_M128_CAST(iv+2)));
635
0
  _mm_storeu_si128(M128_CAST(cv_l+4),
636
0
    _mm_load_si128(CONST_M128_CAST(iv+4)));
637
0
  _mm_storeu_si128(M128_CAST(cv_l+6),
638
0
    _mm_load_si128(CONST_M128_CAST(iv+6)));
639
0
  _mm_storeu_si128(M128_CAST(cv_r+0),
640
0
    _mm_load_si128(CONST_M128_CAST(iv+8)));
641
0
  _mm_storeu_si128(M128_CAST(cv_r+2),
642
0
    _mm_load_si128(CONST_M128_CAST(iv+10)));
643
0
  _mm_storeu_si128(M128_CAST(cv_r+4),
644
0
    _mm_load_si128(CONST_M128_CAST(iv+12)));
645
0
  _mm_storeu_si128(M128_CAST(cv_r+6),
646
0
    _mm_load_si128(CONST_M128_CAST(iv+14)));
647
0
}
648
649
inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
650
0
{
651
0
  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
652
0
  _mm_storeu_si128(M128_CAST(cv_l+2), _mm_setzero_si128());
653
0
  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
654
0
  _mm_storeu_si128(M128_CAST(cv_l+6), _mm_setzero_si128());
655
0
  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
656
0
  _mm_storeu_si128(M128_CAST(cv_r+2), _mm_setzero_si128());
657
0
  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
658
0
  _mm_storeu_si128(M128_CAST(cv_r+6), _mm_setzero_si128());
659
0
}
660
661
inline void zero_submsgs(LSH512_SSSE3_Context* ctx)
662
0
{
663
0
  lsh_u64* sub_msgs = ctx->sub_msgs;
664
665
0
  _mm_storeu_si128(M128_CAST(sub_msgs+ 0),
666
0
    _mm_setzero_si128());
667
0
  _mm_storeu_si128(M128_CAST(sub_msgs+ 2),
668
0
    _mm_setzero_si128());
669
0
  _mm_storeu_si128(M128_CAST(sub_msgs+ 4),
670
0
    _mm_setzero_si128());
671
0
  _mm_storeu_si128(M128_CAST(sub_msgs+ 6),
672
0
    _mm_setzero_si128());
673
0
  _mm_storeu_si128(M128_CAST(sub_msgs+ 8),
674
0
    _mm_setzero_si128());
675
0
  _mm_storeu_si128(M128_CAST(sub_msgs+10),
676
0
    _mm_setzero_si128());
677
0
  _mm_storeu_si128(M128_CAST(sub_msgs+12),
678
0
    _mm_setzero_si128());
679
0
  _mm_storeu_si128(M128_CAST(sub_msgs+14),
680
0
    _mm_setzero_si128());
681
0
}
682
683
inline void init224(LSH512_SSSE3_Context* ctx)
684
0
{
685
0
  CRYPTOPP_ASSERT(ctx != NULLPTR);
686
687
0
  zero_submsgs(ctx);
688
0
  load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
689
0
}
690
691
inline void init256(LSH512_SSSE3_Context* ctx)
692
0
{
693
0
  CRYPTOPP_ASSERT(ctx != NULLPTR);
694
695
0
  zero_submsgs(ctx);
696
0
  load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
697
0
}
698
699
inline void init384(LSH512_SSSE3_Context* ctx)
700
0
{
701
0
  CRYPTOPP_ASSERT(ctx != NULLPTR);
702
703
0
  zero_submsgs(ctx);
704
0
  load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
705
0
}
706
707
inline void init512(LSH512_SSSE3_Context* ctx)
708
0
{
709
0
  CRYPTOPP_ASSERT(ctx != NULLPTR);
710
711
0
  zero_submsgs(ctx);
712
0
  load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
713
0
}
714
715
/* -------------------------------------------------------- */
716
717
inline void fin(LSH512_SSSE3_Context* ctx)
718
0
{
719
0
  CRYPTOPP_ASSERT(ctx != NULLPTR);
720
721
0
  _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
722
0
    _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
723
0
    _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
724
0
  _mm_storeu_si128(M128_CAST(ctx->cv_l+2), _mm_xor_si128(
725
0
    _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+2)),
726
0
    _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+2))));
727
0
  _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
728
0
    _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
729
0
    _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
730
0
  _mm_storeu_si128(M128_CAST(ctx->cv_l+6), _mm_xor_si128(
731
0
    _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+6)),
732
0
    _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+6))));
733
0
}
734
735
/* -------------------------------------------------------- */
736
737
// Copy the digest out of the context. The hash value is the leading
// bytes of the left chaining variable; for digest sizes that are not
// a whole number of bytes, the unused low bits of the last byte are
// masked off.
inline void get_hash(LSH512_SSSE3_Context* ctx, lsh_u8* pbHashVal)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);
  CRYPTOPP_ASSERT(ctx->alg_type != 0);
  CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

  const lsh_uint alg_type = ctx->alg_type;
  const lsh_uint byte_len = LSH_GET_HASHBYTE(alg_type);
  const lsh_uint small_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);

  // Multiplying by sizeof(lsh_u8) looks odd...
  std::memcpy(pbHashVal, ctx->cv_l, byte_len);

  if (small_bit_len)
    pbHashVal[byte_len-1] &= (((lsh_u8)0xff) << small_bit_len);
}
753
754
/* -------------------------------------------------------- */
755
756
// Initialize the hash state for the algorithm type stored in the
// context. The four standard digest sizes use precomputed IVs; any
// other size derives its IV by running the step function over a seed
// block that encodes the requested hash bit length.
// Returns LSH_SUCCESS (the IV-derivation path cannot fail).
lsh_err lsh512_init_ssse3(LSH512_SSSE3_Context* ctx)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);
  CRYPTOPP_ASSERT(ctx->alg_type != 0);

  lsh_u32 alg_type = ctx->alg_type;
  // NULLPTR, not NULL, per the convention used throughout this file.
  const lsh_u64* const_v = NULLPTR;
  ctx->remain_databitlen = 0;

  switch (alg_type){
  case LSH_TYPE_512_512:
    init512(ctx);
    return LSH_SUCCESS;
  case LSH_TYPE_512_384:
    init384(ctx);
    return LSH_SUCCESS;
  case LSH_TYPE_512_256:
    init256(ctx);
    return LSH_SUCCESS;
  case LSH_TYPE_512_224:
    init224(ctx);
    return LSH_SUCCESS;
  default:
    break;
  }

  // Non-standard digest size: derive the IV. Seed the chaining
  // variables with the maximum digest byte length and the requested
  // bit length, then run all NUM_STEPS mix/permute rounds (two
  // rounds per loop iteration, even then odd rotation constants).
  lsh_u64* cv_l = ctx->cv_l;
  lsh_u64* cv_r = ctx->cv_r;

  zero_iv(cv_l, cv_r);
  cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
  cv_l[1] = LSH_GET_HASHBIT(alg_type);

  for (size_t i = 0; i < NUM_STEPS / 2; i++)
  {
    // Mix
    load_sc(&const_v, i * 16);
    mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    load_sc(&const_v, i * 16 + 8);
    mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);
  }

  return LSH_SUCCESS;
}
803
804
// Absorb message data into the hash state. Data is buffered in
// ctx->last_block until a full 256-byte block is available, then
// compressed. databitlen must be a multiple of 8 (this port is byte
// oriented); the original KISA bit-oriented paths are kept below but
// disabled via compile-time-zero pos2/remain_msg_bit.
// Returns LSH_SUCCESS, or LSH_ERR_INVALID_STATE /
// LSH_ERR_INVALID_DATABITLEN if the buffered state is corrupt.
lsh_err lsh512_update_ssse3(LSH512_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);
  CRYPTOPP_ASSERT(data != NULLPTR);
  CRYPTOPP_ASSERT(databitlen % 8 == 0);
  CRYPTOPP_ASSERT(ctx->alg_type != 0);

  if (databitlen == 0){
    return LSH_SUCCESS;
  }

  // We are byte oriented. tail bits will always be 0.
  size_t databytelen = databitlen >> 3;
  // lsh_uint pos2 = databitlen & 0x7;
  const size_t pos2 = 0;

  size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
  // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
  const size_t remain_msg_bit = 0;

  if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
    return LSH_ERR_INVALID_STATE;
  }
  // Always false here (remain_msg_bit is const 0); kept from the
  // reference implementation.
  if (remain_msg_bit > 0){
    return LSH_ERR_INVALID_DATABITLEN;
  }

  // Not enough for a full block: just buffer the bytes and return.
  if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
    std::memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
    ctx->remain_databitlen += (lsh_uint)databitlen;
    remain_msg_byte += (lsh_uint)databytelen;
    // Dead branch (pos2 is const 0); reference-impl tail-bit handling.
    if (pos2){
      ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
    }
    return LSH_SUCCESS;
  }

  // Complete and compress the partially-filled buffered block first.
  if (remain_msg_byte > 0){
    size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
    std::memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
    compress(ctx, ctx->last_block);
    data += more_byte;
    databytelen -= more_byte;
    remain_msg_byte = 0;
    ctx->remain_databitlen = 0;
  }

  // Compress full blocks directly from the caller's buffer.
  while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
  {
    // This call to compress caused some trouble.
    // The data pointer can become unaligned in the
    // previous block.
    compress(ctx, data);
    data += LSH512_MSG_BLK_BYTE_LEN;
    databytelen -= LSH512_MSG_BLK_BYTE_LEN;
  }

  // Buffer whatever is left for the next update/final call.
  if (databytelen > 0){
    std::memcpy(ctx->last_block, data, databytelen);
    ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
  }

  // Dead branch (pos2 is const 0); reference-impl tail-bit handling.
  if (pos2){
    ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
    ctx->remain_databitlen += pos2;
  }
  return LSH_SUCCESS;
}
872
873
// Finalize the hash: pad the buffered partial block (0x80 marker then
// zeros), compress it, fold the chaining variables (fin), and copy
// the digest into hashval.
// Returns LSH_SUCCESS, or LSH_ERR_INVALID_STATE if the buffered byte
// count is out of range.
lsh_err lsh512_final_ssse3(LSH512_SSSE3_Context* ctx, lsh_u8* hashval)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);
  CRYPTOPP_ASSERT(hashval != NULLPTR);

  // We are byte oriented. tail bits will always be 0.
  size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
  // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
  const size_t remain_msg_bit = 0;

  if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
    return LSH_ERR_INVALID_STATE;
  }

  // remain_msg_bit is const 0 here, so the padding byte is always
  // 0x80; the first branch is reference-impl bit-oriented padding.
  if (remain_msg_bit){
    ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
  }
  else{
    ctx->last_block[remain_msg_byte] = 0x80;
  }
  // Zero-fill the remainder of the final block after the pad marker.
  std::memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

  compress(ctx, ctx->last_block);

  fin(ctx);
  get_hash(ctx, hashval);

  return LSH_SUCCESS;
}
902
903
ANONYMOUS_NAMESPACE_END
904
905
NAMESPACE_BEGIN(CryptoPP)
906
907
extern
908
void LSH512_Base_Restart_SSSE3(word64* state)
909
0
{
910
0
  state[RemainingBits] = 0;
911
0
  LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
912
0
  lsh_err err = lsh512_init_ssse3(&ctx);
913
914
0
  if (err != LSH_SUCCESS)
915
0
    throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_ssse3 failed");
916
0
}
917
918
extern
919
void LSH512_Base_Update_SSSE3(word64* state, const byte *input, size_t size)
920
0
{
921
0
  LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
922
0
  lsh_err err = lsh512_update_ssse3(&ctx, input, 8*size);
923
924
0
  if (err != LSH_SUCCESS)
925
0
    throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_ssse3 failed");
926
0
}
927
928
extern
929
void LSH512_Base_TruncatedFinal_SSSE3(word64* state, byte *hash, size_t)
930
0
{
931
0
  LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
932
0
  lsh_err err = lsh512_final_ssse3(&ctx, hash);
933
934
0
  if (err != LSH_SUCCESS)
935
0
    throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_ssse3 failed");
936
0
}
937
938
NAMESPACE_END
939
940
#endif  // CRYPTOPP_SSSE3_AVAILABLE