Coverage Report

Created: 2024-11-21 07:03

/src/cryptopp/lsh256_sse.cpp
Line
Count
Source (jump to first uncovered line)
1
// lsh.cpp - written and placed in the public domain by Jeffrey Walton
2
//           Based on the specification and source code provided by
3
//           Korea Internet & Security Agency (KISA) website. Also
4
//           see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5
//           and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6
7
// We are hitting some sort of GCC bug in the LSH AVX2 code path.
8
// Clang is OK on the AVX2 code path. We believe it is GCC Issue
9
// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10
// makes using zeroupper a little tricky.
11
12
#include "pch.h"
13
#include "config.h"
14
15
#include "lsh.h"
16
#include "cpu.h"
17
#include "misc.h"
18
19
// Squash MS LNK4221 and libtool warnings
20
extern const char LSH256_SSE_FNAME[] = __FILE__;
21
22
#if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
23
24
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
25
# include <emmintrin.h>
26
# include <tmmintrin.h>
27
#endif
28
29
#if defined(CRYPTOPP_XOP_AVAILABLE)
30
# include <ammintrin.h>
31
#endif
32
33
#if defined(CRYPTOPP_GCC_COMPATIBLE)
34
# include <x86intrin.h>
35
#endif
36
37
ANONYMOUS_NAMESPACE_BEGIN
38
39
/* LSH Constants */
40
41
const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
42
// const unsigned int LSH256_MSG_BLK_BIT_LEN = 1024;
43
// const unsigned int LSH256_CV_BYTE_LEN = 64;
44
const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;
45
46
// const unsigned int MSG_BLK_WORD_LEN = 32;
47
const unsigned int CV_WORD_LEN = 16;
48
const unsigned int CONST_WORD_LEN = 8;
49
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
50
// const unsigned int WORD_BIT_LEN = 32;
51
const unsigned int NUM_STEPS = 26;
52
53
const unsigned int ROT_EVEN_ALPHA = 29;
54
const unsigned int ROT_EVEN_BETA = 1;
55
const unsigned int ROT_ODD_ALPHA = 5;
56
const unsigned int ROT_ODD_BETA = 17;
57
58
const unsigned int LSH_TYPE_256_256 = 0x0000020;
59
const unsigned int LSH_TYPE_256_224 = 0x000001C;
60
61
// const unsigned int LSH_TYPE_224 = LSH_TYPE_256_224;
62
// const unsigned int LSH_TYPE_256 = LSH_TYPE_256_256;
63
64
/* Error Code */
65
66
const unsigned int LSH_SUCCESS = 0x0;
67
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
68
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
69
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
70
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;
71
72
/* Index into our state array */
73
74
const unsigned int AlgorithmType = 80;
75
const unsigned int RemainingBits = 81;
76
77
NAMESPACE_END
78
79
NAMESPACE_BEGIN(CryptoPP)
80
NAMESPACE_BEGIN(LSH)
81
82
// lsh256.cpp
83
extern const word32 LSH256_IV224[CV_WORD_LEN];
84
extern const word32 LSH256_IV256[CV_WORD_LEN];
85
extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];
86
87
NAMESPACE_END  // LSH
88
NAMESPACE_END  // Crypto++
89
90
ANONYMOUS_NAMESPACE_BEGIN
91
92
using CryptoPP::byte;
93
using CryptoPP::word32;
94
using CryptoPP::rotlFixed;
95
using CryptoPP::rotlConstant;
96
97
using CryptoPP::GetBlock;
98
using CryptoPP::LittleEndian;
99
using CryptoPP::ConditionalByteReverse;
100
using CryptoPP::LITTLE_ENDIAN_ORDER;
101
102
typedef byte lsh_u8;
103
typedef word32 lsh_u32;
104
typedef word32 lsh_uint;
105
typedef word32 lsh_err;
106
typedef word32 lsh_type;
107
108
using CryptoPP::LSH::LSH256_IV224;
109
using CryptoPP::LSH::LSH256_IV256;
110
using CryptoPP::LSH::LSH256_StepConstants;
111
112
struct LSH256_SSSE3_Context
113
{
114
  LSH256_SSSE3_Context(word32* state, word32 algType, word32& remainingBitLength) :
115
    cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
116
    last_block(reinterpret_cast<byte*>(state+48)),
117
    remain_databitlen(remainingBitLength),
118
0
    alg_type(static_cast<lsh_type>(algType)) {}
119
120
  lsh_u32* cv_l;  // start of our state block
121
  lsh_u32* cv_r;
122
  lsh_u32* sub_msgs;
123
  lsh_u8*  last_block;
124
  lsh_u32& remain_databitlen;
125
  lsh_type alg_type;
126
};
127
128
struct LSH256_SSSE3_Internal
129
{
130
  LSH256_SSSE3_Internal(word32* state) :
131
    submsg_e_l(state+16), submsg_e_r(state+24),
132
0
    submsg_o_l(state+32), submsg_o_r(state+40) { }
133
134
  lsh_u32* submsg_e_l; /* even left sub-message  */
135
  lsh_u32* submsg_e_r; /* even right sub-message */
136
  lsh_u32* submsg_o_l; /* odd left sub-message   */
137
  lsh_u32* submsg_o_r; /* odd right sub-message  */
138
};
139
140
// const word32 g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
141
142
/* LSH AlgType Macro */
143
144
0
// Returns true when val is an LSH-512 family type code.
//
// The family selector is the 4-bit field at bits 16..19 (mask 0xf0000). Both
// LSH-256 type codes in this file (0x20 and 0x1C) have 0 in that field, so
// the previous "== 0" comparison returned true for exactly the LSH-256 types.
// The KISA reference marks the 512 family with the value 0x10000.
inline bool LSH_IS_LSH512(lsh_uint val) {
  return (val & 0xf0000) == 0x10000;
}
147
148
0
// Extracts the top byte (bits 24..31) of an algorithm type code.
// LSH_GET_HASHBIT() subtracts this value from the digest bit length.
inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
  const lsh_uint small_bits = val >> 24;
  return small_bits;
}
151
152
0
// Extracts the low 16 bits of an algorithm type code, which hold the
// digest length in bytes.
inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
  const lsh_uint byte_len = val & 0xffff;
  return byte_len;
}
155
156
0
// Digest length in bits: eight times the byte length, minus the
// "small hash bit" count stored in the top byte of the type code.
inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
  const lsh_uint whole_bits = LSH_GET_HASHBYTE(val) << 3;
  const lsh_uint trimmed    = LSH_GET_SMALL_HASHBIT(val);
  return whole_bits - trimmed;
}
159
160
0
// Returns v passed through ConditionalByteReverse() with
// LITTLE_ENDIAN_ORDER as the requested byte order.
inline lsh_u32 loadLE32(lsh_u32 v) {
  const lsh_u32 converted = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v);
  return converted;
}
163
164
0
// Rotates x left by r bits by forwarding to rotlFixed().
// Marked inline like every other helper in this namespace; a non-inline,
// unreferenced function with internal linkage draws -Wunused-function.
inline lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
  return rotlFixed(x, r);
}
167
168
// Original code relied upon unaligned lsh_u32 buffer
169
// Copies one 128-byte message block into the four 8-word sub-message
// buffers, in the order even-left, even-right, odd-left, odd-right.
// Unaligned loads and stores are used, so msgblk needs no alignment.
inline void load_msg_blk(LSH256_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
{
  CRYPTOPP_ASSERT(i_state != NULLPTR);

  // Destination buffers, listed in the order the message bytes fill them.
  lsh_u32* const dest[4] = {
    i_state->submsg_e_l, i_state->submsg_e_r,
    i_state->submsg_o_l, i_state->submsg_o_r
  };

  // Each destination takes 32 consecutive message bytes, moved as two
  // 16-byte halves.
  for (unsigned int k = 0; k < 4; ++k)
  {
    const lsh_u8* src = msgblk + 32*k;
    _mm_storeu_si128(M128_CAST(dest[k]+0),
      _mm_loadu_si128(CONST_M128_CAST(src+0)));
    _mm_storeu_si128(M128_CAST(dest[k]+4),
      _mm_loadu_si128(CONST_M128_CAST(src+16)));
  }
}
194
195
// Message expansion for an even step: each even sub-message half becomes
// (odd half) + (word-permuted even half), using 32-bit lane additions.
// The low four words use permutation (1,0,2,3); the high four use (2,1,0,3).
inline void msg_exp_even(LSH256_SSSE3_Internal* i_state)
{
  CRYPTOPP_ASSERT(i_state != NULLPTR);

  lsh_u32* const even_l = i_state->submsg_e_l;
  lsh_u32* const even_r = i_state->submsg_e_r;
  const lsh_u32* const odd_l = i_state->submsg_o_l;
  const lsh_u32* const odd_r = i_state->submsg_o_r;

  __m128i other, self;

  // left, words 0..3
  other = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(odd_l+0)), _MM_SHUFFLE(3,2,1,0));
  self = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(even_l+0)), _MM_SHUFFLE(1,0,2,3));
  _mm_storeu_si128(M128_CAST(even_l+0), _mm_add_epi32(other, self));

  // left, words 4..7
  other = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(odd_l+4)), _MM_SHUFFLE(3,2,1,0));
  self = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(even_l+4)), _MM_SHUFFLE(2,1,0,3));
  _mm_storeu_si128(M128_CAST(even_l+4), _mm_add_epi32(other, self));

  // right, words 0..3
  other = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(odd_r+0)), _MM_SHUFFLE(3,2,1,0));
  self = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(even_r+0)), _MM_SHUFFLE(1,0,2,3));
  _mm_storeu_si128(M128_CAST(even_r+0), _mm_add_epi32(other, self));

  // right, words 4..7
  other = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(odd_r+4)), _MM_SHUFFLE(3,2,1,0));
  self = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(even_r+4)), _MM_SHUFFLE(2,1,0,3));
  _mm_storeu_si128(M128_CAST(even_r+4), _mm_add_epi32(other, self));
}
228
229
// Message expansion for an odd step: each odd sub-message half becomes
// (even half) + (word-permuted odd half), using 32-bit lane additions.
// The low four words use permutation (1,0,2,3); the high four use (2,1,0,3).
inline void msg_exp_odd(LSH256_SSSE3_Internal* i_state)
{
  CRYPTOPP_ASSERT(i_state != NULLPTR);

  const lsh_u32* const even_l = i_state->submsg_e_l;
  const lsh_u32* const even_r = i_state->submsg_e_r;
  lsh_u32* const odd_l = i_state->submsg_o_l;
  lsh_u32* const odd_r = i_state->submsg_o_r;

  __m128i other, self;

  // left, words 0..3
  other = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(even_l+0)), _MM_SHUFFLE(3,2,1,0));
  self = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(odd_l+0)), _MM_SHUFFLE(1,0,2,3));
  _mm_storeu_si128(M128_CAST(odd_l+0), _mm_add_epi32(other, self));

  // left, words 4..7
  other = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(even_l+4)), _MM_SHUFFLE(3,2,1,0));
  self = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(odd_l+4)), _MM_SHUFFLE(2,1,0,3));
  _mm_storeu_si128(M128_CAST(odd_l+4), _mm_add_epi32(other, self));

  // right, words 0..3
  other = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(even_r+0)), _MM_SHUFFLE(3,2,1,0));
  self = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(odd_r+0)), _MM_SHUFFLE(1,0,2,3));
  _mm_storeu_si128(M128_CAST(odd_r+0), _mm_add_epi32(other, self));

  // right, words 4..7
  other = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(even_r+4)), _MM_SHUFFLE(3,2,1,0));
  self = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(odd_r+4)), _MM_SHUFFLE(2,1,0,3));
  _mm_storeu_si128(M128_CAST(odd_r+4), _mm_add_epi32(other, self));
}
262
263
inline void load_sc(const lsh_u32** p_const_v, size_t i)
264
0
{
265
0
  CRYPTOPP_ASSERT(p_const_v != NULLPTR);
266
267
0
  *p_const_v = &LSH256_StepConstants[i];
268
0
}
269
270
// XORs the even sub-message into the chaining value in place:
// cv_l ^= submsg_e_l and cv_r ^= submsg_e_r, eight words each.
inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
{
  CRYPTOPP_ASSERT(i_state != NULLPTR);

  const lsh_u32* const msg_l = i_state->submsg_e_l;
  const lsh_u32* const msg_r = i_state->submsg_e_r;

  // Left half first, then right half; four words per iteration.
  for (unsigned int k = 0; k < 8; k += 4)
  {
    const __m128i cv  = _mm_loadu_si128(CONST_M128_CAST(cv_l+k));
    const __m128i msg = _mm_loadu_si128(CONST_M128_CAST(msg_l+k));
    _mm_storeu_si128(M128_CAST(cv_l+k), _mm_xor_si128(cv, msg));
  }
  for (unsigned int k = 0; k < 8; k += 4)
  {
    const __m128i cv  = _mm_loadu_si128(CONST_M128_CAST(cv_r+k));
    const __m128i msg = _mm_loadu_si128(CONST_M128_CAST(msg_r+k));
    _mm_storeu_si128(M128_CAST(cv_r+k), _mm_xor_si128(cv, msg));
  }
}
290
291
// XORs the odd sub-message into the chaining value in place:
// cv_l ^= submsg_o_l and cv_r ^= submsg_o_r, eight words each.
inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
{
  CRYPTOPP_ASSERT(i_state != NULLPTR);

  const lsh_u32* const msg_l = i_state->submsg_o_l;
  const lsh_u32* const msg_r = i_state->submsg_o_r;

  // Left half first, then right half; four words per iteration.
  for (unsigned int k = 0; k < 8; k += 4)
  {
    const __m128i cv  = _mm_loadu_si128(CONST_M128_CAST(cv_l+k));
    const __m128i msg = _mm_loadu_si128(CONST_M128_CAST(msg_l+k));
    _mm_storeu_si128(M128_CAST(cv_l+k), _mm_xor_si128(cv, msg));
  }
  for (unsigned int k = 0; k < 8; k += 4)
  {
    const __m128i cv  = _mm_loadu_si128(CONST_M128_CAST(cv_r+k));
    const __m128i msg = _mm_loadu_si128(CONST_M128_CAST(msg_r+k));
    _mm_storeu_si128(M128_CAST(cv_r+k), _mm_xor_si128(cv, msg));
  }
}
311
312
// Adds cv_r into cv_l in place, word by word (32-bit lanes, wrapping).
// Note the parameter names are positional only: mix() also calls this with
// the two halves swapped.
inline void add_blk(lsh_u32 cv_l[8], const lsh_u32 cv_r[8])
{
  for (unsigned int k = 0; k < 8; k += 4)
  {
    const __m128i dst = _mm_loadu_si128(CONST_M128_CAST(cv_l+k));
    const __m128i src = _mm_loadu_si128(CONST_M128_CAST(cv_r+k));
    _mm_storeu_si128(M128_CAST(cv_l+k), _mm_add_epi32(dst, src));
  }
}
321
322
template <unsigned int R>
323
inline void rotate_blk(lsh_u32 cv[8])
324
0
{
325
#if defined(CRYPTOPP_XOP_AVAILABLE)
326
  _mm_storeu_si128(M128_CAST(cv),
327
    _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
328
  _mm_storeu_si128(M128_CAST(cv+4),
329
    _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
330
#else
331
0
  _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
332
0
    _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
333
0
    _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), 32-R)));
334
0
  _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
335
0
    _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
336
0
    _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 32-R)));
337
0
#endif
338
0
}
Unexecuted instantiation: lsh256_sse.cpp:void (anonymous namespace)::rotate_blk<29u>(unsigned int*)
Unexecuted instantiation: lsh256_sse.cpp:void (anonymous namespace)::rotate_blk<1u>(unsigned int*)
Unexecuted instantiation: lsh256_sse.cpp:void (anonymous namespace)::rotate_blk<5u>(unsigned int*)
Unexecuted instantiation: lsh256_sse.cpp:void (anonymous namespace)::rotate_blk<17u>(unsigned int*)
339
340
// XORs eight step-constant words from const_v into cv_l in place.
inline void xor_with_const(lsh_u32* cv_l, const lsh_u32* const_v)
{
  for (unsigned int k = 0; k < 8; k += 4)
  {
    const __m128i cv = _mm_loadu_si128(CONST_M128_CAST(cv_l+k));
    const __m128i sc = _mm_loadu_si128(CONST_M128_CAST(const_v+k));
    _mm_storeu_si128(M128_CAST(cv_l+k), _mm_xor_si128(cv, sc));
  }
}
349
350
// Rotates word j of cv_r left by gamma[j] bits, in place, where
// g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 }. Every amount is a
// multiple of 8, so each rotation is done as a byte shuffle.
inline void rotate_msg_gamma(lsh_u32 cv_r[8])
{
  // Byte-selection masks; _mm_set_epi8 lists bytes from 15 down to 0.
  const __m128i mask_lo = _mm_set_epi8(12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0);
  const __m128i mask_hi = _mm_set_epi8(15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1);

  const __m128i lo = _mm_loadu_si128(CONST_M128_CAST(cv_r+0));
  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_shuffle_epi8(lo, mask_lo));

  const __m128i hi = _mm_loadu_si128(CONST_M128_CAST(cv_r+4));
  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_shuffle_epi8(hi, mask_hi));
}
360
361
// Word permutation across the 16-word chaining value, in place.
// Each 4-word quarter is first permuted internally (left quarters with
// (3,1,0,2), right quarters with (1,2,3,0)), then the quarters are moved:
//   new cv_l[0..3] = permuted old cv_l[4..7]
//   new cv_l[4..7] = permuted old cv_r[4..7]
//   new cv_r[0..3] = permuted old cv_l[0..3]
//   new cv_r[4..7] = permuted old cv_r[0..3]
// cv_l and cv_r are expected to be distinct 8-word buffers (the callers in
// this file pass state+0 and state+8).
inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
  // Read and permute all four quarters before writing anything back.
  const __m128i l_lo = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(cv_l+0)), _MM_SHUFFLE(3,1,0,2));
  const __m128i l_hi = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(cv_l+4)), _MM_SHUFFLE(3,1,0,2));
  const __m128i r_lo = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(cv_r+0)), _MM_SHUFFLE(1,2,3,0));
  const __m128i r_hi = _mm_shuffle_epi32(
    _mm_loadu_si128(CONST_M128_CAST(cv_r+4)), _MM_SHUFFLE(1,2,3,0));

  // Move the quarters to their new positions.
  _mm_storeu_si128(M128_CAST(cv_l+0), l_hi);
  _mm_storeu_si128(M128_CAST(cv_l+4), r_hi);
  _mm_storeu_si128(M128_CAST(cv_r+4), r_lo);
  _mm_storeu_si128(M128_CAST(cv_r+0), l_lo);
}
381
382
/* -------------------------------------------------------- *
383
* step function
384
* -------------------------------------------------------- */
385
386
template <unsigned int Alpha, unsigned int Beta>
387
inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 const_v[8])
388
0
{
389
0
  add_blk(cv_l, cv_r);
390
0
  rotate_blk<Alpha>(cv_l);
391
0
  xor_with_const(cv_l, const_v);
392
0
  add_blk(cv_r, cv_l);
393
0
  rotate_blk<Beta>(cv_r);
394
0
  add_blk(cv_l, cv_r);
395
0
  rotate_msg_gamma(cv_r);
396
0
}
Unexecuted instantiation: lsh256_sse.cpp:void (anonymous namespace)::mix<29u, 1u>(unsigned int*, unsigned int*, unsigned int const*)
Unexecuted instantiation: lsh256_sse.cpp:void (anonymous namespace)::mix<5u, 17u>(unsigned int*, unsigned int*, unsigned int const*)
397
398
/* -------------------------------------------------------- *
399
* compression function
400
* -------------------------------------------------------- */
401
402
// Compresses one 128-byte message block into the chaining value held in ctx.
//
// ctx:      context whose cv_l/cv_r are updated in place; its sub-message
//           area is used as scratch space
// pdMsgBlk: the message block; no alignment is required
//
// NUM_STEPS (26) steps are run as 13 even/odd pairs. The first pair uses the
// sub-messages exactly as loaded; later pairs expand them first. A final
// even expansion and message addition follows the last step.
inline void compress(LSH256_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);

  // ctx->cv_l is the base of the state array, so this view lands on
  // ctx->sub_msgs (words 16..47).
  LSH256_SSSE3_Internal  s_state(ctx->cv_l);
  LSH256_SSSE3_Internal* i_state = &s_state;

  // NULLPTR rather than NULL, matching the rest of this file.
  const lsh_u32* const_v = NULLPTR;
  lsh_u32* cv_l = ctx->cv_l;
  lsh_u32* cv_r = ctx->cv_r;

  load_msg_blk(i_state, pdMsgBlk);

  // Steps 0 and 1: no message expansion yet.
  msg_add_even(cv_l, cv_r, i_state);
  load_sc(&const_v, 0);
  mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
  word_perm(cv_l, cv_r);

  msg_add_odd(cv_l, cv_r, i_state);
  load_sc(&const_v, 8);
  mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
  word_perm(cv_l, cv_r);

  // Remaining step pairs. Each pair consumes 16 step-constant words:
  // 8 for the even step, 8 for the odd step.
  for (size_t i = 1; i < NUM_STEPS / 2; i++)
  {
    msg_exp_even(i_state);
    msg_add_even(cv_l, cv_r, i_state);
    load_sc(&const_v, 16 * i);
    mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    msg_exp_odd(i_state);
    msg_add_odd(cv_l, cv_r, i_state);
    load_sc(&const_v, 16 * i + 8);
    mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);
  }

  // Final message addition after the last step.
  msg_exp_even(i_state);
  msg_add_even(cv_l, cv_r, i_state);
}
443
444
/* -------------------------------------------------------- */
445
446
// Copies a 16-word IV into the chaining value: iv[0..7] to cv_l and
// iv[8..15] to cv_r.
// NOTE(review): the IV is read with aligned loads (_mm_load_si128), so iv
// must be 16-byte aligned; the extern declarations visible in this file do
// not state an alignment - confirm against the definitions in lsh256.cpp.
inline void load_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 iv[16])
{
  const __m128i q0 = _mm_load_si128(CONST_M128_CAST(iv+ 0));
  _mm_storeu_si128(M128_CAST(cv_l+ 0), q0);

  const __m128i q1 = _mm_load_si128(CONST_M128_CAST(iv+ 4));
  _mm_storeu_si128(M128_CAST(cv_l+ 4), q1);

  const __m128i q2 = _mm_load_si128(CONST_M128_CAST(iv+ 8));
  _mm_storeu_si128(M128_CAST(cv_r+ 0), q2);

  const __m128i q3 = _mm_load_si128(CONST_M128_CAST(iv+12));
  _mm_storeu_si128(M128_CAST(cv_r+ 4), q3);
}
457
458
// Sets all sixteen chaining-value words (cv_l and cv_r) to zero.
inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
  const __m128i zero = _mm_setzero_si128();

  _mm_storeu_si128(M128_CAST(cv_l+0), zero);
  _mm_storeu_si128(M128_CAST(cv_l+4), zero);
  _mm_storeu_si128(M128_CAST(cv_r+0), zero);
  _mm_storeu_si128(M128_CAST(cv_r+4), zero);
}
465
466
// Clears the 32-word sub-message area of the context.
inline void zero_submsgs(LSH256_SSSE3_Context* ctx)
{
  lsh_u32* const words = ctx->sub_msgs;
  const __m128i zero = _mm_setzero_si128();

  // 32 words, four per 128-bit store.
  for (unsigned int k = 0; k < 32; k += 4)
  {
    _mm_storeu_si128(M128_CAST(words+k), zero);
  }
}
479
480
// Prepares ctx for LSH-256-224: clears the sub-message area and loads the
// precomputed LSH256_IV224 into the chaining value.
inline void init224(LSH256_SSSE3_Context* ctx)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);

  zero_submsgs(ctx);

  lsh_u32* const left  = ctx->cv_l;
  lsh_u32* const right = ctx->cv_r;
  load_iv(left, right, LSH256_IV224);
}
487
488
// Prepares ctx for LSH-256-256: clears the sub-message area and loads the
// precomputed LSH256_IV256 into the chaining value.
inline void init256(LSH256_SSSE3_Context* ctx)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);

  zero_submsgs(ctx);

  lsh_u32* const left  = ctx->cv_l;
  lsh_u32* const right = ctx->cv_r;
  load_iv(left, right, LSH256_IV256);
}
495
496
/* -------------------------------------------------------- */
497
498
// Finalization: folds the right half of the chaining value into the left
// half (cv_l ^= cv_r). get_hash() then reads the digest from cv_l.
inline void fin(LSH256_SSSE3_Context* ctx)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);

  lsh_u32* const left = ctx->cv_l;
  const lsh_u32* const right = ctx->cv_r;

  for (unsigned int k = 0; k < 8; k += 4)
  {
    const __m128i l = _mm_loadu_si128(CONST_M128_CAST(left+k));
    const __m128i r = _mm_loadu_si128(CONST_M128_CAST(right+k));
    _mm_storeu_si128(M128_CAST(left+k), _mm_xor_si128(l, r));
  }
}
509
510
/* -------------------------------------------------------- */
511
512
// Copies the digest out of ctx->cv_l into pbHashVal.
//
// The digest byte length is taken from the low 16 bits of the algorithm
// type. If the type's top byte is non-zero, that many low-order bits of the
// final digest byte are cleared. pbHashVal must have room for the full
// digest length.
inline void get_hash(LSH256_SSSE3_Context* ctx, lsh_u8* pbHashVal)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);
  CRYPTOPP_ASSERT(ctx->alg_type != 0);
  CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

  const lsh_uint type = ctx->alg_type;
  const lsh_uint digest_bytes = LSH_GET_HASHBYTE(type);
  const lsh_uint spare_bits = LSH_GET_SMALL_HASHBIT(type);

  std::memcpy(pbHashVal, ctx->cv_l, digest_bytes);

  if (spare_bits != 0)
  {
    // Mask off the unused low-order bits of the last digest byte.
    pbHashVal[digest_bytes-1] &= (static_cast<lsh_u8>(0xff) << spare_bits);
  }
}
528
529
/* -------------------------------------------------------- */
530
531
// Initializes ctx for a new hash computation.
//
// The buffered-bit counter is reset to 0. For the two standard types
// (LSH_TYPE_256_256, LSH_TYPE_256_224) the sub-message area is cleared and
// the precomputed IV is loaded. For any other type the IV is generated: the
// chaining value is zeroed, seeded with the maximum digest byte length and
// the requested digest bit length, and run through all NUM_STEPS mix/permute
// steps.
//
// Always returns LSH_SUCCESS.
lsh_err lsh256_ssse3_init(LSH256_SSSE3_Context* ctx)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);
  CRYPTOPP_ASSERT(ctx->alg_type != 0);

  const lsh_u32 alg_type = ctx->alg_type;
  ctx->remain_databitlen = 0;

  switch (alg_type)
  {
  case LSH_TYPE_256_256:
    init256(ctx);
    return LSH_SUCCESS;
  case LSH_TYPE_256_224:
    init224(ctx);
    return LSH_SUCCESS;
  default:
    break;
  }

  // Non-standard digest size: generate the IV.
  lsh_u32* cv_l = ctx->cv_l;
  lsh_u32* cv_r = ctx->cv_r;

  // Declared next to its only use; NULLPTR rather than NULL, matching the
  // rest of this file.
  const lsh_u32* const_v = NULLPTR;

  zero_iv(cv_l, cv_r);
  cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
  cv_l[1] = LSH_GET_HASHBIT(alg_type);

  // Each iteration runs an even and an odd step and consumes 16
  // step-constant words.
  for (size_t i = 0; i < NUM_STEPS / 2; i++)
  {
    load_sc(&const_v, i * 16);
    mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    load_sc(&const_v, i * 16 + 8);
    mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);
  }

  return LSH_SUCCESS;
}
573
574
// Absorbs databitlen bits of message from data into ctx.
//
// This implementation is byte oriented: databitlen must be a multiple of 8.
// Whole 128-byte blocks are compressed immediately; any tail is buffered in
// ctx->last_block and its length recorded in ctx->remain_databitlen.
//
// Returns:
//   LSH_SUCCESS                 on success (including databitlen == 0)
//   LSH_ERR_INVALID_STATE       if the context claims a full block or more
//                               is already buffered
//   LSH_ERR_INVALID_DATABITLEN  if databitlen is not a multiple of 8
//
// The former bit-granular branches (guarded by constants fixed at 0) were
// unreachable and have been removed; one of them would have read
// data[databytelen], one byte past the input, had it ever been enabled.
lsh_err lsh256_ssse3_update(LSH256_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);
  CRYPTOPP_ASSERT(data != NULLPTR);
  CRYPTOPP_ASSERT(databitlen % 8 == 0);
  CRYPTOPP_ASSERT(ctx->alg_type != 0);

  if (databitlen == 0){
    return LSH_SUCCESS;
  }

  // We are byte oriented. Tail bits are not supported.
  size_t databytelen = databitlen >> 3;
  const size_t remain_msg_byte = ctx->remain_databitlen >> 3;

  if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
    return LSH_ERR_INVALID_STATE;
  }
  // Enforce the byte-multiple contract in release builds too; stray bits
  // would otherwise be added to remain_databitlen and corrupt the count.
  if ((databitlen & 7) != 0){
    return LSH_ERR_INVALID_DATABITLEN;
  }

  // Not enough for a full block: just buffer the input.
  if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
  {
    std::memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
    ctx->remain_databitlen += static_cast<lsh_uint>(databitlen);
    return LSH_SUCCESS;
  }

  // Top up the buffered partial block and compress it.
  if (remain_msg_byte > 0){
    const size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
    std::memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
    compress(ctx, ctx->last_block);
    data += more_byte;
    databytelen -= more_byte;
    ctx->remain_databitlen = 0;
  }

  // Compress whole blocks straight from the input. The data pointer can be
  // unaligned here (for example after the top-up above); compress() uses
  // unaligned loads for the message block.
  while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
  {
    compress(ctx, data);
    data += LSH256_MSG_BLK_BYTE_LEN;
    databytelen -= LSH256_MSG_BLK_BYTE_LEN;
  }

  // Buffer whatever is left (fewer than 128 bytes).
  if (databytelen > 0){
    std::memcpy(ctx->last_block, data, databytelen);
    ctx->remain_databitlen = static_cast<lsh_uint>(databytelen << 3);
  }

  return LSH_SUCCESS;
}
644
645
// Finishes the hash and writes the digest to hashval.
//
// The buffered tail in ctx->last_block is padded with a single 0x80 byte
// followed by zeros up to the 128-byte block size, compressed, and the
// digest is extracted with fin() and get_hash(). hashval must have room for
// the full digest length of the algorithm type.
//
// Returns LSH_SUCCESS, or LSH_ERR_INVALID_STATE if the context claims a full
// block or more is buffered.
//
// The former bit-granular padding branch was guarded by a constant fixed at
// 0 and could never run; it has been removed.
lsh_err lsh256_ssse3_final(LSH256_SSSE3_Context* ctx, lsh_u8* hashval)
{
  CRYPTOPP_ASSERT(ctx != NULLPTR);
  CRYPTOPP_ASSERT(hashval != NULLPTR);

  // We are byte oriented. Tail bits are always 0.
  const size_t remain_msg_byte = ctx->remain_databitlen >> 3;

  if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
    return LSH_ERR_INVALID_STATE;
  }

  // Padding: 0x80 marker, then zero fill to the end of the block.
  ctx->last_block[remain_msg_byte] = 0x80;
  std::memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

  compress(ctx, ctx->last_block);

  fin(ctx);
  get_hash(ctx, hashval);

  return LSH_SUCCESS;
}
674
675
ANONYMOUS_NAMESPACE_END  // Anonymous
676
677
NAMESPACE_BEGIN(CryptoPP)
678
679
extern
680
void LSH256_Base_Restart_SSSE3(word32* state)
681
0
{
682
0
  state[RemainingBits] = 0;
683
0
  LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
684
0
  lsh_err err = lsh256_ssse3_init(&ctx);
685
686
0
  if (err != LSH_SUCCESS)
687
0
    throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_init failed");
688
0
}
689
690
extern
691
void LSH256_Base_Update_SSSE3(word32* state, const byte *input, size_t size)
692
0
{
693
0
  LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
694
0
  lsh_err err = lsh256_ssse3_update(&ctx, input, 8*size);
695
696
0
  if (err != LSH_SUCCESS)
697
0
    throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_update failed");
698
0
}
699
700
extern
701
void LSH256_Base_TruncatedFinal_SSSE3(word32* state, byte *hash, size_t)
702
0
{
703
0
  LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
704
0
  lsh_err err = lsh256_ssse3_final(&ctx, hash);
705
706
0
  if (err != LSH_SUCCESS)
707
0
    throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_final failed");
708
0
}
709
710
NAMESPACE_END
711
712
#endif  // CRYPTOPP_SSSE3_AVAILABLE