Coverage Report

Created: 2024-11-21 07:03

/src/cryptopp/salsa.cpp
Line
Count
Source (jump to first uncovered line)
1
// salsa.cpp - originally written and placed in the public domain by Wei Dai
2
3
// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM salsa.cpp" to generate MASM code
4
5
#include "pch.h"
6
#include "config.h"
7
8
#ifndef CRYPTOPP_GENERATE_X64_MASM
9
10
#include "salsa.h"
11
#include "argnames.h"
12
#include "misc.h"
13
#include "cpu.h"
14
15
#if CRYPTOPP_MSC_VERSION
16
# pragma warning(disable: 4702 4740)
17
#endif
18
19
// Clang due to "Inline assembly operands don't work with .intel_syntax"
20
//   https://llvm.org/bugs/show_bug.cgi?id=24232
21
#if defined(CRYPTOPP_DISABLE_SALSA_ASM)
22
# undef CRYPTOPP_X86_ASM_AVAILABLE
23
# undef CRYPTOPP_X32_ASM_AVAILABLE
24
# undef CRYPTOPP_X64_ASM_AVAILABLE
25
# undef CRYPTOPP_SSE2_ASM_AVAILABLE
26
# undef CRYPTOPP_SSSE3_ASM_AVAILABLE
27
#endif
28
29
ANONYMOUS_NAMESPACE_BEGIN
30
31
// Can't use GetAlignmentOf<word32>() because of C++11 and constexpr
32
// Can use 'const unsigned int' because of MSVC 2013
33
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
34
# define ALIGN_SPEC 16
35
#else
36
# define ALIGN_SPEC 4
37
#endif
38
39
ANONYMOUS_NAMESPACE_END
40
41
NAMESPACE_BEGIN(CryptoPP)
42
43
#if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING)
44
void Salsa20_TestInstantiations()
45
{
46
  Salsa20::Encryption x1;
47
  XSalsa20::Encryption x2;
48
}
49
#endif
50
51
void Salsa20_Core(word32* data, unsigned int rounds)
52
77.0k
{
53
77.0k
  CRYPTOPP_ASSERT(data != NULLPTR);
54
77.0k
  CRYPTOPP_ASSERT(rounds % 2 == 0);
55
56
77.0k
  CRYPTOPP_ALIGN_DATA(ALIGN_SPEC) word32 x[16];
57
58
1.30M
  for (size_t i = 0; i < 16; ++i)
59
1.23M
    x[i] = data[i];
60
61
  // Rounds must be even
62
385k
  for (size_t i = 0; i < rounds; i += 2)
63
308k
  {
64
308k
    x[ 4] ^= rotlConstant< 7>(x[ 0]+x[12]);
65
308k
    x[ 8] ^= rotlConstant< 9>(x[ 4]+x[ 0]);
66
308k
    x[12] ^= rotlConstant<13>(x[ 8]+x[ 4]);
67
308k
    x[ 0] ^= rotlConstant<18>(x[12]+x[ 8]);
68
69
308k
    x[ 9] ^= rotlConstant< 7>(x[ 5]+x[ 1]);
70
308k
    x[13] ^= rotlConstant< 9>(x[ 9]+x[ 5]);
71
308k
    x[ 1] ^= rotlConstant<13>(x[13]+x[ 9]);
72
308k
    x[ 5] ^= rotlConstant<18>(x[ 1]+x[13]);
73
74
308k
    x[14] ^= rotlConstant< 7>(x[10]+x[ 6]);
75
308k
    x[ 2] ^= rotlConstant< 9>(x[14]+x[10]);
76
308k
    x[ 6] ^= rotlConstant<13>(x[ 2]+x[14]);
77
308k
    x[10] ^= rotlConstant<18>(x[ 6]+x[ 2]);
78
79
308k
    x[ 3] ^= rotlConstant< 7>(x[15]+x[11]);
80
308k
    x[ 7] ^= rotlConstant< 9>(x[ 3]+x[15]);
81
308k
    x[11] ^= rotlConstant<13>(x[ 7]+x[ 3]);
82
308k
    x[15] ^= rotlConstant<18>(x[11]+x[ 7]);
83
84
308k
    x[ 1] ^= rotlConstant< 7>(x[ 0]+x[ 3]);
85
308k
    x[ 2] ^= rotlConstant< 9>(x[ 1]+x[ 0]);
86
308k
    x[ 3] ^= rotlConstant<13>(x[ 2]+x[ 1]);
87
308k
    x[ 0] ^= rotlConstant<18>(x[ 3]+x[ 2]);
88
89
308k
    x[ 6] ^= rotlConstant< 7>(x[ 5]+x[ 4]);
90
308k
    x[ 7] ^= rotlConstant< 9>(x[ 6]+x[ 5]);
91
308k
    x[ 4] ^= rotlConstant<13>(x[ 7]+x[ 6]);
92
308k
    x[ 5] ^= rotlConstant<18>(x[ 4]+x[ 7]);
93
94
308k
    x[11] ^= rotlConstant< 7>(x[10]+x[ 9]);
95
308k
    x[ 8] ^= rotlConstant< 9>(x[11]+x[10]);
96
308k
    x[ 9] ^= rotlConstant<13>(x[ 8]+x[11]);
97
308k
    x[10] ^= rotlConstant<18>(x[ 9]+x[ 8]);
98
99
308k
    x[12] ^= rotlConstant< 7>(x[15]+x[14]);
100
308k
    x[13] ^= rotlConstant< 9>(x[12]+x[15]);
101
308k
    x[14] ^= rotlConstant<13>(x[13]+x[12]);
102
308k
    x[15] ^= rotlConstant<18>(x[14]+x[13]);
103
308k
  }
104
105
// OpenMP 4.0 released July 2013.
106
#if _OPENMP >= 201307
107
  #pragma omp simd
108
  for (size_t i = 0; i < 16; ++i)
109
    data[i] += x[i];
110
#else
111
1.30M
  for (size_t i = 0; i < 16; ++i)
112
1.23M
    data[i] += x[i];
113
77.0k
#endif
114
77.0k
}
115
116
std::string Salsa20_Policy::AlgorithmProvider() const
117
0
{
118
#if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SALSA_ASM)
119
  if (HasSSE2())
120
    return "SSE2";
121
#endif
122
0
  return "C++";
123
0
}
124
125
void Salsa20_Policy::CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
126
0
{
127
  // Use previous rounds as the default value
128
0
  int rounds = params.GetIntValueWithDefault(Name::Rounds(), m_rounds);
129
0
  if (rounds != 20 && rounds != 12 && rounds != 8)
130
0
    throw InvalidRounds(Salsa20::StaticAlgorithmName(), rounds);
131
132
  // Latch a good value
133
0
  m_rounds = rounds;
134
135
  // m_state is reordered for SSE2
136
0
  GetBlock<word32, LittleEndian> get1(key);
137
0
  get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]);
138
0
  GetBlock<word32, LittleEndian> get2(key + length - 16);
139
0
  get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]);
140
141
  // "expand 16-byte k" or "expand 32-byte k"
142
0
  m_state[0] = 0x61707865;
143
0
  m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
144
0
  m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
145
0
  m_state[3] = 0x6b206574;
146
0
}
147
148
void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length)
149
0
{
150
0
  CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
151
0
  CRYPTOPP_ASSERT(length==8);
152
153
0
  GetBlock<word32, LittleEndian> get(IV);
154
0
  get(m_state[14])(m_state[11]);
155
0
  m_state[8] = m_state[5] = 0;
156
0
}
157
158
void Salsa20_Policy::SeekToIteration(lword iterationCount)
159
0
{
160
0
  m_state[8] = (word32)iterationCount;
161
0
  m_state[5] = (word32)SafeRightShift<32>(iterationCount);
162
0
}
163
164
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
165
unsigned int Salsa20_Policy::GetAlignment() const
166
0
{
167
#if CRYPTOPP_SSE2_ASM_AVAILABLE
168
  if (HasSSE2())
169
    return 16;
170
  else
171
#endif
172
0
    return GetAlignmentOf<word32>();
173
0
}
174
175
unsigned int Salsa20_Policy::GetOptimalBlockSize() const
176
0
{
177
#if CRYPTOPP_SSE2_ASM_AVAILABLE
178
  if (HasSSE2())
179
    return 4*BYTES_PER_ITERATION;
180
  else
181
#endif
182
0
    return BYTES_PER_ITERATION;
183
0
}
184
#endif
185
186
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
187
extern "C" {
188
void Salsa20_OperateKeystream(byte *output, const byte *input, size_t iterationCount, int rounds, void *state);
189
}
190
#endif
191
192
#if CRYPTOPP_MSC_VERSION
193
# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
194
#endif
195
196
void Salsa20_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
197
0
{
198
0
#endif  // #ifdef CRYPTOPP_GENERATE_X64_MASM
199
200
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
201
  Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.data());
202
  return;
203
#endif
204
205
#if CRYPTOPP_SSE2_ASM_AVAILABLE
206
#ifdef CRYPTOPP_GENERATE_X64_MASM
207
    ALIGN   8
208
  Salsa20_OperateKeystream  PROC FRAME
209
    mov   r10, [rsp + 5*8]      ; state
210
    alloc_stack(10*16 + 32*16 + 8)
211
    save_xmm128 xmm6, 0200h
212
    save_xmm128 xmm7, 0210h
213
    save_xmm128 xmm8, 0220h
214
    save_xmm128 xmm9, 0230h
215
    save_xmm128 xmm10, 0240h
216
    save_xmm128 xmm11, 0250h
217
    save_xmm128 xmm12, 0260h
218
    save_xmm128 xmm13, 0270h
219
    save_xmm128 xmm14, 0280h
220
    save_xmm128 xmm15, 0290h
221
    .endprolog
222
223
  #define REG_output      rcx
224
  #define REG_input     rdx
225
  #define REG_iterationCount  r8
226
  #define REG_state     r10
227
  #define REG_rounds      r9d
228
  #define REG_roundsLeft    eax
229
  #define REG_temp32      r11d
230
  #define REG_temp      r11
231
  #define SSE2_WORKSPACE    rsp
232
#else
233
  if (HasSSE2())
234
  {
235
  #if CRYPTOPP_BOOL_X64
236
    #define REG_output      %1
237
    #define REG_input     %0
238
    #define REG_iterationCount  %2
239
    #define REG_state     %4    /* constant */
240
    #define REG_rounds      %3    /* constant */
241
    #define REG_roundsLeft    eax
242
    #define REG_temp32      edx
243
    #define REG_temp      rdx
244
    #define SSE2_WORKSPACE    %5    /* constant */
245
246
    CRYPTOPP_ALIGN_DATA(16) byte workspace[16*32];
247
  #else
248
    #define REG_output      edi
249
    #define REG_input     eax
250
    #define REG_iterationCount  ecx
251
    #define REG_state     esi
252
    #define REG_rounds      edx
253
    #define REG_roundsLeft    ebx
254
    #define REG_temp32      ebp
255
    #define REG_temp      ebp
256
    #define SSE2_WORKSPACE    esp + WORD_SZ
257
  #endif
258
259
  #ifdef __GNUC__
260
    __asm__ __volatile__
261
    (
262
      INTEL_NOPREFIX
263
      AS_PUSH_IF86( bx)
264
  #else
265
    void *s = m_state.data();
266
    word32 r = m_rounds;
267
268
    AS2(  mov   REG_iterationCount, iterationCount)
269
    AS2(  mov   REG_input, input)
270
    AS2(  mov   REG_output, output)
271
    AS2(  mov   REG_state, s)
272
    AS2(  mov   REG_rounds, r)
273
  #endif
274
#endif  // #ifndef CRYPTOPP_GENERATE_X64_MASM
275
276
    AS_PUSH_IF86( bp)
277
    AS2(  cmp   REG_iterationCount, 4)
278
    ASJ(  jl,   5, f)
279
280
#if CRYPTOPP_BOOL_X86
281
    AS2(  mov   ebx, esp)
282
    AS2(  and   esp, -16)
283
    AS2(  sub   esp, 32*16)
284
    AS1(  push  ebx)
285
#endif
286
287
#define SSE2_EXPAND_S(i, j)   \
288
  ASS(  pshufd  xmm4, xmm##i, j, j, j, j) \
289
  AS2(  movdqa  [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4)
290
291
    AS2(  movdqa  xmm0, [REG_state + 0*16])
292
    AS2(  movdqa  xmm1, [REG_state + 1*16])
293
    AS2(  movdqa  xmm2, [REG_state + 2*16])
294
    AS2(  movdqa  xmm3, [REG_state + 3*16])
295
    SSE2_EXPAND_S(0, 0)
296
    SSE2_EXPAND_S(0, 1)
297
    SSE2_EXPAND_S(0, 2)
298
    SSE2_EXPAND_S(0, 3)
299
    SSE2_EXPAND_S(1, 0)
300
    SSE2_EXPAND_S(1, 2)
301
    SSE2_EXPAND_S(1, 3)
302
    SSE2_EXPAND_S(2, 1)
303
    SSE2_EXPAND_S(2, 2)
304
    SSE2_EXPAND_S(2, 3)
305
    SSE2_EXPAND_S(3, 0)
306
    SSE2_EXPAND_S(3, 1)
307
    SSE2_EXPAND_S(3, 2)
308
    SSE2_EXPAND_S(3, 3)
309
310
#define SSE2_EXPAND_S85(i)    \
311
    AS2(  mov   dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft)  \
312
    AS2(  mov   dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32)  \
313
    AS2(  add   REG_roundsLeft, 1)  \
314
    AS2(  adc   REG_temp32, 0)
315
316
    ASL(1)
317
    AS2(  mov   REG_roundsLeft, dword ptr [REG_state + 8*4])
318
    AS2(  mov   REG_temp32, dword ptr [REG_state + 5*4])
319
    SSE2_EXPAND_S85(0)
320
    SSE2_EXPAND_S85(1)
321
    SSE2_EXPAND_S85(2)
322
    SSE2_EXPAND_S85(3)
323
    AS2(  mov   dword ptr [REG_state + 8*4], REG_roundsLeft)
324
    AS2(  mov   dword ptr [REG_state + 5*4], REG_temp32)
325
326
#ifdef __XOP__
327
#define SSE2_QUARTER_ROUND(a, b, d, i)    \
328
  AS2(  movdqa  xmm4, xmm##d)     \
329
  AS2(  paddd xmm4, xmm##a)     \
330
  AS3(  vprotd  xmm4, xmm4, i)      \
331
  AS2(  pxor  xmm##b, xmm4)
332
#else
333
#define SSE2_QUARTER_ROUND(a, b, d, i)    \
334
  AS2(  movdqa  xmm4, xmm##d)     \
335
  AS2(  paddd xmm4, xmm##a)     \
336
  AS2(  movdqa  xmm5, xmm4)       \
337
  AS2(  pslld xmm4, i)        \
338
  AS2(  psrld xmm5, 32-i)       \
339
  AS2(  pxor  xmm##b, xmm4)     \
340
  AS2(  pxor  xmm##b, xmm5)
341
#endif
342
343
#define L01(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  xmm##A, [SSE2_WORKSPACE + d*16 + i*256])  /* y3 */
344
#define L02(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  xmm##C, [SSE2_WORKSPACE + a*16 + i*256])  /* y0 */
345
#define L03(A,B,C,D,a,b,c,d,i)    AS2(  paddd xmm##A, xmm##C)   /* y0+y3 */
346
347
#ifdef __XOP__
348
#define L04(A,B,C,D,a,b,c,d,i)
349
#define L05(A,B,C,D,a,b,c,d,i)    AS3(  vprotd  xmm##A, xmm##A, 7)
350
#define L06(A,B,C,D,a,b,c,d,i)
351
#define L07(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, [SSE2_WORKSPACE + b*16 + i*256])
352
#define L08(A,B,C,D,a,b,c,d,i)
353
#else
354
#define L04(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  xmm##B, xmm##A)
355
#define L05(A,B,C,D,a,b,c,d,i)    AS2(  pslld xmm##A, 7)
356
#define L06(A,B,C,D,a,b,c,d,i)    AS2(  psrld xmm##B, 32-7)
357
#define L07(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, [SSE2_WORKSPACE + b*16 + i*256])
358
#define L08(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, xmm##B)   /* z1 */
359
#endif
360
361
#define L09(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  [SSE2_WORKSPACE + b*16], xmm##A)
362
#define L10(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  xmm##B, xmm##A)
363
#define L11(A,B,C,D,a,b,c,d,i)    AS2(  paddd xmm##A, xmm##C)   /* z1+y0 */
364
365
#ifdef __XOP__
366
#define L12(A,B,C,D,a,b,c,d,i)
367
#define L13(A,B,C,D,a,b,c,d,i)    AS3(  vprotd  xmm##A, xmm##A, 9)
368
#define L14(A,B,C,D,a,b,c,d,i)
369
#define L15(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, [SSE2_WORKSPACE + c*16 + i*256])
370
#define L16(A,B,C,D,a,b,c,d,i)
371
#else
372
#define L12(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  xmm##D, xmm##A)
373
#define L13(A,B,C,D,a,b,c,d,i)    AS2(  pslld xmm##A, 9)
374
#define L14(A,B,C,D,a,b,c,d,i)    AS2(  psrld xmm##D, 32-9)
375
#define L15(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, [SSE2_WORKSPACE + c*16 + i*256])
376
#define L16(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, xmm##D)   /* z2 */
377
#endif
378
379
#define L17(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  [SSE2_WORKSPACE + c*16], xmm##A)
380
#define L18(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  xmm##D, xmm##A)
381
#define L19(A,B,C,D,a,b,c,d,i)    AS2(  paddd xmm##A, xmm##B)   /* z2+z1 */
382
383
#ifdef __XOP__
384
#define L20(A,B,C,D,a,b,c,d,i)
385
#define L21(A,B,C,D,a,b,c,d,i)    AS3(  vprotd  xmm##A, xmm##A, 13)
386
#define L22(A,B,C,D,a,b,c,d,i)
387
#define L23(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, [SSE2_WORKSPACE + d*16 + i*256])
388
#define L24(A,B,C,D,a,b,c,d,i)
389
#else
390
#define L20(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  xmm##B, xmm##A)
391
#define L21(A,B,C,D,a,b,c,d,i)    AS2(  pslld xmm##A, 13)
392
#define L22(A,B,C,D,a,b,c,d,i)    AS2(  psrld xmm##B, 32-13)
393
#define L23(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, [SSE2_WORKSPACE + d*16 + i*256])
394
#define L24(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, xmm##B)   /* z3 */
395
#endif
396
397
#define L25(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  [SSE2_WORKSPACE + d*16], xmm##A)
398
#define L26(A,B,C,D,a,b,c,d,i)    AS2(  paddd xmm##A, xmm##D)   /* z3+z2 */
399
400
#ifdef __XOP__
401
#define L27(A,B,C,D,a,b,c,d,i)
402
#define L28(A,B,C,D,a,b,c,d,i)    AS3(  vprotd  xmm##A, xmm##A, 18)
403
#define L29(A,B,C,D,a,b,c,d,i)
404
#define L30(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, xmm##C)   /* xor y0 */
405
#define L31(A,B,C,D,a,b,c,d,i)
406
#else
407
#define L27(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  xmm##D, xmm##A)
408
#define L28(A,B,C,D,a,b,c,d,i)    AS2(  pslld xmm##A, 18)
409
#define L29(A,B,C,D,a,b,c,d,i)    AS2(  psrld xmm##D, 32-18)
410
#define L30(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, xmm##C)   /* xor y0 */
411
#define L31(A,B,C,D,a,b,c,d,i)    AS2(  pxor  xmm##A, xmm##D)   /* z0 */
412
#endif
413
414
#define L32(A,B,C,D,a,b,c,d,i)    AS2(  movdqa  [SSE2_WORKSPACE + a*16], xmm##A)
415
416
#define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h)  \
417
  L01(0,1,2,3, a,b,c,d, i)  L01(4,5,6,7, e,f,g,h, i)  \
418
  L02(0,1,2,3, a,b,c,d, i)  L02(4,5,6,7, e,f,g,h, i)  \
419
  L03(0,1,2,3, a,b,c,d, i)  L03(4,5,6,7, e,f,g,h, i)  \
420
  L04(0,1,2,3, a,b,c,d, i)  L04(4,5,6,7, e,f,g,h, i)  \
421
  L05(0,1,2,3, a,b,c,d, i)  L05(4,5,6,7, e,f,g,h, i)  \
422
  L06(0,1,2,3, a,b,c,d, i)  L06(4,5,6,7, e,f,g,h, i)  \
423
  L07(0,1,2,3, a,b,c,d, i)  L07(4,5,6,7, e,f,g,h, i)  \
424
  L08(0,1,2,3, a,b,c,d, i)  L08(4,5,6,7, e,f,g,h, i)  \
425
  L09(0,1,2,3, a,b,c,d, i)  L09(4,5,6,7, e,f,g,h, i)  \
426
  L10(0,1,2,3, a,b,c,d, i)  L10(4,5,6,7, e,f,g,h, i)  \
427
  L11(0,1,2,3, a,b,c,d, i)  L11(4,5,6,7, e,f,g,h, i)  \
428
  L12(0,1,2,3, a,b,c,d, i)  L12(4,5,6,7, e,f,g,h, i)  \
429
  L13(0,1,2,3, a,b,c,d, i)  L13(4,5,6,7, e,f,g,h, i)  \
430
  L14(0,1,2,3, a,b,c,d, i)  L14(4,5,6,7, e,f,g,h, i)  \
431
  L15(0,1,2,3, a,b,c,d, i)  L15(4,5,6,7, e,f,g,h, i)  \
432
  L16(0,1,2,3, a,b,c,d, i)  L16(4,5,6,7, e,f,g,h, i)  \
433
  L17(0,1,2,3, a,b,c,d, i)  L17(4,5,6,7, e,f,g,h, i)  \
434
  L18(0,1,2,3, a,b,c,d, i)  L18(4,5,6,7, e,f,g,h, i)  \
435
  L19(0,1,2,3, a,b,c,d, i)  L19(4,5,6,7, e,f,g,h, i)  \
436
  L20(0,1,2,3, a,b,c,d, i)  L20(4,5,6,7, e,f,g,h, i)  \
437
  L21(0,1,2,3, a,b,c,d, i)  L21(4,5,6,7, e,f,g,h, i)  \
438
  L22(0,1,2,3, a,b,c,d, i)  L22(4,5,6,7, e,f,g,h, i)  \
439
  L23(0,1,2,3, a,b,c,d, i)  L23(4,5,6,7, e,f,g,h, i)  \
440
  L24(0,1,2,3, a,b,c,d, i)  L24(4,5,6,7, e,f,g,h, i)  \
441
  L25(0,1,2,3, a,b,c,d, i)  L25(4,5,6,7, e,f,g,h, i)  \
442
  L26(0,1,2,3, a,b,c,d, i)  L26(4,5,6,7, e,f,g,h, i)  \
443
  L27(0,1,2,3, a,b,c,d, i)  L27(4,5,6,7, e,f,g,h, i)  \
444
  L28(0,1,2,3, a,b,c,d, i)  L28(4,5,6,7, e,f,g,h, i)  \
445
  L29(0,1,2,3, a,b,c,d, i)  L29(4,5,6,7, e,f,g,h, i)  \
446
  L30(0,1,2,3, a,b,c,d, i)  L30(4,5,6,7, e,f,g,h, i)  \
447
  L31(0,1,2,3, a,b,c,d, i)  L31(4,5,6,7, e,f,g,h, i)  \
448
  L32(0,1,2,3, a,b,c,d, i)  L32(4,5,6,7, e,f,g,h, i)
449
450
#define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \
451
  L01(0,1,2,3, a,b,c,d, i)  L01(4,5,6,7, e,f,g,h, i)  L01(8,9,10,11, A,B,C,D, i)  L01(12,13,14,15, E,F,G,H, i)  \
452
  L02(0,1,2,3, a,b,c,d, i)  L02(4,5,6,7, e,f,g,h, i)  L02(8,9,10,11, A,B,C,D, i)  L02(12,13,14,15, E,F,G,H, i)  \
453
  L03(0,1,2,3, a,b,c,d, i)  L03(4,5,6,7, e,f,g,h, i)  L03(8,9,10,11, A,B,C,D, i)  L03(12,13,14,15, E,F,G,H, i)  \
454
  L04(0,1,2,3, a,b,c,d, i)  L04(4,5,6,7, e,f,g,h, i)  L04(8,9,10,11, A,B,C,D, i)  L04(12,13,14,15, E,F,G,H, i)  \
455
  L05(0,1,2,3, a,b,c,d, i)  L05(4,5,6,7, e,f,g,h, i)  L05(8,9,10,11, A,B,C,D, i)  L05(12,13,14,15, E,F,G,H, i)  \
456
  L06(0,1,2,3, a,b,c,d, i)  L06(4,5,6,7, e,f,g,h, i)  L06(8,9,10,11, A,B,C,D, i)  L06(12,13,14,15, E,F,G,H, i)  \
457
  L07(0,1,2,3, a,b,c,d, i)  L07(4,5,6,7, e,f,g,h, i)  L07(8,9,10,11, A,B,C,D, i)  L07(12,13,14,15, E,F,G,H, i)  \
458
  L08(0,1,2,3, a,b,c,d, i)  L08(4,5,6,7, e,f,g,h, i)  L08(8,9,10,11, A,B,C,D, i)  L08(12,13,14,15, E,F,G,H, i)  \
459
  L09(0,1,2,3, a,b,c,d, i)  L09(4,5,6,7, e,f,g,h, i)  L09(8,9,10,11, A,B,C,D, i)  L09(12,13,14,15, E,F,G,H, i)  \
460
  L10(0,1,2,3, a,b,c,d, i)  L10(4,5,6,7, e,f,g,h, i)  L10(8,9,10,11, A,B,C,D, i)  L10(12,13,14,15, E,F,G,H, i)  \
461
  L11(0,1,2,3, a,b,c,d, i)  L11(4,5,6,7, e,f,g,h, i)  L11(8,9,10,11, A,B,C,D, i)  L11(12,13,14,15, E,F,G,H, i)  \
462
  L12(0,1,2,3, a,b,c,d, i)  L12(4,5,6,7, e,f,g,h, i)  L12(8,9,10,11, A,B,C,D, i)  L12(12,13,14,15, E,F,G,H, i)  \
463
  L13(0,1,2,3, a,b,c,d, i)  L13(4,5,6,7, e,f,g,h, i)  L13(8,9,10,11, A,B,C,D, i)  L13(12,13,14,15, E,F,G,H, i)  \
464
  L14(0,1,2,3, a,b,c,d, i)  L14(4,5,6,7, e,f,g,h, i)  L14(8,9,10,11, A,B,C,D, i)  L14(12,13,14,15, E,F,G,H, i)  \
465
  L15(0,1,2,3, a,b,c,d, i)  L15(4,5,6,7, e,f,g,h, i)  L15(8,9,10,11, A,B,C,D, i)  L15(12,13,14,15, E,F,G,H, i)  \
466
  L16(0,1,2,3, a,b,c,d, i)  L16(4,5,6,7, e,f,g,h, i)  L16(8,9,10,11, A,B,C,D, i)  L16(12,13,14,15, E,F,G,H, i)  \
467
  L17(0,1,2,3, a,b,c,d, i)  L17(4,5,6,7, e,f,g,h, i)  L17(8,9,10,11, A,B,C,D, i)  L17(12,13,14,15, E,F,G,H, i)  \
468
  L18(0,1,2,3, a,b,c,d, i)  L18(4,5,6,7, e,f,g,h, i)  L18(8,9,10,11, A,B,C,D, i)  L18(12,13,14,15, E,F,G,H, i)  \
469
  L19(0,1,2,3, a,b,c,d, i)  L19(4,5,6,7, e,f,g,h, i)  L19(8,9,10,11, A,B,C,D, i)  L19(12,13,14,15, E,F,G,H, i)  \
470
  L20(0,1,2,3, a,b,c,d, i)  L20(4,5,6,7, e,f,g,h, i)  L20(8,9,10,11, A,B,C,D, i)  L20(12,13,14,15, E,F,G,H, i)  \
471
  L21(0,1,2,3, a,b,c,d, i)  L21(4,5,6,7, e,f,g,h, i)  L21(8,9,10,11, A,B,C,D, i)  L21(12,13,14,15, E,F,G,H, i)  \
472
  L22(0,1,2,3, a,b,c,d, i)  L22(4,5,6,7, e,f,g,h, i)  L22(8,9,10,11, A,B,C,D, i)  L22(12,13,14,15, E,F,G,H, i)  \
473
  L23(0,1,2,3, a,b,c,d, i)  L23(4,5,6,7, e,f,g,h, i)  L23(8,9,10,11, A,B,C,D, i)  L23(12,13,14,15, E,F,G,H, i)  \
474
  L24(0,1,2,3, a,b,c,d, i)  L24(4,5,6,7, e,f,g,h, i)  L24(8,9,10,11, A,B,C,D, i)  L24(12,13,14,15, E,F,G,H, i)  \
475
  L25(0,1,2,3, a,b,c,d, i)  L25(4,5,6,7, e,f,g,h, i)  L25(8,9,10,11, A,B,C,D, i)  L25(12,13,14,15, E,F,G,H, i)  \
476
  L26(0,1,2,3, a,b,c,d, i)  L26(4,5,6,7, e,f,g,h, i)  L26(8,9,10,11, A,B,C,D, i)  L26(12,13,14,15, E,F,G,H, i)  \
477
  L27(0,1,2,3, a,b,c,d, i)  L27(4,5,6,7, e,f,g,h, i)  L27(8,9,10,11, A,B,C,D, i)  L27(12,13,14,15, E,F,G,H, i)  \
478
  L28(0,1,2,3, a,b,c,d, i)  L28(4,5,6,7, e,f,g,h, i)  L28(8,9,10,11, A,B,C,D, i)  L28(12,13,14,15, E,F,G,H, i)  \
479
  L29(0,1,2,3, a,b,c,d, i)  L29(4,5,6,7, e,f,g,h, i)  L29(8,9,10,11, A,B,C,D, i)  L29(12,13,14,15, E,F,G,H, i)  \
480
  L30(0,1,2,3, a,b,c,d, i)  L30(4,5,6,7, e,f,g,h, i)  L30(8,9,10,11, A,B,C,D, i)  L30(12,13,14,15, E,F,G,H, i)  \
481
  L31(0,1,2,3, a,b,c,d, i)  L31(4,5,6,7, e,f,g,h, i)  L31(8,9,10,11, A,B,C,D, i)  L31(12,13,14,15, E,F,G,H, i)  \
482
  L32(0,1,2,3, a,b,c,d, i)  L32(4,5,6,7, e,f,g,h, i)  L32(8,9,10,11, A,B,C,D, i)  L32(12,13,14,15, E,F,G,H, i)
483
484
#if CRYPTOPP_BOOL_X64
485
    SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
486
#else
487
    SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15)
488
    SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13)
489
#endif
490
    AS2(  mov   REG_roundsLeft, REG_rounds)
491
    ASJ(  jmp,  2, f)
492
493
    ASL(SSE2_Salsa_Output)
494
    AS2(  movdqa    xmm0, xmm4)
495
    AS2(  punpckldq xmm4, xmm5)
496
    AS2(  movdqa    xmm1, xmm6)
497
    AS2(  punpckldq xmm6, xmm7)
498
    AS2(  movdqa    xmm2, xmm4)
499
    AS2(  punpcklqdq  xmm4, xmm6) // e
500
    AS2(  punpckhqdq  xmm2, xmm6) // f
501
    AS2(  punpckhdq xmm0, xmm5)
502
    AS2(  punpckhdq xmm1, xmm7)
503
    AS2(  movdqa    xmm6, xmm0)
504
    AS2(  punpcklqdq  xmm0, xmm1) // g
505
    AS2(  punpckhqdq  xmm6, xmm1) // h
506
    AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1)
507
    AS1(  ret)
508
509
    ASL(6)
510
#if CRYPTOPP_BOOL_X64
511
    SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
512
    ASL(2)
513
    SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6)
514
#else
515
    SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15)
516
    SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13)
517
    ASL(2)
518
    SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6)
519
    SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4)
520
#endif
521
    AS2(  sub   REG_roundsLeft, 2)
522
    ASJ(  jnz,  6, b)
523
524
#define SSE2_OUTPUT_4(a, b, c, d) \
525
  AS2(  movdqa    xmm4, [SSE2_WORKSPACE + a*16 + 256])\
526
  AS2(  paddd   xmm4, [SSE2_WORKSPACE + a*16])\
527
  AS2(  movdqa    xmm5, [SSE2_WORKSPACE + b*16 + 256])\
528
  AS2(  paddd   xmm5, [SSE2_WORKSPACE + b*16])\
529
  AS2(  movdqa    xmm6, [SSE2_WORKSPACE + c*16 + 256])\
530
  AS2(  paddd   xmm6, [SSE2_WORKSPACE + c*16])\
531
  AS2(  movdqa    xmm7, [SSE2_WORKSPACE + d*16 + 256])\
532
  AS2(  paddd   xmm7, [SSE2_WORKSPACE + d*16])\
533
  ASC(  call,   SSE2_Salsa_Output)
534
535
    SSE2_OUTPUT_4(0, 13, 10, 7)
536
    SSE2_OUTPUT_4(4, 1, 14, 11)
537
    SSE2_OUTPUT_4(8, 5, 2, 15)
538
    SSE2_OUTPUT_4(12, 9, 6, 3)
539
    AS2(  test  REG_input, REG_input)
540
    ASJ(  jz,   9, f)
541
    AS2(  add   REG_input, 12*16)
542
    ASL(9)
543
    AS2(  add   REG_output, 12*16)
544
    AS2(  sub   REG_iterationCount, 4)
545
    AS2(  cmp   REG_iterationCount, 4)
546
    ASJ(  jge,  1, b)
547
    AS_POP_IF86(  sp)
548
549
    ASL(5)
550
    AS2(  sub   REG_iterationCount, 1)
551
    ASJ(  jl,   4, f)
552
    AS2(  movdqa  xmm0, [REG_state + 0*16])
553
    AS2(  movdqa  xmm1, [REG_state + 1*16])
554
    AS2(  movdqa  xmm2, [REG_state + 2*16])
555
    AS2(  movdqa  xmm3, [REG_state + 3*16])
556
    AS2(  mov   REG_roundsLeft, REG_rounds)
557
558
    ASL(0)
559
    SSE2_QUARTER_ROUND(0, 1, 3, 7)
560
    SSE2_QUARTER_ROUND(1, 2, 0, 9)
561
    SSE2_QUARTER_ROUND(2, 3, 1, 13)
562
    SSE2_QUARTER_ROUND(3, 0, 2, 18)
563
    ASS(  pshufd  xmm1, xmm1, 2, 1, 0, 3)
564
    ASS(  pshufd  xmm2, xmm2, 1, 0, 3, 2)
565
    ASS(  pshufd  xmm3, xmm3, 0, 3, 2, 1)
566
    SSE2_QUARTER_ROUND(0, 3, 1, 7)
567
    SSE2_QUARTER_ROUND(3, 2, 0, 9)
568
    SSE2_QUARTER_ROUND(2, 1, 3, 13)
569
    SSE2_QUARTER_ROUND(1, 0, 2, 18)
570
    ASS(  pshufd  xmm1, xmm1, 0, 3, 2, 1)
571
    ASS(  pshufd  xmm2, xmm2, 1, 0, 3, 2)
572
    ASS(  pshufd  xmm3, xmm3, 2, 1, 0, 3)
573
    AS2(  sub   REG_roundsLeft, 2)
574
    ASJ(  jnz,  0, b)
575
576
    AS2(  paddd xmm0, [REG_state + 0*16])
577
    AS2(  paddd xmm1, [REG_state + 1*16])
578
    AS2(  paddd xmm2, [REG_state + 2*16])
579
    AS2(  paddd xmm3, [REG_state + 3*16])
580
581
    AS2(  add   dword ptr [REG_state + 8*4], 1)
582
    AS2(  adc   dword ptr [REG_state + 5*4], 0)
583
584
    AS2(  pcmpeqb xmm6, xmm6)     // all ones
585
    AS2(  psrlq xmm6, 32)     // lo32 mask
586
    ASS(  pshufd  xmm7, xmm6, 0, 1, 2, 3)   // hi32 mask
587
    AS2(  movdqa  xmm4, xmm0)
588
    AS2(  movdqa  xmm5, xmm3)
589
    AS2(  pand  xmm0, xmm7)
590
    AS2(  pand  xmm4, xmm6)
591
    AS2(  pand  xmm3, xmm6)
592
    AS2(  pand  xmm5, xmm7)
593
    AS2(  por   xmm4, xmm5)     // 0,13,2,15
594
    AS2(  movdqa  xmm5, xmm1)
595
    AS2(  pand  xmm1, xmm7)
596
    AS2(  pand  xmm5, xmm6)
597
    AS2(  por   xmm0, xmm5)     // 4,1,6,3
598
    AS2(  pand  xmm6, xmm2)
599
    AS2(  pand  xmm2, xmm7)
600
    AS2(  por   xmm1, xmm6)     // 8,5,10,7
601
    AS2(  por   xmm2, xmm3)     // 12,9,14,11
602
603
    AS2(  movdqa  xmm5, xmm4)
604
    AS2(  movdqa  xmm6, xmm0)
605
    AS3(  shufpd  xmm4, xmm1, 2)    // 0,13,10,7
606
    AS3(  shufpd  xmm0, xmm2, 2)    // 4,1,14,11
607
    AS3(  shufpd  xmm1, xmm5, 2)    // 8,5,2,15
608
    AS3(  shufpd  xmm2, xmm6, 2)    // 12,9,6,3
609
610
    // output keystream
611
    AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4)
612
    ASJ(  jmp,  5, b)
613
    ASL(4)
614
615
    AS_POP_IF86(  bp)
616
#ifdef __GNUC__
617
    AS_POP_IF86(  bx)
618
    ATT_PREFIX
619
  #if CRYPTOPP_BOOL_X64
620
      : "+r" (input), "+r" (output), "+r" (iterationCount)
621
      : "r" (m_rounds), "r" (m_state.begin()), "r" (workspace)
622
      : "%eax", "%rdx", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15"
623
  #else
624
      : "+a" (input), "+D" (output), "+c" (iterationCount)
625
      : "d" (m_rounds), "S" (m_state.begin())
626
      : "memory", "cc"
627
  #endif
628
    );
629
#endif
630
#ifdef CRYPTOPP_GENERATE_X64_MASM
631
  movdqa  xmm6, [rsp + 0200h]
632
  movdqa  xmm7, [rsp + 0210h]
633
  movdqa  xmm8, [rsp + 0220h]
634
  movdqa  xmm9, [rsp + 0230h]
635
  movdqa  xmm10, [rsp + 0240h]
636
  movdqa  xmm11, [rsp + 0250h]
637
  movdqa  xmm12, [rsp + 0260h]
638
  movdqa  xmm13, [rsp + 0270h]
639
  movdqa  xmm14, [rsp + 0280h]
640
  movdqa  xmm15, [rsp + 0290h]
641
  add   rsp, 10*16 + 32*16 + 8
642
  ret
643
Salsa20_OperateKeystream ENDP
644
#else
645
  }
646
  else
647
#endif
648
#endif
649
0
#ifndef CRYPTOPP_GENERATE_X64_MASM
650
0
  {
651
0
    word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
652
653
0
    while (iterationCount--)
654
0
    {
655
0
      x0 = m_state[0];  x1 = m_state[1];  x2 = m_state[2];  x3 = m_state[3];
656
0
      x4 = m_state[4];  x5 = m_state[5];  x6 = m_state[6];  x7 = m_state[7];
657
0
      x8 = m_state[8];  x9 = m_state[9];  x10 = m_state[10];  x11 = m_state[11];
658
0
      x12 = m_state[12];  x13 = m_state[13];  x14 = m_state[14];  x15 = m_state[15];
659
660
0
      for (int i=m_rounds; i>0; i-=2)
661
0
      {
662
0
        #define QUARTER_ROUND(a, b, c, d) \
663
0
          b = b ^ rotlConstant<7>(a + d); \
664
0
          c = c ^ rotlConstant<9>(b + a); \
665
0
          d = d ^ rotlConstant<13>(c + b);  \
666
0
          a = a ^ rotlConstant<18>(d + c);
667
668
0
        QUARTER_ROUND(x0, x4, x8, x12)
669
0
        QUARTER_ROUND(x1, x5, x9, x13)
670
0
        QUARTER_ROUND(x2, x6, x10, x14)
671
0
        QUARTER_ROUND(x3, x7, x11, x15)
672
673
0
        QUARTER_ROUND(x0, x13, x10, x7)
674
0
        QUARTER_ROUND(x1, x14, x11, x4)
675
0
        QUARTER_ROUND(x2, x15, x8, x5)
676
0
        QUARTER_ROUND(x3, x12, x9, x6)
677
0
      }
678
679
0
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
680
0
      #define SALSA_OUTPUT(x) {\
681
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\
682
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\
683
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\
684
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\
685
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\
686
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\
687
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\
688
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\
689
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\
690
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\
691
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\
692
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\
693
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\
694
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\
695
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\
696
0
        CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);}
697
698
0
      CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SALSA_OUTPUT, BYTES_PER_ITERATION);
699
0
      #undef SALSA_OUTPUT
700
0
#endif
701
702
0
      if (++m_state[8] == 0)
703
0
        ++m_state[5];
704
0
    }
705
0
  }
706
0
}  // see comment above if an internal compiler error occurs here
707
708
void XSalsa20_Policy::CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
709
0
{
710
0
  m_rounds = params.GetIntValueWithDefault(Name::Rounds(), m_rounds);
711
0
  if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
712
0
    throw InvalidRounds(XSalsa20::StaticAlgorithmName(), m_rounds);
713
714
0
  GetUserKey(LITTLE_ENDIAN_ORDER, m_key.begin(), m_key.size(), key, length);
715
0
  if (length == 16)
716
0
    std::memcpy(m_key.begin()+4, m_key.begin(), 16);
717
718
  // "expand 32-byte k"
719
0
  m_state[0] = 0x61707865;
720
0
  m_state[1] = 0x3320646e;
721
0
  m_state[2] = 0x79622d32;
722
0
  m_state[3] = 0x6b206574;
723
0
}
724
725
void XSalsa20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length)
726
0
{
727
0
  CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
728
0
  CRYPTOPP_ASSERT(length==24);
729
730
0
  word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
731
732
0
  GetBlock<word32, LittleEndian> get(IV);
733
0
  get(x14)(x11)(x8)(x5)(m_state[14])(m_state[11]);
734
735
0
  x13 = m_key[0];   x10 = m_key[1];   x7 = m_key[2];    x4 = m_key[3];
736
0
  x15 = m_key[4];   x12 = m_key[5];   x9 = m_key[6];    x6 = m_key[7];
737
0
  x0 = m_state[0];  x1 = m_state[1];  x2 = m_state[2];  x3 = m_state[3];
738
739
0
  for (int i=m_rounds; i>0; i-=2)
740
0
  {
741
0
    QUARTER_ROUND(x0, x4, x8, x12)
742
0
    QUARTER_ROUND(x1, x5, x9, x13)
743
0
    QUARTER_ROUND(x2, x6, x10, x14)
744
0
    QUARTER_ROUND(x3, x7, x11, x15)
745
746
0
    QUARTER_ROUND(x0, x13, x10, x7)
747
0
    QUARTER_ROUND(x1, x14, x11, x4)
748
0
    QUARTER_ROUND(x2, x15, x8, x5)
749
0
    QUARTER_ROUND(x3, x12, x9, x6)
750
0
  }
751
752
0
  m_state[13] = x0; m_state[10] = x1; m_state[7] = x2;  m_state[4] = x3;
753
0
  m_state[15] = x14;  m_state[12] = x11;  m_state[9] = x8;  m_state[6] = x5;
754
0
  m_state[8] = m_state[5] = 0;
755
0
}
756
757
NAMESPACE_END
758
759
#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM