Coverage Report

Created: 2024-11-21 07:03

/src/cryptopp/lea_simd.cpp
Line
Count
Source (jump to first uncovered line)
1
// lea_simd.cpp - written and placed in the public domain by Jeffrey Walton
2
//
3
//    This source file uses intrinsics and built-ins to gain access to
4
//    SSSE3, ARM NEON and ARMv8a, and Power8 Altivec instructions. A separate
5
//    source file is needed because additional CXXFLAGS are required to enable
6
//    the appropriate instruction sets in some build configurations.
7
8
#include "pch.h"
9
#include "config.h"
10
11
#include "lea.h"
12
#include "misc.h"
13
14
// Uncomment for benchmarking C++ against SSE or NEON.
15
// Do so in both lea.cpp and lea_simd.cpp.
16
// #undef CRYPTOPP_SSSE3_AVAILABLE
17
// #undef CRYPTOPP_ARM_NEON_AVAILABLE
18
19
#if (CRYPTOPP_SSSE3_AVAILABLE)
20
# include "adv_simd.h"
21
# include <pmmintrin.h>
22
# include <tmmintrin.h>
23
#endif
24
25
#if defined(__XOP__)
26
# if defined(CRYPTOPP_GCC_COMPATIBLE)
27
#  include <x86intrin.h>
28
# endif
29
# include <ammintrin.h>
30
#endif  // XOP
31
32
#if (CRYPTOPP_ARM_NEON_HEADER)
33
# include "adv_simd.h"
34
# include <arm_neon.h>
35
#endif
36
37
#if (CRYPTOPP_ARM_ACLE_HEADER)
38
# include <stdint.h>
39
# include <arm_acle.h>
40
#endif
41
42
#if defined(_M_ARM64)
43
# include "adv_simd.h"
44
#endif
45
46
// Do not port this to POWER architecture. Naively we hoped
47
// for a 2x to 3x speedup. The result was a 5x slow down.
48
// The table below shows MiB/s and cpb.
49
//
50
// C++:
51
// <TD>LEA-128(128)/CTR (128-bit key)<TD>C++<TD>207<TD>15.64
52
// <TD>LEA-128(192)/CTR (192-bit key)<TD>C++<TD>186<TD>17.48
53
// <TD>LEA-128(256)/CTR (256-bit key)<TD>C++<TD>124<TD>26.2
54
//
55
// Power8:
56
// <TD>LEA-128(128)/CTR (128-bit key)<TD>Power8<TD>37<TD>88.7
57
// <TD>LEA-128(192)/CTR (192-bit key)<TD>Power8<TD>40<TD>82.1
58
// <TD>LEA-128(256)/CTR (256-bit key)<TD>Power8<TD>28<TD>116.0
59
60
#undef CRYPTOPP_POWER8_AVAILABLE
61
#if defined(CRYPTOPP_POWER8_AVAILABLE)
62
# include "adv_simd.h"
63
# include "ppc_simd.h"
64
#endif
65
66
// Squash MS LNK4221 and libtool warnings
67
extern const char LEA_SIMD_FNAME[] = __FILE__;
68
69
ANONYMOUS_NAMESPACE_BEGIN
70
71
using CryptoPP::word32;
72
73
// *************************** ARM NEON ***************************//
74
75
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
76
77
// Bitwise XOR of two vectors of four 32-bit lanes.
inline uint32x4_t Xor(const uint32x4_t& a, const uint32x4_t& b)
{
    const uint32x4_t mixed = veorq_u32(a, b);
    return mixed;
}
81
82
// Lane-wise addition of two vectors of four 32-bit unsigned lanes.
inline uint32x4_t Add(const uint32x4_t& a, const uint32x4_t& b)
{
    const uint32x4_t sum = vaddq_u32(a, b);
    return sum;
}
86
87
// Lane-wise subtraction (a - b) of two vectors of four 32-bit unsigned lanes.
inline uint32x4_t Sub(const uint32x4_t& a, const uint32x4_t& b)
{
    const uint32x4_t difference = vsubq_u32(a, b);
    return difference;
}
91
92
// Rotates each 32-bit lane of val left by R bits: the lane shifted left
// by R is OR'd with the same lane shifted right by 32 - R.
template <unsigned int R>
inline uint32x4_t RotateLeft(const uint32x4_t& val)
{
    return vorrq_u32(vshlq_n_u32(val, R), vshrq_n_u32(val, 32 - R));
}
99
100
// Rotates each 32-bit lane of val right by R bits: the lane shifted left
// by 32 - R is OR'd with the same lane shifted right by R.
template <unsigned int R>
inline uint32x4_t RotateRight(const uint32x4_t& val)
{
    return vorrq_u32(vshlq_n_u32(val, 32 - R), vshrq_n_u32(val, R));
}
107
108
#if defined(__aarch32__) || defined(__aarch64__)
109
// RotateLeft specialization for R == 8. A rotation by one whole byte is a
// byte permutation, so it is performed with a single table lookup.
template <>
inline uint32x4_t RotateLeft<8>(const uint32x4_t& val)
{
    // maskb[i] is the index of the source byte copied into result byte i.
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
#else
    const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
#endif
    const uint8x16_t mask = vld1q_u8(maskb);
    const uint8x16_t bytes = vreinterpretq_u8_u32(val);

    return vreinterpretq_u32_u8(vqtbl1q_u8(bytes, mask));
}
123
124
// RotateRight specialization for R == 8. A rotation by one whole byte is a
// byte permutation, so it is performed with a single table lookup
// (vqtbl1q_u8) over the vector viewed as 16 bytes.
template <>
inline uint32x4_t RotateRight<8>(const uint32x4_t& val)
{
    // maskb[i] is the index of the source byte copied into result byte i.
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
#else
    // Every lane picks its own bytes in the order {1,2,3,0}. The final
    // lane is therefore {13,14,15,12}; the previous table listed 14 twice,
    // which dropped source byte 15 and corrupted the top lane.
    const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 };
#endif
    const uint8x16_t mask = vld1q_u8(maskb);

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}
138
#endif
139
140
// Interleaves the two low 32-bit lanes of a and b: { a0, b0, a1, b1 }.
uint32x4_t UnpackLow32(uint32x4_t a, uint32x4_t b)
{
    const uint32x2x2_t zipped = vzip_u32(vget_low_u32(a), vget_low_u32(b));
    return vcombine_u32(zipped.val[0], zipped.val[1]);
}
147
148
// Interleaves the two high 32-bit lanes of a and b: { a2, b2, a3, b3 }.
uint32x4_t UnpackHigh32(uint32x4_t a, uint32x4_t b)
{
    const uint32x2x2_t zipped = vzip_u32(vget_high_u32(a), vget_high_u32(b));
    return vcombine_u32(zipped.val[0], zipped.val[1]);
}
155
156
// Builds a vector from the low 64-bit halves of a and b (a's half first).
uint32x4_t UnpackLow64(uint32x4_t a, uint32x4_t b)
{
    // Reinterpret as two 64-bit lanes so a whole half can be extracted.
    const uint64x1_t low_a = vget_low_u64(vreinterpretq_u64_u32(a));
    const uint64x1_t low_b = vget_low_u64(vreinterpretq_u64_u32(b));
    return vreinterpretq_u32_u64(vcombine_u64(low_a, low_b));
}
162
163
// Builds a vector from the high 64-bit halves of a and b (a's half first).
uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b)
{
    // Reinterpret as two 64-bit lanes so a whole half can be extracted.
    const uint64x1_t high_a = vget_high_u64(vreinterpretq_u64_u32(a));
    const uint64x1_t high_b = vget_high_u64(vreinterpretq_u64_u32(b));
    return vreinterpretq_u32_u64(vcombine_u64(high_a, high_b));
}
169
170
// Reads the word at rkey[IDX] and broadcasts it to all four 32-bit lanes.
template <unsigned int IDX>
inline uint32x4_t LoadKey(const word32 rkey[])
{
    const word32 subkey = rkey[IDX];
    return vdupq_n_u32(subkey);
}
175
176
// Generic four-vector UnpackNEON. Only the explicit specializations are
// meant to be used; this fallback asserts and yields an all-zero vector.
template <unsigned int IDX>
inline uint32x4_t UnpackNEON(const uint32x4_t& a, const uint32x4_t& b, const uint32x4_t& c, const uint32x4_t& d)
{
    // Should not be instantiated
    CRYPTOPP_ASSERT(0);

    CRYPTOPP_UNUSED(a);
    CRYPTOPP_UNUSED(b);
    CRYPTOPP_UNUSED(c);
    CRYPTOPP_UNUSED(d);

    const uint32x4_t zero = vmovq_n_u32(0);
    return zero;
}
186
187
// Gathers lane 0 of each input into one vector: { a0, b0, c0, d0 }.
template <>
inline uint32x4_t UnpackNEON<0>(const uint32x4_t& a, const uint32x4_t& b, const uint32x4_t& c, const uint32x4_t& d)
{
    // Interleave the low lanes pairwise, then keep the low 64 bits of each.
    return UnpackLow64(UnpackLow32(a, b), UnpackLow32(c, d));
}
194
195
// Gathers lane 1 of each input into one vector: { a1, b1, c1, d1 }.
template <>
inline uint32x4_t UnpackNEON<1>(const uint32x4_t& a, const uint32x4_t& b, const uint32x4_t& c, const uint32x4_t& d)
{
    // Interleave the low lanes pairwise, then keep the high 64 bits of each.
    return UnpackHigh64(UnpackLow32(a, b), UnpackLow32(c, d));
}
202
203
// Gathers lane 2 of each input into one vector: { a2, b2, c2, d2 }.
template <>
inline uint32x4_t UnpackNEON<2>(const uint32x4_t& a, const uint32x4_t& b, const uint32x4_t& c, const uint32x4_t& d)
{
    // Interleave the high lanes pairwise, then keep the low 64 bits of each.
    return UnpackLow64(UnpackHigh32(a, b), UnpackHigh32(c, d));
}
210
211
// Gathers lane 3 of each input into one vector: { a3, b3, c3, d3 }.
template <>
inline uint32x4_t UnpackNEON<3>(const uint32x4_t& a, const uint32x4_t& b, const uint32x4_t& c, const uint32x4_t& d)
{
    // Interleave the high lanes pairwise, then keep the high 64 bits of each.
    return UnpackHigh64(UnpackHigh32(a, b), UnpackHigh32(c, d));
}
218
219
// Generic single-vector UnpackNEON. Only the explicit specializations are
// meant to be used; this fallback asserts and yields an all-zero vector.
template <unsigned int IDX>
inline uint32x4_t UnpackNEON(const uint32x4_t& v)
{
    // Should not be instantiated
    CRYPTOPP_ASSERT(0);
    CRYPTOPP_UNUSED(v);

    const uint32x4_t zero = vmovq_n_u32(0);
    return zero;
}
228
229
// Broadcasts lane 0 of v to all four lanes.
template <>
inline uint32x4_t UnpackNEON<0>(const uint32x4_t& v)
{
    const uint32_t lane = vgetq_lane_u32(v, 0);
    return vdupq_n_u32(lane);
}
235
236
// Broadcasts lane 1 of v to all four lanes.
template <>
inline uint32x4_t UnpackNEON<1>(const uint32x4_t& v)
{
    const uint32_t lane = vgetq_lane_u32(v, 1);
    return vdupq_n_u32(lane);
}
242
243
// Broadcasts lane 2 of v to all four lanes.
template <>
inline uint32x4_t UnpackNEON<2>(const uint32x4_t& v)
{
    const uint32_t lane = vgetq_lane_u32(v, 2);
    return vdupq_n_u32(lane);
}
249
250
// Broadcasts lane 3 of v to all four lanes.
template <>
inline uint32x4_t UnpackNEON<3>(const uint32x4_t& v)
{
    const uint32_t lane = vgetq_lane_u32(v, 3);
    return vdupq_n_u32(lane);
}
256
257
// Four-vector repack; forwards directly to UnpackNEON<IDX>(a, b, c, d).
template <unsigned int IDX>
inline uint32x4_t RepackNEON(const uint32x4_t& a, const uint32x4_t& b, const uint32x4_t& c, const uint32x4_t& d)
{
    const uint32x4_t packed = UnpackNEON<IDX>(a, b, c, d);
    return packed;
}
262
263
// Single-vector repack; forwards directly to UnpackNEON<IDX>(v).
template <unsigned int IDX>
inline uint32x4_t RepackNEON(const uint32x4_t& v)
{
    const uint32x4_t packed = UnpackNEON<IDX>(v);
    return packed;
}
268
269
#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
270
271
// *************************** IA-32 ***************************//
272
273
#if (CRYPTOPP_SSSE3_AVAILABLE)
274
275
inline __m128i Xor(const __m128i& a, const __m128i& b)
276
2.08k
{
277
2.08k
    return _mm_xor_si128(a, b);
278
2.08k
}
279
280
inline __m128i Add(const __m128i& a, const __m128i& b)
281
960
{
282
960
    return _mm_add_epi32(a, b);
283
960
}
284
285
inline __m128i Sub(const __m128i& a, const __m128i& b)
286
84
{
287
84
    return _mm_sub_epi32(a, b);
288
84
}
289
290
template <unsigned int R>
291
inline __m128i RotateLeft(const __m128i& val)
292
376
{
293
#if defined(__XOP__)
294
    return _mm_roti_epi32(val, R);
295
#else
296
376
    return _mm_or_si128(
297
376
        _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
298
376
#endif
299
376
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::RotateLeft<9u>(long long __vector(2) const&)
Line
Count
Source
292
320
{
293
#if defined(__XOP__)
294
    return _mm_roti_epi32(val, R);
295
#else
296
320
    return _mm_or_si128(
297
320
        _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
298
320
#endif
299
320
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::RotateLeft<5u>(long long __vector(2) const&)
Line
Count
Source
292
28
{
293
#if defined(__XOP__)
294
    return _mm_roti_epi32(val, R);
295
#else
296
28
    return _mm_or_si128(
297
28
        _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
298
28
#endif
299
28
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::RotateLeft<3u>(long long __vector(2) const&)
Line
Count
Source
292
28
{
293
#if defined(__XOP__)
294
    return _mm_roti_epi32(val, R);
295
#else
296
28
    return _mm_or_si128(
297
28
        _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
298
28
#endif
299
28
}
300
301
template <unsigned int R>
302
inline __m128i RotateRight(const __m128i& val)
303
668
{
304
#if defined(__XOP__)
305
    return _mm_roti_epi32(val, 32-R);
306
#else
307
668
    return _mm_or_si128(
308
668
        _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
309
668
#endif
310
668
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::RotateRight<3u>(long long __vector(2) const&)
Line
Count
Source
303
320
{
304
#if defined(__XOP__)
305
    return _mm_roti_epi32(val, 32-R);
306
#else
307
320
    return _mm_or_si128(
308
320
        _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
309
320
#endif
310
320
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::RotateRight<5u>(long long __vector(2) const&)
Line
Count
Source
303
320
{
304
#if defined(__XOP__)
305
    return _mm_roti_epi32(val, 32-R);
306
#else
307
320
    return _mm_or_si128(
308
320
        _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
309
320
#endif
310
320
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::RotateRight<9u>(long long __vector(2) const&)
Line
Count
Source
303
28
{
304
#if defined(__XOP__)
305
    return _mm_roti_epi32(val, 32-R);
306
#else
307
28
    return _mm_or_si128(
308
28
        _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
309
28
#endif
310
28
}
311
312
// Faster than two Shifts and an Or.
313
template <>
314
inline __m128i RotateLeft<8>(const __m128i& val)
315
0
{
316
0
#if defined(__XOP__)
317
0
    return _mm_roti_epi32(val, 8);
318
0
#else
319
0
    const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
320
0
    return _mm_shuffle_epi8(val, mask);
321
0
#endif
322
0
}
323
324
// Faster than two Shifts and an Or.
325
template <>
326
inline __m128i RotateRight<8>(const __m128i& val)
327
0
{
328
0
#if defined(__XOP__)
329
0
    return _mm_roti_epi32(val, 32-8);
330
0
#else
331
0
    const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
332
0
    return _mm_shuffle_epi8(val, mask);
333
0
#endif
334
0
}
335
336
template <unsigned int IDX>
337
inline __m128i LoadKey(const word32 rkey[])
338
2.08k
{
339
2.08k
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
2.08k
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
2.08k
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<4u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<5u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<2u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<3u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<0u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<1u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<10u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<11u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<8u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<9u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<6u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<7u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<16u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<17u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<14u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<15u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<12u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<13u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<22u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<23u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<20u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<21u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<18u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<19u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<28u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<29u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<26u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<27u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<24u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<25u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<34u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<35u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<32u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<33u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<30u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<31u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<40u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<41u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<38u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<39u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<36u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<37u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<46u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<47u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<44u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<45u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<42u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<43u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<52u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<53u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<50u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<51u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<48u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<49u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<58u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<59u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<56u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<57u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<54u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<55u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<64u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<65u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<62u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<63u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<60u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<61u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<70u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<71u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<68u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<69u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<66u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<67u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<76u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<77u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<74u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<75u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<72u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<73u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<82u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<83u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<80u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<81u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<78u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<79u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<88u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<89u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<86u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<87u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<84u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<85u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<94u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<95u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<92u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<93u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<90u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<91u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<100u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<101u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<98u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<99u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<96u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<97u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<106u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<107u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<104u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<105u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<102u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<103u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<112u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<113u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<110u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<111u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<108u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<109u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<118u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<119u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<116u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<117u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<114u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<115u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<124u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<125u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<122u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<123u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<120u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<121u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<130u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<131u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<128u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<129u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<126u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<127u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<136u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<137u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<134u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<135u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<132u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<133u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<142u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<143u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<140u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<141u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<138u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<139u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<148u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<149u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<146u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<147u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<144u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<145u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<154u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<155u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<152u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<153u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<150u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<151u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<160u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<161u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<158u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<159u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<156u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<157u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<166u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<167u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<164u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<165u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<162u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<163u>(unsigned int const*)
Line
Count
Source
338
11
{
339
11
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
11
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<172u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<173u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<170u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<171u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<168u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<169u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<178u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<179u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<176u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<177u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<174u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<175u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<184u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<185u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<182u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<183u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<180u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<181u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<190u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<191u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<188u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<189u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<186u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::LoadKey<187u>(unsigned int const*)
Line
Count
Source
338
10
{
339
10
    float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
340
10
    return _mm_castps_si128(_mm_load_ps1(&rk));
341
10
}
342
343
// Primary template for the four-register UnpackXMM. Only the explicit
// specializations for IDX = 0..3 are meant to be used; this generic body
// is a guard for any other index.
//
// \tparam IDX  index selecting which specialization is requested
// \param a,b,c,d  four 128-bit inputs (unused here)
// \returns an all-zero vector after tripping CRYPTOPP_ASSERT(0)
template <unsigned int IDX>
344
inline __m128i UnpackXMM(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
345
{
346
    // Should not be instantiated. The parameters are marked unused to
    // silence warnings, the assert flags the misuse, and a zero vector
    // is returned so the function still yields a defined value.
347
    CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
348
    CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
349
    CRYPTOPP_ASSERT(0);
350
    return _mm_setzero_si128();
351
}
352
353
// Gathers 32-bit lane 0 of each input into a single vector.
//
// \param a,b,c,d  four 128-bit inputs, each viewed as four 32-bit lanes
// \returns the vector [a0, b0, c0, d0] (lowest lane first)
template <>
354
inline __m128i UnpackXMM<0>(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
355
19
{
356
    // LEA is little-endian oriented, so there is no need for a separate shuffle.
    // r1 = [a0, b0, a1, b1] and r2 = [c0, d0, c1, d1]; taking the low
    // 64 bits of each yields [a0, b0, c0, d0].
357
19
    const __m128i r1 = _mm_unpacklo_epi32(a, b);
358
19
    const __m128i r2 = _mm_unpacklo_epi32(c, d);
359
19
    return _mm_unpacklo_epi64(r1, r2);
360
19
}
361
362
// Gathers 32-bit lane 1 of each input into a single vector.
//
// \param a,b,c,d  four 128-bit inputs, each viewed as four 32-bit lanes
// \returns the vector [a1, b1, c1, d1] (lowest lane first)
template <>
363
inline __m128i UnpackXMM<1>(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
364
16
{
365
    // LEA is little-endian oriented, so there is no need for a separate shuffle.
    // r1 = [a0, b0, a1, b1] and r2 = [c0, d0, c1, d1]; taking the high
    // 64 bits of each yields [a1, b1, c1, d1].
366
16
    const __m128i r1 = _mm_unpacklo_epi32(a, b);
367
16
    const __m128i r2 = _mm_unpacklo_epi32(c, d);
368
16
    return _mm_unpackhi_epi64(r1, r2);
369
16
}
370
371
// Gathers 32-bit lane 2 of each input into a single vector.
//
// \param a,b,c,d  four 128-bit inputs, each viewed as four 32-bit lanes
// \returns the vector [a2, b2, c2, d2] (lowest lane first)
template <>
372
inline __m128i UnpackXMM<2>(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
373
16
{
374
    // LEA is little-endian oriented, so there is no need for a separate shuffle.
    // r1 = [a2, b2, a3, b3] and r2 = [c2, d2, c3, d3]; taking the low
    // 64 bits of each yields [a2, b2, c2, d2].
375
16
    const __m128i r1 = _mm_unpackhi_epi32(a, b);
376
16
    const __m128i r2 = _mm_unpackhi_epi32(c, d);
377
16
    return _mm_unpacklo_epi64(r1, r2);
378
16
}
379
380
// Gathers 32-bit lane 3 of each input into a single vector.
//
// \param a,b,c,d  four 128-bit inputs, each viewed as four 32-bit lanes
// \returns the vector [a3, b3, c3, d3] (lowest lane first)
template <>
381
inline __m128i UnpackXMM<3>(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
382
16
{
383
    // LEA is little-endian oriented, so there is no need for a separate shuffle.
    // r1 = [a2, b2, a3, b3] and r2 = [c2, d2, c3, d3]; taking the high
    // 64 bits of each yields [a3, b3, c3, d3].
384
16
    const __m128i r1 = _mm_unpackhi_epi32(a, b);
385
16
    const __m128i r2 = _mm_unpackhi_epi32(c, d);
386
16
    return _mm_unpackhi_epi64(r1, r2);
387
16
}
388
389
// Primary template for the single-register UnpackXMM. Only the explicit
// specializations for IDX = 0..3 are meant to be used; this generic body
// is a guard for any other index.
//
// \tparam IDX  index selecting which specialization is requested
// \param v  128-bit input (unused here)
// \returns an all-zero vector after tripping CRYPTOPP_ASSERT(0)
template <unsigned int IDX>
390
inline __m128i UnpackXMM(const __m128i& v)
391
{
392
    // Should not be instantiated. The assert flags the misuse and a zero
    // vector is returned so the function still yields a defined value.
393
    CRYPTOPP_UNUSED(v); CRYPTOPP_ASSERT(0);
394
    return _mm_setzero_si128();
395
}
396
397
// Broadcasts 32-bit lane 0 of v to all four lanes.
//
// \param v  128-bit input viewed as four 32-bit lanes
// \returns the vector [v0, v0, v0, v0]
template <>
398
inline __m128i UnpackXMM<0>(const __m128i& v)
399
3
{
400
    // Splat to all lanes: the byte-shuffle mask repeats source bytes 0..3
    // in every 4-byte group of the result.
401
3
    return _mm_shuffle_epi8(v, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
402
3
}
403
404
// Broadcasts 32-bit lane 1 of v to all four lanes.
//
// \param v  128-bit input viewed as four 32-bit lanes
// \returns the vector [v1, v1, v1, v1]
template <>
405
inline __m128i UnpackXMM<1>(const __m128i& v)
406
3
{
407
    // Splat to all lanes: the byte-shuffle mask repeats source bytes 4..7
    // in every 4-byte group of the result.
408
3
    return _mm_shuffle_epi8(v, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
409
3
}
410
411
// Broadcasts 32-bit lane 2 of v to all four lanes.
//
// \param v  128-bit input viewed as four 32-bit lanes
// \returns the vector [v2, v2, v2, v2]
template <>
412
inline __m128i UnpackXMM<2>(const __m128i& v)
413
3
{
414
    // Splat to all lanes: the byte-shuffle mask repeats source bytes 8..11
    // in every 4-byte group of the result.
415
3
    return _mm_shuffle_epi8(v, _mm_set_epi8(11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8));
416
3
}
417
418
// Broadcasts 32-bit lane 3 of v to all four lanes.
//
// \param v  128-bit input viewed as four 32-bit lanes
// \returns the vector [v3, v3, v3, v3]
template <>
419
inline __m128i UnpackXMM<3>(const __m128i& v)
420
3
{
421
    // Splat to all lanes: the byte-shuffle mask repeats source bytes 12..15
    // in every 4-byte group of the result.
422
3
    return _mm_shuffle_epi8(v, _mm_set_epi8(15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12));
423
3
}
424
425
// Four-register repack. This is a thin forwarder to the four-argument
// UnpackXMM<IDX>, so it performs exactly the same lane gathering.
//
// \tparam IDX  lane index passed through to UnpackXMM
// \param a,b,c,d  four 128-bit inputs
// \returns whatever UnpackXMM<IDX>(a, b, c, d) returns
template <unsigned int IDX>
426
inline __m128i RepackXMM(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
427
35
{
428
35
    return UnpackXMM<IDX>(a, b, c, d);
429
35
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::RepackXMM<0u>(long long __vector(2) const&, long long __vector(2) const&, long long __vector(2) const&, long long __vector(2) const&)
Line
Count
Source
427
11
{
428
11
    return UnpackXMM<IDX>(a, b, c, d);
429
11
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::RepackXMM<1u>(long long __vector(2) const&, long long __vector(2) const&, long long __vector(2) const&, long long __vector(2) const&)
Line
Count
Source
427
8
{
428
8
    return UnpackXMM<IDX>(a, b, c, d);
429
8
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::RepackXMM<2u>(long long __vector(2) const&, long long __vector(2) const&, long long __vector(2) const&, long long __vector(2) const&)
Line
Count
Source
427
8
{
428
8
    return UnpackXMM<IDX>(a, b, c, d);
429
8
}
lea_simd.cpp:long long __vector(2) (anonymous namespace)::RepackXMM<3u>(long long __vector(2) const&, long long __vector(2) const&, long long __vector(2) const&, long long __vector(2) const&)
Line
Count
Source
427
8
{
428
8
    return UnpackXMM<IDX>(a, b, c, d);
429
8
}
430
431
// Single-register repack. This is a thin forwarder to the one-argument
// UnpackXMM<IDX>, so it performs exactly the same lane broadcast.
//
// \tparam IDX  lane index passed through to UnpackXMM
// \param v  128-bit input
// \returns whatever UnpackXMM<IDX>(v) returns
template <unsigned int IDX>
432
inline __m128i RepackXMM(const __m128i& v)
433
{
434
    return UnpackXMM<IDX>(v);
435
}
436
437
#endif  // CRYPTOPP_SSSE3_AVAILABLE
438
439
// *************************** Power8 ***************************//
440
441
#if (CRYPTOPP_POWER8_AVAILABLE)
442
443
using CryptoPP::uint8x16_p;
444
using CryptoPP::uint32x4_p;
445
using CryptoPP::uint64x2_p;
446
447
// Forwards both operands to VecXor and returns its result.
// NOTE(review): VecXor is defined outside this section; presumably a
// lane-wise XOR of the two vectors - confirm against its definition.
inline uint32x4_p Xor(const uint32x4_p& a, const uint32x4_p& b)
448
{
449
    return VecXor(a, b);
450
}
451
452
// Forwards both operands to VecAdd and returns its result.
// NOTE(review): VecAdd is defined outside this section; presumably a
// lane-wise addition of the two vectors - confirm against its definition.
inline uint32x4_p Add(const uint32x4_p& a, const uint32x4_p& b)
453
{
454
    return VecAdd(a, b);
455
}
456
457
// Forwards both operands to VecSub and returns its result.
// NOTE(review): VecSub is defined outside this section; presumably a
// lane-wise subtraction (a - b) - confirm against its definition.
inline uint32x4_p Sub(const uint32x4_p& a, const uint32x4_p& b)
458
{
459
    return VecSub(a, b);
460
}
461
462
template <unsigned int R>
463
inline uint32x4_p RotateLeft(const uint32x4_p& val)
464
{
465
    const uint32x4_p m = {R, R, R, R};
466
    return vec_rl(val, m);
467
}
468
469
template <unsigned int R>
470
inline uint32x4_p RotateRight(const uint32x4_p& val)
471
{
472
    const uint32x4_p m = {32-R, 32-R, 32-R, 32-R};
473
    return vec_rl(val, m);
474
}
475
476
template <unsigned int IDX>
477
inline uint32x4_p LoadKey(const word32 rkey[])
478
{
479
    return vec_splats(rkey[IDX]);
480
}
481
482
template <unsigned int IDX>
483
inline uint32x4_p UnpackSIMD(const uint32x4_p& a, const uint32x4_p& b, const uint32x4_p& c, const uint32x4_p& d)
484
{
485
    // Should not be instantiated
486
    CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
487
    CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
488
    CRYPTOPP_ASSERT(0);
489
    return VecXor(a, a);
490
}
491
492
template <>
493
inline uint32x4_p UnpackSIMD<0>(const uint32x4_p& a, const uint32x4_p& b, const uint32x4_p& c, const uint32x4_p& d)
494
{
495
    const uint64x2_p r1 = (uint64x2_p)vec_mergel(a, b);
496
    const uint64x2_p r2 = (uint64x2_p)vec_mergel(c, d);
497
    return (uint32x4_p)vec_mergel(r1, r2);
498
}
499
500
template <>
501
inline uint32x4_p UnpackSIMD<1>(const uint32x4_p& a, const uint32x4_p& b, const uint32x4_p& c, const uint32x4_p& d)
502
{
503
    const uint64x2_p r1 = (uint64x2_p)vec_mergel(a, b);
504
    const uint64x2_p r2 = (uint64x2_p)vec_mergel(c, d);
505
    return (uint32x4_p)vec_mergeh(r1, r2);
506
}
507
508
template <>
509
inline uint32x4_p UnpackSIMD<2>(const uint32x4_p& a, const uint32x4_p& b, const uint32x4_p& c, const uint32x4_p& d)
510
{
511
    const uint64x2_p r1 = (uint64x2_p)vec_mergeh(a, b);
512
    const uint64x2_p r2 = (uint64x2_p)vec_mergeh(c, d);
513
    return (uint32x4_p)vec_mergel(r1, r2);
514
}
515
516
template <>
517
inline uint32x4_p UnpackSIMD<3>(const uint32x4_p& a, const uint32x4_p& b, const uint32x4_p& c, const uint32x4_p& d)
518
{
519
    const uint64x2_p r1 = (uint64x2_p)vec_mergeh(a, b);
520
    const uint64x2_p r2 = (uint64x2_p)vec_mergeh(c, d);
521
    return (uint32x4_p)vec_mergeh(r1, r2);
522
}
523
524
template <unsigned int IDX>
525
inline uint32x4_p UnpackSIMD(const uint32x4_p& v)
526
{
527
    // Should not be instantiated
528
    CRYPTOPP_ASSERT(0);
529
    return VecXor(v, v);
530
}
531
532
template <>
533
inline uint32x4_p UnpackSIMD<0>(const uint32x4_p& v)
534
{
535
    // Splat to all lanes
536
    const uint8x16_p m = {3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0};
537
    return (uint32x4_p)VecPermute(v, v, m);
538
}
539
540
template <>
541
inline uint32x4_p UnpackSIMD<1>(const uint32x4_p& v)
542
{
543
    // Splat to all lanes
544
    const uint8x16_p m = {7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4};
545
    return (uint32x4_p)VecPermute(v, v, m);
546
}
547
548
template <>
549
inline uint32x4_p UnpackSIMD<2>(const uint32x4_p& v)
550
{
551
    // Splat to all lanes
552
    const uint8x16_p m = {11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8};
553
    return (uint32x4_p)VecPermute(v, v, m);
554
}
555
556
template <>
557
inline uint32x4_p UnpackSIMD<3>(const uint32x4_p& v)
558
{
559
    // Splat to all lanes
560
    const uint8x16_p m = {15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12};
561
    return (uint32x4_p)VecPermute(v, v, m);
562
}
563
564
template <unsigned int IDX>
565
inline uint32x4_p RepackSIMD(const uint32x4_p& a, const uint32x4_p& b, const uint32x4_p& c, const uint32x4_p& d)
566
{
567
    return UnpackSIMD<IDX>(a, b, c, d);
568
}
569
570
template <unsigned int IDX>
571
inline uint32x4_p RepackSIMD(const uint32x4_p& v)
572
{
573
    return UnpackSIMD<IDX>(v);
574
}
575
576
#endif  // CRYPTOPP_POWER8_AVAILABLE
577
578
// *************************** LEA Encryption ***************************//
579
580
#if (CRYPTOPP_ARM_NEON_AVAILABLE || CRYPTOPP_SSSE3_AVAILABLE)
581
582
template <class W>
583
inline void LEA_Encryption(W temp[4], const word32 *subkeys, unsigned int rounds)
584
10
{
585
10
    temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<4>(subkeys)), Xor(temp[3], LoadKey<5>(subkeys))));
586
10
    temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<2>(subkeys)), Xor(temp[2], LoadKey<3>(subkeys))));
587
10
    temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<0>(subkeys)), Xor(temp[1], LoadKey<1>(subkeys))));
588
10
    temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<10>(subkeys)), Xor(temp[0], LoadKey<11>(subkeys))));
589
10
    temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<8>(subkeys)), Xor(temp[3], LoadKey<9>(subkeys))));
590
10
    temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<6>(subkeys)), Xor(temp[2], LoadKey<7>(subkeys))));
591
10
    temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<16>(subkeys)), Xor(temp[1], LoadKey<17>(subkeys))));
592
10
    temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<14>(subkeys)), Xor(temp[0], LoadKey<15>(subkeys))));
593
10
    temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<12>(subkeys)), Xor(temp[3], LoadKey<13>(subkeys))));
594
10
    temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<22>(subkeys)), Xor(temp[2], LoadKey<23>(subkeys))));
595
10
    temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<20>(subkeys)), Xor(temp[1], LoadKey<21>(subkeys))));
596
10
    temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<18>(subkeys)), Xor(temp[0], LoadKey<19>(subkeys))));
597
598
10
    temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<28>(subkeys)), Xor(temp[3], LoadKey<29>(subkeys))));
599
10
    temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<26>(subkeys)), Xor(temp[2], LoadKey<27>(subkeys))));
600
10
    temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<24>(subkeys)), Xor(temp[1], LoadKey<25>(subkeys))));
601
10
    temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<34>(subkeys)), Xor(temp[0], LoadKey<35>(subkeys))));
602
10
    temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<32>(subkeys)), Xor(temp[3], LoadKey<33>(subkeys))));
603
10
    temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<30>(subkeys)), Xor(temp[2], LoadKey<31>(subkeys))));
604
10
    temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<40>(subkeys)), Xor(temp[1], LoadKey<41>(subkeys))));
605
10
    temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<38>(subkeys)), Xor(temp[0], LoadKey<39>(subkeys))));
606
10
    temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<36>(subkeys)), Xor(temp[3], LoadKey<37>(subkeys))));
607
10
    temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<46>(subkeys)), Xor(temp[2], LoadKey<47>(subkeys))));
608
10
    temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<44>(subkeys)), Xor(temp[1], LoadKey<45>(subkeys))));
609
10
    temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<42>(subkeys)), Xor(temp[0], LoadKey<43>(subkeys))));
610
611
10
    temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<52>(subkeys)), Xor(temp[3], LoadKey<53>(subkeys))));
612
10
    temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<50>(subkeys)), Xor(temp[2], LoadKey<51>(subkeys))));
613
10
    temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<48>(subkeys)), Xor(temp[1], LoadKey<49>(subkeys))));
614
10
    temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<58>(subkeys)), Xor(temp[0], LoadKey<59>(subkeys))));
615
10
    temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<56>(subkeys)), Xor(temp[3], LoadKey<57>(subkeys))));
616
10
    temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<54>(subkeys)), Xor(temp[2], LoadKey<55>(subkeys))));
617
10
    temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<64>(subkeys)), Xor(temp[1], LoadKey<65>(subkeys))));
618
10
    temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<62>(subkeys)), Xor(temp[0], LoadKey<63>(subkeys))));
619
10
    temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<60>(subkeys)), Xor(temp[3], LoadKey<61>(subkeys))));
620
10
    temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<70>(subkeys)), Xor(temp[2], LoadKey<71>(subkeys))));
621
10
    temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<68>(subkeys)), Xor(temp[1], LoadKey<69>(subkeys))));
622
10
    temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<66>(subkeys)), Xor(temp[0], LoadKey<67>(subkeys))));
623
624
10
    temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<76>(subkeys)), Xor(temp[3], LoadKey<77>(subkeys))));
625
10
    temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<74>(subkeys)), Xor(temp[2], LoadKey<75>(subkeys))));
626
10
    temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<72>(subkeys)), Xor(temp[1], LoadKey<73>(subkeys))));
627
10
    temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<82>(subkeys)), Xor(temp[0], LoadKey<83>(subkeys))));
628
10
    temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<80>(subkeys)), Xor(temp[3], LoadKey<81>(subkeys))));
629
10
    temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<78>(subkeys)), Xor(temp[2], LoadKey<79>(subkeys))));
630
10
    temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<88>(subkeys)), Xor(temp[1], LoadKey<89>(subkeys))));
631
10
    temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<86>(subkeys)), Xor(temp[0], LoadKey<87>(subkeys))));
632
10
    temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<84>(subkeys)), Xor(temp[3], LoadKey<85>(subkeys))));
633
10
    temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<94>(subkeys)), Xor(temp[2], LoadKey<95>(subkeys))));
634
10
    temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<92>(subkeys)), Xor(temp[1], LoadKey<93>(subkeys))));
635
10
    temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<90>(subkeys)), Xor(temp[0], LoadKey<91>(subkeys))));
636
637
10
    temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<100>(subkeys)), Xor(temp[3], LoadKey<101>(subkeys))));
638
10
    temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<98>(subkeys)), Xor(temp[2], LoadKey<99>(subkeys))));
639
10
    temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<96>(subkeys)), Xor(temp[1], LoadKey<97>(subkeys))));
640
10
    temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<106>(subkeys)), Xor(temp[0], LoadKey<107>(subkeys))));
641
10
    temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<104>(subkeys)), Xor(temp[3], LoadKey<105>(subkeys))));
642
10
    temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<102>(subkeys)), Xor(temp[2], LoadKey<103>(subkeys))));
643
10
    temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<112>(subkeys)), Xor(temp[1], LoadKey<113>(subkeys))));
644
10
    temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<110>(subkeys)), Xor(temp[0], LoadKey<111>(subkeys))));
645
10
    temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<108>(subkeys)), Xor(temp[3], LoadKey<109>(subkeys))));
646
10
    temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<118>(subkeys)), Xor(temp[2], LoadKey<119>(subkeys))));
647
10
    temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<116>(subkeys)), Xor(temp[1], LoadKey<117>(subkeys))));
648
10
    temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<114>(subkeys)), Xor(temp[0], LoadKey<115>(subkeys))));
649
650
10
    temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<124>(subkeys)), Xor(temp[3], LoadKey<125>(subkeys))));
651
10
    temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<122>(subkeys)), Xor(temp[2], LoadKey<123>(subkeys))));
652
10
    temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<120>(subkeys)), Xor(temp[1], LoadKey<121>(subkeys))));
653
10
    temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<130>(subkeys)), Xor(temp[0], LoadKey<131>(subkeys))));
654
10
    temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<128>(subkeys)), Xor(temp[3], LoadKey<129>(subkeys))));
655
10
    temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<126>(subkeys)), Xor(temp[2], LoadKey<127>(subkeys))));
656
10
    temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<136>(subkeys)), Xor(temp[1], LoadKey<137>(subkeys))));
657
10
    temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<134>(subkeys)), Xor(temp[0], LoadKey<135>(subkeys))));
658
10
    temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<132>(subkeys)), Xor(temp[3], LoadKey<133>(subkeys))));
659
10
    temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<142>(subkeys)), Xor(temp[2], LoadKey<143>(subkeys))));
660
10
    temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<140>(subkeys)), Xor(temp[1], LoadKey<141>(subkeys))));
661
10
    temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<138>(subkeys)), Xor(temp[0], LoadKey<139>(subkeys))));
662
663
10
    if(rounds > 24)
664
10
    {
665
10
        temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<148>(subkeys)), Xor(temp[3], LoadKey<149>(subkeys))));
666
10
        temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<146>(subkeys)), Xor(temp[2], LoadKey<147>(subkeys))));
667
10
        temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<144>(subkeys)), Xor(temp[1], LoadKey<145>(subkeys))));
668
10
        temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<154>(subkeys)), Xor(temp[0], LoadKey<155>(subkeys))));
669
10
        temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<152>(subkeys)), Xor(temp[3], LoadKey<153>(subkeys))));
670
10
        temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<150>(subkeys)), Xor(temp[2], LoadKey<151>(subkeys))));
671
10
        temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<160>(subkeys)), Xor(temp[1], LoadKey<161>(subkeys))));
672
10
        temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<158>(subkeys)), Xor(temp[0], LoadKey<159>(subkeys))));
673
10
        temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<156>(subkeys)), Xor(temp[3], LoadKey<157>(subkeys))));
674
10
        temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<166>(subkeys)), Xor(temp[2], LoadKey<167>(subkeys))));
675
10
        temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<164>(subkeys)), Xor(temp[1], LoadKey<165>(subkeys))));
676
10
        temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<162>(subkeys)), Xor(temp[0], LoadKey<163>(subkeys))));
677
10
    }
678
679
10
    if(rounds > 28)
680
10
    {
681
10
        temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<172>(subkeys)), Xor(temp[3], LoadKey<173>(subkeys))));
682
10
        temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<170>(subkeys)), Xor(temp[2], LoadKey<171>(subkeys))));
683
10
        temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<168>(subkeys)), Xor(temp[1], LoadKey<169>(subkeys))));
684
10
        temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<178>(subkeys)), Xor(temp[0], LoadKey<179>(subkeys))));
685
10
        temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<176>(subkeys)), Xor(temp[3], LoadKey<177>(subkeys))));
686
10
        temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<174>(subkeys)), Xor(temp[2], LoadKey<175>(subkeys))));
687
10
        temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<184>(subkeys)), Xor(temp[1], LoadKey<185>(subkeys))));
688
10
        temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<182>(subkeys)), Xor(temp[0], LoadKey<183>(subkeys))));
689
10
        temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<180>(subkeys)), Xor(temp[3], LoadKey<181>(subkeys))));
690
10
        temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<190>(subkeys)), Xor(temp[2], LoadKey<191>(subkeys))));
691
10
        temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<188>(subkeys)), Xor(temp[1], LoadKey<189>(subkeys))));
692
10
        temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<186>(subkeys)), Xor(temp[0], LoadKey<187>(subkeys))));
693
10
    }
694
10
}
695
696
// *************************** LEA Decryption ***************************//
697
698
template <class W>
699
inline void LEA_Decryption(W temp[4], const word32 *subkeys, unsigned int rounds)
700
1
{
701
1
    if(rounds > 28)
702
0
    {
703
0
        temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<186>(subkeys))), LoadKey<187>(subkeys));
704
0
        temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<188>(subkeys))), LoadKey<189>(subkeys));
705
0
        temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<190>(subkeys))), LoadKey<191>(subkeys));
706
0
        temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<180>(subkeys))), LoadKey<181>(subkeys));
707
0
        temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<182>(subkeys))), LoadKey<183>(subkeys));
708
0
        temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<184>(subkeys))), LoadKey<185>(subkeys));
709
0
        temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<174>(subkeys))), LoadKey<175>(subkeys));
710
0
        temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<176>(subkeys))), LoadKey<177>(subkeys));
711
0
        temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<178>(subkeys))), LoadKey<179>(subkeys));
712
0
        temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<168>(subkeys))), LoadKey<169>(subkeys));
713
0
        temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<170>(subkeys))), LoadKey<171>(subkeys));
714
0
        temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<172>(subkeys))), LoadKey<173>(subkeys));
715
0
    }
716
717
1
    if(rounds > 24)
718
1
    {
719
1
        temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<162>(subkeys))), LoadKey<163>(subkeys));
720
1
        temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<164>(subkeys))), LoadKey<165>(subkeys));
721
1
        temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<166>(subkeys))), LoadKey<167>(subkeys));
722
1
        temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<156>(subkeys))), LoadKey<157>(subkeys));
723
1
        temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<158>(subkeys))), LoadKey<159>(subkeys));
724
1
        temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<160>(subkeys))), LoadKey<161>(subkeys));
725
1
        temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<150>(subkeys))), LoadKey<151>(subkeys));
726
1
        temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<152>(subkeys))), LoadKey<153>(subkeys));
727
1
        temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<154>(subkeys))), LoadKey<155>(subkeys));
728
1
        temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<144>(subkeys))), LoadKey<145>(subkeys));
729
1
        temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<146>(subkeys))), LoadKey<147>(subkeys));
730
1
        temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<148>(subkeys))), LoadKey<149>(subkeys));
731
1
    }
732
733
1
    temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<138>(subkeys))), LoadKey<139>(subkeys));
734
1
    temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<140>(subkeys))), LoadKey<141>(subkeys));
735
1
    temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<142>(subkeys))), LoadKey<143>(subkeys));
736
1
    temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<132>(subkeys))), LoadKey<133>(subkeys));
737
1
    temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<134>(subkeys))), LoadKey<135>(subkeys));
738
1
    temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<136>(subkeys))), LoadKey<137>(subkeys));
739
1
    temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<126>(subkeys))), LoadKey<127>(subkeys));
740
1
    temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<128>(subkeys))), LoadKey<129>(subkeys));
741
1
    temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<130>(subkeys))), LoadKey<131>(subkeys));
742
1
    temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<120>(subkeys))), LoadKey<121>(subkeys));
743
1
    temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<122>(subkeys))), LoadKey<123>(subkeys));
744
1
    temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<124>(subkeys))), LoadKey<125>(subkeys));
745
746
1
    temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<114>(subkeys))), LoadKey<115>(subkeys));
747
1
    temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<116>(subkeys))), LoadKey<117>(subkeys));
748
1
    temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<118>(subkeys))), LoadKey<119>(subkeys));
749
1
    temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<108>(subkeys))), LoadKey<109>(subkeys));
750
1
    temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<110>(subkeys))), LoadKey<111>(subkeys));
751
1
    temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<112>(subkeys))), LoadKey<113>(subkeys));
752
1
    temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<102>(subkeys))), LoadKey<103>(subkeys));
753
1
    temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<104>(subkeys))), LoadKey<105>(subkeys));
754
1
    temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<106>(subkeys))), LoadKey<107>(subkeys));
755
1
    temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<96>(subkeys))), LoadKey<97>(subkeys));
756
1
    temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<98>(subkeys))), LoadKey<99>(subkeys));
757
1
    temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<100>(subkeys))), LoadKey<101>(subkeys));
758
759
1
    temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<90>(subkeys))), LoadKey<91>(subkeys));
760
1
    temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<92>(subkeys))), LoadKey<93>(subkeys));
761
1
    temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<94>(subkeys))), LoadKey<95>(subkeys));
762
1
    temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<84>(subkeys))), LoadKey<85>(subkeys));
763
1
    temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<86>(subkeys))), LoadKey<87>(subkeys));
764
1
    temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<88>(subkeys))), LoadKey<89>(subkeys));
765
1
    temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<78>(subkeys))), LoadKey<79>(subkeys));
766
1
    temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<80>(subkeys))), LoadKey<81>(subkeys));
767
1
    temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<82>(subkeys))), LoadKey<83>(subkeys));
768
1
    temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<72>(subkeys))), LoadKey<73>(subkeys));
769
1
    temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<74>(subkeys))), LoadKey<75>(subkeys));
770
1
    temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<76>(subkeys))), LoadKey<77>(subkeys));
771
772
1
    temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<66>(subkeys))), LoadKey<67>(subkeys));
773
1
    temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<68>(subkeys))), LoadKey<69>(subkeys));
774
1
    temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<70>(subkeys))), LoadKey<71>(subkeys));
775
1
    temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<60>(subkeys))), LoadKey<61>(subkeys));
776
1
    temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<62>(subkeys))), LoadKey<63>(subkeys));
777
1
    temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<64>(subkeys))), LoadKey<65>(subkeys));
778
1
    temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<54>(subkeys))), LoadKey<55>(subkeys));
779
1
    temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<56>(subkeys))), LoadKey<57>(subkeys));
780
1
    temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<58>(subkeys))), LoadKey<59>(subkeys));
781
1
    temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<48>(subkeys))), LoadKey<49>(subkeys));
782
1
    temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<50>(subkeys))), LoadKey<51>(subkeys));
783
1
    temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<52>(subkeys))), LoadKey<53>(subkeys));
784
785
1
    temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<42>(subkeys))), LoadKey<43>(subkeys));
786
1
    temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<44>(subkeys))), LoadKey<45>(subkeys));
787
1
    temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<46>(subkeys))), LoadKey<47>(subkeys));
788
1
    temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<36>(subkeys))), LoadKey<37>(subkeys));
789
1
    temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<38>(subkeys))), LoadKey<39>(subkeys));
790
1
    temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<40>(subkeys))), LoadKey<41>(subkeys));
791
1
    temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<30>(subkeys))), LoadKey<31>(subkeys));
792
1
    temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<32>(subkeys))), LoadKey<33>(subkeys));
793
1
    temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<34>(subkeys))), LoadKey<35>(subkeys));
794
1
    temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<24>(subkeys))), LoadKey<25>(subkeys));
795
1
    temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<26>(subkeys))), LoadKey<27>(subkeys));
796
1
    temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<28>(subkeys))), LoadKey<29>(subkeys));
797
798
1
    temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<18>(subkeys))), LoadKey<19>(subkeys));
799
1
    temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<20>(subkeys))), LoadKey<21>(subkeys));
800
1
    temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<22>(subkeys))), LoadKey<23>(subkeys));
801
1
    temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<12>(subkeys))), LoadKey<13>(subkeys));
802
1
    temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<14>(subkeys))), LoadKey<15>(subkeys));
803
1
    temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<16>(subkeys))), LoadKey<17>(subkeys));
804
1
    temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<6>(subkeys))), LoadKey<7>(subkeys));
805
1
    temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<8>(subkeys))), LoadKey<9>(subkeys));
806
1
    temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<10>(subkeys))), LoadKey<11>(subkeys));
807
1
    temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<0>(subkeys))), LoadKey<1>(subkeys));
808
1
    temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<2>(subkeys))), LoadKey<3>(subkeys));
809
1
    temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<4>(subkeys))), LoadKey<5>(subkeys));
810
1
}
811
812
#endif  // LEA Encryption and Decryption
813
814
// *************************** ARM NEON ***************************//
815
816
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
817
818
inline void LEA_Enc_Block(uint32x4_t &block0,
819
    const word32 *subkeys, unsigned int rounds)
820
{
821
    uint32x4_t temp[4];
822
    temp[0] = UnpackNEON<0>(block0);
823
    temp[1] = UnpackNEON<1>(block0);
824
    temp[2] = UnpackNEON<2>(block0);
825
    temp[3] = UnpackNEON<3>(block0);
826
827
    LEA_Encryption(temp, subkeys, rounds);
828
829
    block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
830
}
831
832
inline void LEA_Dec_Block(uint32x4_t &block0,
833
    const word32 *subkeys, unsigned int rounds)
834
{
835
    uint32x4_t temp[4];
836
    temp[0] = UnpackNEON<0>(block0);
837
    temp[1] = UnpackNEON<1>(block0);
838
    temp[2] = UnpackNEON<2>(block0);
839
    temp[3] = UnpackNEON<3>(block0);
840
841
    LEA_Decryption(temp, subkeys, rounds);
842
843
    block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
844
}
845
846
inline void LEA_Enc_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
847
    uint32x4_t &block2, uint32x4_t &block3, const word32 *subkeys, unsigned int rounds)
848
{
849
    uint32x4_t temp[4];
850
    temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
851
    temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
852
    temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
853
    temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
854
855
    LEA_Encryption(temp, subkeys, rounds);
856
857
    block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
858
    block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
859
    block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
860
    block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
861
}
862
863
inline void LEA_Dec_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
864
    uint32x4_t &block2, uint32x4_t &block3, const word32 *subkeys, unsigned int rounds)
865
{
866
    uint32x4_t temp[4];
867
    temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
868
    temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
869
    temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
870
    temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
871
872
    LEA_Decryption(temp, subkeys, rounds);
873
874
    block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
875
    block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
876
    block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
877
    block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
878
}
879
880
#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
881
882
// *************************** IA-32 ***************************//
883
884
#if (CRYPTOPP_SSSE3_AVAILABLE)
885
886
inline void LEA_Enc_Block(__m128i &block0,
887
    const word32 *subkeys, unsigned int rounds)
888
2
{
889
2
    __m128i temp[4];
890
2
    temp[0] = UnpackXMM<0>(block0);
891
2
    temp[1] = UnpackXMM<1>(block0);
892
2
    temp[2] = UnpackXMM<2>(block0);
893
2
    temp[3] = UnpackXMM<3>(block0);
894
895
2
    LEA_Encryption(temp, subkeys, rounds);
896
897
2
    block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
898
2
}
899
900
inline void LEA_Dec_Block(__m128i &block0,
901
    const word32 *subkeys, unsigned int rounds)
902
1
{
903
1
    __m128i temp[4];
904
1
    temp[0] = UnpackXMM<0>(block0);
905
1
    temp[1] = UnpackXMM<1>(block0);
906
1
    temp[2] = UnpackXMM<2>(block0);
907
1
    temp[3] = UnpackXMM<3>(block0);
908
909
1
    LEA_Decryption(temp, subkeys, rounds);
910
911
1
    block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
912
1
}
913
914
inline void LEA_Enc_4_Blocks(__m128i &block0, __m128i &block1,
915
    __m128i &block2, __m128i &block3, const word32 *subkeys, unsigned int rounds)
916
8
{
917
8
    __m128i temp[4];
918
8
    temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
919
8
    temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
920
8
    temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
921
8
    temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
922
923
8
    LEA_Encryption(temp, subkeys, rounds);
924
925
8
    block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
926
8
    block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
927
8
    block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
928
8
    block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
929
8
}
930
931
inline void LEA_Dec_4_Blocks(__m128i &block0, __m128i &block1,
932
    __m128i &block2, __m128i &block3, const word32 *subkeys, unsigned int rounds)
933
0
{
934
0
    __m128i temp[4];
935
0
    temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
936
0
    temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
937
0
    temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
938
0
    temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
939
940
0
    LEA_Decryption(temp, subkeys, rounds);
941
942
0
    block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
943
0
    block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
944
0
    block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
945
0
    block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
946
0
}
947
948
#endif  // CRYPTOPP_SSSE3_AVAILABLE
949
950
// *************************** Power8 ***************************//
951
952
#if (CRYPTOPP_POWER8_AVAILABLE)
953
954
inline void LEA_Enc_Block(uint32x4_p &block0,
955
    const word32 *subkeys, unsigned int rounds)
956
{
957
    uint32x4_p temp[4];
958
    temp[0] = UnpackSIMD<0>(block0);
959
    temp[1] = UnpackSIMD<1>(block0);
960
    temp[2] = UnpackSIMD<2>(block0);
961
    temp[3] = UnpackSIMD<3>(block0);
962
963
    LEA_Encryption(temp, subkeys, rounds);
964
965
    block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
966
}
967
968
inline void LEA_Dec_Block(uint32x4_p &block0,
969
    const word32 *subkeys, unsigned int rounds)
970
{
971
    uint32x4_p temp[4];
972
    temp[0] = UnpackSIMD<0>(block0);
973
    temp[1] = UnpackSIMD<1>(block0);
974
    temp[2] = UnpackSIMD<2>(block0);
975
    temp[3] = UnpackSIMD<3>(block0);
976
977
    LEA_Decryption(temp, subkeys, rounds);
978
979
    block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
980
}
981
982
inline void LEA_Enc_4_Blocks(uint32x4_p &block0, uint32x4_p &block1,
983
    uint32x4_p &block2, uint32x4_p &block3, const word32 *subkeys, unsigned int rounds)
984
{
985
    uint32x4_p temp[4];
986
    temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
987
    temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
988
    temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
989
    temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
990
991
    LEA_Encryption(temp, subkeys, rounds);
992
993
    block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
994
    block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
995
    block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
996
    block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
997
}
998
999
inline void LEA_Dec_4_Blocks(uint32x4_p &block0, uint32x4_p &block1,
1000
    uint32x4_p &block2, uint32x4_p &block3, const word32 *subkeys, unsigned int rounds)
1001
{
1002
    uint32x4_p temp[4];
1003
    temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
1004
    temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
1005
    temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
1006
    temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
1007
1008
    LEA_Decryption(temp, subkeys, rounds);
1009
1010
    block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
1011
    block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
1012
    block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
1013
    block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
1014
}
1015
1016
#endif  // CRYPTOPP_POWER8_AVAILABLE
1017
1018
ANONYMOUS_NAMESPACE_END
1019
1020
// *************************** SIMD Templates ***************************//
1021
1022
NAMESPACE_BEGIN(CryptoPP)
1023
1024
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
1025
size_t LEA_Enc_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
1026
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
1027
4
{
1028
4
    return AdvancedProcessBlocks128_4x1_SSE(LEA_Enc_Block, LEA_Enc_4_Blocks,
1029
4
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1030
4
}
1031
1032
size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
1033
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
1034
1
{
1035
1
    return AdvancedProcessBlocks128_4x1_SSE(LEA_Dec_Block, LEA_Dec_4_Blocks,
1036
1
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1037
1
}
1038
#endif // CRYPTOPP_SSSE3_AVAILABLE
1039
1040
#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
1041
size_t LEA_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
1042
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
1043
{
1044
    return AdvancedProcessBlocks128_4x1_NEON(LEA_Enc_Block, LEA_Enc_4_Blocks,
1045
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1046
}
1047
1048
size_t LEA_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
1049
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
1050
{
1051
    return AdvancedProcessBlocks128_4x1_NEON(LEA_Dec_Block, LEA_Dec_4_Blocks,
1052
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1053
}
1054
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
1055
1056
#if defined(CRYPTOPP_POWER8_AVAILABLE)
1057
size_t LEA_Enc_AdvancedProcessBlocks_POWER8(const word32* subKeys, size_t rounds,
1058
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
1059
{
1060
    return AdvancedProcessBlocks128_4x1_ALTIVEC(LEA_Enc_Block, LEA_Enc_4_Blocks,
1061
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1062
}
1063
1064
size_t LEA_Dec_AdvancedProcessBlocks_POWER8(const word32* subKeys, size_t rounds,
1065
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
1066
{
1067
    return AdvancedProcessBlocks128_4x1_ALTIVEC(LEA_Dec_Block, LEA_Dec_4_Blocks,
1068
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1069
}
1070
#endif // CRYPTOPP_POWER8_AVAILABLE
1071
1072
NAMESPACE_END