/src/botan/src/lib/pbkdf/argon2/argon2_avx2/argon2_avx2.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * (C) 2023 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/argon2.h> |
8 | | |
9 | | #include <botan/compiler.h> |
10 | | #include <botan/internal/isa_extn.h> |
11 | | #include <botan/internal/simd_4x64.h> |
12 | | |
13 | | namespace Botan { |
14 | | |
15 | | namespace { |
16 | | |
17 | 0 | BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2 void blamka_G(SIMD_4x64& A, SIMD_4x64& B, SIMD_4x64& C, SIMD_4x64& D) { |
18 | 0 | A += B + SIMD_4x64::mul2_32(A, B); |
19 | 0 | D ^= A; |
20 | 0 | D = D.rotr<32>(); |
21 | |
|
22 | 0 | C += D + SIMD_4x64::mul2_32(C, D); |
23 | 0 | B ^= C; |
24 | 0 | B = B.rotr<24>(); |
25 | |
|
26 | 0 | A += B + SIMD_4x64::mul2_32(A, B); |
27 | 0 | D ^= A; |
28 | 0 | D = D.rotr<16>(); |
29 | |
|
30 | 0 | C += D + SIMD_4x64::mul2_32(C, D); |
31 | 0 | B ^= C; |
32 | 0 | B = B.rotr<63>(); |
33 | 0 | } |
34 | | |
35 | 0 | BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2 void blamka_R(SIMD_4x64& A, SIMD_4x64& B, SIMD_4x64& C, SIMD_4x64& D) { |
36 | 0 | blamka_G(A, B, C, D); |
37 | |
|
38 | 0 | SIMD_4x64::twist(B, C, D); |
39 | 0 | blamka_G(A, B, C, D); |
40 | 0 | SIMD_4x64::untwist(B, C, D); |
41 | 0 | } |
42 | | |
43 | | } // namespace |
44 | | |
45 | 0 | BOTAN_FN_ISA_AVX2 void Argon2::blamka_avx2(uint64_t N[128], uint64_t T[128]) { |
46 | 0 | for(size_t i = 0; i != 8; ++i) { |
47 | 0 | SIMD_4x64 A = SIMD_4x64::load_le(&N[16 * i + 4 * 0]); |
48 | 0 | SIMD_4x64 B = SIMD_4x64::load_le(&N[16 * i + 4 * 1]); |
49 | 0 | SIMD_4x64 C = SIMD_4x64::load_le(&N[16 * i + 4 * 2]); |
50 | 0 | SIMD_4x64 D = SIMD_4x64::load_le(&N[16 * i + 4 * 3]); |
51 | |
|
52 | 0 | blamka_R(A, B, C, D); |
53 | |
|
54 | 0 | A.store_le(&T[16 * i + 4 * 0]); |
55 | 0 | B.store_le(&T[16 * i + 4 * 1]); |
56 | 0 | C.store_le(&T[16 * i + 4 * 2]); |
57 | 0 | D.store_le(&T[16 * i + 4 * 3]); |
58 | 0 | } |
59 | |
|
60 | 0 | for(size_t i = 0; i != 8; ++i) { |
61 | 0 | SIMD_4x64 A = SIMD_4x64::load_le2(&T[2 * i + 32 * 0], &T[2 * i + 32 * 0 + 16]); |
62 | 0 | SIMD_4x64 B = SIMD_4x64::load_le2(&T[2 * i + 32 * 1], &T[2 * i + 32 * 1 + 16]); |
63 | 0 | SIMD_4x64 C = SIMD_4x64::load_le2(&T[2 * i + 32 * 2], &T[2 * i + 32 * 2 + 16]); |
64 | 0 | SIMD_4x64 D = SIMD_4x64::load_le2(&T[2 * i + 32 * 3], &T[2 * i + 32 * 3 + 16]); |
65 | |
|
66 | 0 | blamka_R(A, B, C, D); |
67 | |
|
68 | 0 | A.store_le2(&T[2 * i + 32 * 0], &T[2 * i + 32 * 0 + 16]); |
69 | 0 | B.store_le2(&T[2 * i + 32 * 1], &T[2 * i + 32 * 1 + 16]); |
70 | 0 | C.store_le2(&T[2 * i + 32 * 2], &T[2 * i + 32 * 2 + 16]); |
71 | 0 | D.store_le2(&T[2 * i + 32 * 3], &T[2 * i + 32 * 3 + 16]); |
72 | 0 | } |
73 | |
|
74 | 0 | for(size_t i = 0; i != 128 / 8; ++i) { |
75 | 0 | SIMD_4x64 n0 = SIMD_4x64::load_le(&N[8 * i]); |
76 | 0 | SIMD_4x64 n1 = SIMD_4x64::load_le(&N[8 * i + 4]); |
77 | 0 | SIMD_4x64 t0 = SIMD_4x64::load_le(&T[8 * i]); |
78 | 0 | SIMD_4x64 t1 = SIMD_4x64::load_le(&T[8 * i + 4]); |
79 | |
|
80 | 0 | n0 ^= t0; |
81 | 0 | n1 ^= t1; |
82 | 0 | n0.store_le(&N[8 * i]); |
83 | 0 | n1.store_le(&N[8 * i + 4]); |
84 | 0 | } |
85 | 0 | } |
86 | | |
87 | | } // namespace Botan |