/src/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 2018 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/internal/shacal2.h> |
8 | | #include <botan/internal/simd_avx2.h> |
9 | | |
10 | | namespace Botan { |
11 | | |
12 | | namespace { |
13 | | |
14 | | void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2") |
15 | | SHACAL2_Fwd(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D, |
16 | | const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H, |
17 | | uint32_t RK) |
18 | 0 | { |
19 | 0 | H += E.sigma1() + SIMD_8x32::choose(E, F, G) + SIMD_8x32::splat(RK); |
20 | 0 | D += H; |
21 | 0 | H += A.sigma0() + SIMD_8x32::majority(A, B, C); |
22 | 0 | } |
23 | | |
24 | | void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2") |
25 | | SHACAL2_Rev(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D, |
26 | | const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H, |
27 | | uint32_t RK) |
28 | 0 | { |
29 | 0 | H -= A.sigma0() + SIMD_8x32::majority(A, B, C); |
30 | 0 | D -= H; |
31 | 0 | H -= E.sigma1() + SIMD_8x32::choose(E, F, G) + SIMD_8x32::splat(RK); |
32 | 0 | } |
33 | | |
34 | | } |
35 | | |
36 | | void BOTAN_FUNC_ISA("avx2") SHACAL2::avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const |
37 | 0 | { |
38 | 0 | SIMD_8x32::reset_registers(); |
39 | |
|
40 | 0 | SIMD_8x32 A = SIMD_8x32::load_be(in); |
41 | 0 | SIMD_8x32 B = SIMD_8x32::load_be(in+32); |
42 | 0 | SIMD_8x32 C = SIMD_8x32::load_be(in+64); |
43 | 0 | SIMD_8x32 D = SIMD_8x32::load_be(in+96); |
44 | |
|
45 | 0 | SIMD_8x32 E = SIMD_8x32::load_be(in+128); |
46 | 0 | SIMD_8x32 F = SIMD_8x32::load_be(in+160); |
47 | 0 | SIMD_8x32 G = SIMD_8x32::load_be(in+192); |
48 | 0 | SIMD_8x32 H = SIMD_8x32::load_be(in+224); |
49 | |
|
50 | 0 | SIMD_8x32::transpose(A, B, C, D, E, F, G, H); |
51 | |
|
52 | 0 | for(size_t r = 0; r != 64; r += 8) |
53 | 0 | { |
54 | 0 | SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); |
55 | 0 | SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); |
56 | 0 | SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); |
57 | 0 | SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); |
58 | 0 | SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]); |
59 | 0 | SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); |
60 | 0 | SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]); |
61 | 0 | SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); |
62 | 0 | } |
63 | |
|
64 | 0 | SIMD_8x32::transpose(A, B, C, D, E, F, G, H); |
65 | |
|
66 | 0 | A.store_be(out); |
67 | 0 | B.store_be(out+32); |
68 | 0 | C.store_be(out+64); |
69 | 0 | D.store_be(out+96); |
70 | |
|
71 | 0 | E.store_be(out+128); |
72 | 0 | F.store_be(out+160); |
73 | 0 | G.store_be(out+192); |
74 | 0 | H.store_be(out+224); |
75 | |
|
76 | 0 | SIMD_8x32::zero_registers(); |
77 | 0 | } |
78 | | |
79 | | BOTAN_FUNC_ISA("avx2") void SHACAL2::avx2_decrypt_8(const uint8_t in[], uint8_t out[]) const |
80 | 0 | { |
81 | 0 | SIMD_8x32::reset_registers(); |
82 | |
|
83 | 0 | SIMD_8x32 A = SIMD_8x32::load_be(in); |
84 | 0 | SIMD_8x32 B = SIMD_8x32::load_be(in+32); |
85 | 0 | SIMD_8x32 C = SIMD_8x32::load_be(in+64); |
86 | 0 | SIMD_8x32 D = SIMD_8x32::load_be(in+96); |
87 | |
|
88 | 0 | SIMD_8x32 E = SIMD_8x32::load_be(in+128); |
89 | 0 | SIMD_8x32 F = SIMD_8x32::load_be(in+160); |
90 | 0 | SIMD_8x32 G = SIMD_8x32::load_be(in+192); |
91 | 0 | SIMD_8x32 H = SIMD_8x32::load_be(in+224); |
92 | |
|
93 | 0 | SIMD_8x32::transpose(A, B, C, D, E, F, G, H); |
94 | |
|
95 | 0 | for(size_t r = 0; r != 64; r += 8) |
96 | 0 | { |
97 | 0 | SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); |
98 | 0 | SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); |
99 | 0 | SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); |
100 | 0 | SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); |
101 | 0 | SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); |
102 | 0 | SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); |
103 | 0 | SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); |
104 | 0 | SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); |
105 | 0 | } |
106 | |
|
107 | 0 | SIMD_8x32::transpose(A, B, C, D, E, F, G, H); |
108 | |
|
109 | 0 | A.store_be(out); |
110 | 0 | B.store_be(out+32); |
111 | 0 | C.store_be(out+64); |
112 | 0 | D.store_be(out+96); |
113 | |
|
114 | 0 | E.store_be(out+128); |
115 | 0 | F.store_be(out+160); |
116 | 0 | G.store_be(out+192); |
117 | 0 | H.store_be(out+224); |
118 | |
|
119 | 0 | SIMD_8x32::zero_registers(); |
120 | 0 | } |
121 | | |
122 | | } |