/src/botan/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * SHACAL-2 using SIMD |
3 | | * (C) 2017 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/internal/shacal2.h> |
9 | | #include <botan/internal/simd_32.h> |
10 | | |
11 | | namespace Botan { |
12 | | |
13 | | namespace { |
14 | | |
15 | | inline |
16 | | void SHACAL2_Fwd(const SIMD_4x32& A, const SIMD_4x32& B, const SIMD_4x32& C, SIMD_4x32& D, |
17 | | const SIMD_4x32& E, const SIMD_4x32& F, const SIMD_4x32& G, SIMD_4x32& H, |
18 | | uint32_t RK) |
19 | 0 | { |
20 | 0 | H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_4x32::splat(RK); |
21 | 0 | D += H; |
22 | 0 | H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); |
23 | 0 | } |
24 | | |
25 | | inline |
26 | | void SHACAL2_Rev(const SIMD_4x32& A, const SIMD_4x32& B, const SIMD_4x32& C, SIMD_4x32& D, |
27 | | const SIMD_4x32& E, const SIMD_4x32& F, const SIMD_4x32& G, SIMD_4x32& H, |
28 | | uint32_t RK) |
29 | 0 | { |
30 | 0 | H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); |
31 | 0 | D -= H; |
32 | 0 | H -= E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_4x32::splat(RK); |
33 | 0 | } |
34 | | |
35 | | } |
36 | | |
37 | | void SHACAL2::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const |
38 | 0 | { |
39 | 0 | SIMD_4x32 A = SIMD_4x32::load_be(in); |
40 | 0 | SIMD_4x32 E = SIMD_4x32::load_be(in+16); |
41 | 0 | SIMD_4x32 B = SIMD_4x32::load_be(in+32); |
42 | 0 | SIMD_4x32 F = SIMD_4x32::load_be(in+48); |
43 | |
|
44 | 0 | SIMD_4x32 C = SIMD_4x32::load_be(in+64); |
45 | 0 | SIMD_4x32 G = SIMD_4x32::load_be(in+80); |
46 | 0 | SIMD_4x32 D = SIMD_4x32::load_be(in+96); |
47 | 0 | SIMD_4x32 H = SIMD_4x32::load_be(in+112); |
48 | |
|
49 | 0 | SIMD_4x32::transpose(A, B, C, D); |
50 | 0 | SIMD_4x32::transpose(E, F, G, H); |
51 | |
|
52 | 0 | for(size_t r = 0; r != 64; r += 8) |
53 | 0 | { |
54 | 0 | SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); |
55 | 0 | SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); |
56 | 0 | SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); |
57 | 0 | SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); |
58 | 0 | SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]); |
59 | 0 | SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); |
60 | 0 | SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]); |
61 | 0 | SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); |
62 | 0 | } |
63 | |
|
64 | 0 | SIMD_4x32::transpose(A, B, C, D); |
65 | 0 | SIMD_4x32::transpose(E, F, G, H); |
66 | |
|
67 | 0 | A.store_be(out); |
68 | 0 | E.store_be(out+16); |
69 | 0 | B.store_be(out+32); |
70 | 0 | F.store_be(out+48); |
71 | |
|
72 | 0 | C.store_be(out+64); |
73 | 0 | G.store_be(out+80); |
74 | 0 | D.store_be(out+96); |
75 | 0 | H.store_be(out+112); |
76 | 0 | } |
77 | | |
78 | | void SHACAL2::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const |
79 | 0 | { |
80 | 0 | SIMD_4x32 A = SIMD_4x32::load_be(in); |
81 | 0 | SIMD_4x32 E = SIMD_4x32::load_be(in+16); |
82 | 0 | SIMD_4x32 B = SIMD_4x32::load_be(in+32); |
83 | 0 | SIMD_4x32 F = SIMD_4x32::load_be(in+48); |
84 | |
|
85 | 0 | SIMD_4x32 C = SIMD_4x32::load_be(in+64); |
86 | 0 | SIMD_4x32 G = SIMD_4x32::load_be(in+80); |
87 | 0 | SIMD_4x32 D = SIMD_4x32::load_be(in+96); |
88 | 0 | SIMD_4x32 H = SIMD_4x32::load_be(in+112); |
89 | |
|
90 | 0 | SIMD_4x32::transpose(A, B, C, D); |
91 | 0 | SIMD_4x32::transpose(E, F, G, H); |
92 | |
|
93 | 0 | for(size_t r = 0; r != 64; r += 8) |
94 | 0 | { |
95 | 0 | SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); |
96 | 0 | SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); |
97 | 0 | SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); |
98 | 0 | SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); |
99 | 0 | SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); |
100 | 0 | SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); |
101 | 0 | SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); |
102 | 0 | SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); |
103 | 0 | } |
104 | |
|
105 | 0 | SIMD_4x32::transpose(A, B, C, D); |
106 | 0 | SIMD_4x32::transpose(E, F, G, H); |
107 | |
|
108 | 0 | A.store_be(out); |
109 | 0 | E.store_be(out+16); |
110 | 0 | B.store_be(out+32); |
111 | 0 | F.store_be(out+48); |
112 | |
|
113 | 0 | C.store_be(out+64); |
114 | 0 | G.store_be(out+80); |
115 | 0 | D.store_be(out+96); |
116 | 0 | H.store_be(out+112); |
117 | 0 | } |
118 | | |
119 | | } |