/src/botan/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Noekeon in SIMD |
3 | | * (C) 2010 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/internal/noekeon.h> |
9 | | #include <botan/internal/simd_32.h> |
10 | | |
11 | | namespace Botan { |
12 | | |
13 | | /* |
14 | | * Noekeon's Theta Operation |
| | * |
| | * Mixes the four state operands A0..A3 in place and folds in the four key |
| | * operands K0..K3 (which are only read). The steps, in order: |
| | * 1. T = A0 ^ A2, then T ^= rotl<8>(T) ^ rotr<8>(T); T is XORed into |
| | * A1 and A3. |
| | * 2. Each Ai is XORed with the matching Ki. |
| | * 3. T = A1 ^ A3 (using the values updated in steps 1 and 2), the same |
| | * rotate-and-XOR is applied, and T is XORed into A0 and A2. |
| | * |
| | * Step 3 reads the A1/A3 values produced by steps 1 and 2, so the |
| | * statement order below must not be changed. |
15 | | */ |
16 | | inline void theta(SIMD_4x32& A0, SIMD_4x32& A1, |
17 | | SIMD_4x32& A2, SIMD_4x32& A3, |
18 | | const SIMD_4x32& K0, |
19 | | const SIMD_4x32& K1, |
20 | | const SIMD_4x32& K2, |
21 | | const SIMD_4x32& K3) |
22 | 0 | { |
23 | 0 | SIMD_4x32 T = A0 ^ A2; |
24 | 0 | T ^= T.rotl<8>() ^ T.rotr<8>(); |
25 | 0 | A1 ^= T; |
26 | 0 | A3 ^= T; |
27 | |
|
28 | 0 | A0 ^= K0; |
29 | 0 | A1 ^= K1; |
30 | 0 | A2 ^= K2; |
31 | 0 | A3 ^= K3; |
32 | |
|
33 | 0 | T = A1 ^ A3; |
34 | 0 | T ^= T.rotl<8>() ^ T.rotr<8>(); |
35 | 0 | A0 ^= T; |
36 | 0 | A2 ^= T; |
37 | 0 | } |
38 | | |
39 | | /* |
40 | | * Noekeon's Gamma S-Box Layer |
| | * |
| | * Transforms the four state operands A0..A3 in place using only bitwise |
| | * operations. The steps, in order: |
| | * 1. A1 ^= ~(A2 | A3), then A0 ^= A2 & A1 (uses the updated A1). |
| | * 2. A0 and A3 are exchanged through the temporary T. |
| | * 3. A2 ^= A0 ^ A1 ^ A3. |
| | * 4. Step 1 is repeated on the swapped and updated values. |
| | * |
| | * Every step consumes results of the step before it, so the statement |
| | * order below must not be changed. |
41 | | */ |
42 | | inline void gamma(SIMD_4x32& A0, SIMD_4x32& A1, |
43 | | SIMD_4x32& A2, SIMD_4x32& A3) |
44 | 0 | { |
45 | 0 | A1 ^= ~(A2 | A3); |
46 | 0 | A0 ^= A2 & A1; |
47 | |
|
48 | 0 | SIMD_4x32 T = A3; |
49 | 0 | A3 = A0; |
50 | 0 | A0 = T; |
51 | |
|
52 | 0 | A2 ^= A0 ^ A1 ^ A3; |
53 | |
|
54 | 0 | A1 ^= ~(A2 | A3); |
55 | 0 | A0 ^= A2 & A1; |
56 | 0 | } |
57 | | |
58 | | /* |
59 | | * Noekeon Encryption |
| | * |
| | * Reads input through four load_be() calls at byte offsets 0, 16, 32 and |
| | * 48 of in, and writes the result through four store_be() calls at the |
| | * same offsets of out. (Presumably each load/store covers one 16-byte |
| | * block, i.e. four blocks per call; the width is set by SIMD_4x32, which |
| | * is not defined in this file.) |
| | * |
| | * Flow: |
| | * - m_EK[0..3] are each broadcast with splat() into K0..K3. |
| | * - The loaded operands are transposed before the rounds and transposed |
| | * again before storing. |
| | * - For i = 0..15: XOR RC[i] into A0, apply theta, rotate A1/A2/A3 left |
| | * by 1/5/2 bits, apply gamma, then rotate A1/A2/A3 right by 1/5/2 bits. |
| | * - After the loop: XOR RC[16] into A0 and apply theta once more. |
| | * |
| | * RC is declared outside this file and is indexed 0..16 here. |
60 | | */ |
61 | | void Noekeon::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const |
62 | 0 | { |
63 | 0 | const SIMD_4x32 K0 = SIMD_4x32::splat(m_EK[0]); |
64 | 0 | const SIMD_4x32 K1 = SIMD_4x32::splat(m_EK[1]); |
65 | 0 | const SIMD_4x32 K2 = SIMD_4x32::splat(m_EK[2]); |
66 | 0 | const SIMD_4x32 K3 = SIMD_4x32::splat(m_EK[3]); |
67 | |
|
68 | 0 | SIMD_4x32 A0 = SIMD_4x32::load_be(in ); |
69 | 0 | SIMD_4x32 A1 = SIMD_4x32::load_be(in + 16); |
70 | 0 | SIMD_4x32 A2 = SIMD_4x32::load_be(in + 32); |
71 | 0 | SIMD_4x32 A3 = SIMD_4x32::load_be(in + 48); |
72 | |
|
73 | 0 | SIMD_4x32::transpose(A0, A1, A2, A3); |
74 | |
|
75 | 0 | for(size_t i = 0; i != 16; ++i) |
76 | 0 | { |
77 | 0 | A0 ^= SIMD_4x32::splat(RC[i]); |
78 | |
|
79 | 0 | theta(A0, A1, A2, A3, K0, K1, K2, K3); |
80 | |
|
81 | 0 | A1 = A1.rotl<1>(); |
82 | 0 | A2 = A2.rotl<5>(); |
83 | 0 | A3 = A3.rotl<2>(); |
84 | |
|
85 | 0 | gamma(A0, A1, A2, A3); |
86 | |
|
87 | 0 | A1 = A1.rotr<1>(); |
88 | 0 | A2 = A2.rotr<5>(); |
89 | 0 | A3 = A3.rotr<2>(); |
90 | 0 | } |
91 | |
|
92 | 0 | A0 ^= SIMD_4x32::splat(RC[16]); |
93 | 0 | theta(A0, A1, A2, A3, K0, K1, K2, K3); |
94 | |
|
95 | 0 | SIMD_4x32::transpose(A0, A1, A2, A3); |
96 | |
|
97 | 0 | A0.store_be(out); |
98 | 0 | A1.store_be(out + 16); |
99 | 0 | A2.store_be(out + 32); |
100 | 0 | A3.store_be(out + 48); |
101 | 0 | } |
102 | | |
103 | | /* |
104 | | * Noekeon Decryption |
| | * |
| | * Reads input through four load_be() calls at byte offsets 0, 16, 32 and |
| | * 48 of in, and writes the result through four store_be() calls at the |
| | * same offsets of out. (Presumably each load/store covers one 16-byte |
| | * block, i.e. four blocks per call; the width is set by SIMD_4x32, which |
| | * is not defined in this file.) |
| | * |
| | * Flow: |
| | * - m_DK[0..3] (not m_EK) are each broadcast with splat() into K0..K3. |
| | * - The loaded operands are transposed before the rounds and transposed |
| | * again before storing. |
| | * - For i = 0..15: apply theta first, then XOR RC[16-i] into A0, so the |
| | * round constants are consumed from RC[16] down to RC[1]; rotate |
| | * A1/A2/A3 left by 1/5/2 bits, apply gamma, then rotate A1/A2/A3 right |
| | * by 1/5/2 bits. |
| | * - After the loop: apply theta once more, then XOR RC[0] into A0. |
| | * |
| | * Unlike simd_encrypt_4, theta runs before the round constant is added. |
| | * RC is declared outside this file and is indexed 0..16 here. |
105 | | */ |
106 | | void Noekeon::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const |
107 | 0 | { |
108 | 0 | const SIMD_4x32 K0 = SIMD_4x32::splat(m_DK[0]); |
109 | 0 | const SIMD_4x32 K1 = SIMD_4x32::splat(m_DK[1]); |
110 | 0 | const SIMD_4x32 K2 = SIMD_4x32::splat(m_DK[2]); |
111 | 0 | const SIMD_4x32 K3 = SIMD_4x32::splat(m_DK[3]); |
112 | |
|
113 | 0 | SIMD_4x32 A0 = SIMD_4x32::load_be(in ); |
114 | 0 | SIMD_4x32 A1 = SIMD_4x32::load_be(in + 16); |
115 | 0 | SIMD_4x32 A2 = SIMD_4x32::load_be(in + 32); |
116 | 0 | SIMD_4x32 A3 = SIMD_4x32::load_be(in + 48); |
117 | |
|
118 | 0 | SIMD_4x32::transpose(A0, A1, A2, A3); |
119 | |
|
120 | 0 | for(size_t i = 0; i != 16; ++i) |
121 | 0 | { |
122 | 0 | theta(A0, A1, A2, A3, K0, K1, K2, K3); |
123 | |
|
124 | 0 | A0 ^= SIMD_4x32::splat(RC[16-i]); |
125 | |
|
126 | 0 | A1 = A1.rotl<1>(); |
127 | 0 | A2 = A2.rotl<5>(); |
128 | 0 | A3 = A3.rotl<2>(); |
129 | |
|
130 | 0 | gamma(A0, A1, A2, A3); |
131 | |
|
132 | 0 | A1 = A1.rotr<1>(); |
133 | 0 | A2 = A2.rotr<5>(); |
134 | 0 | A3 = A3.rotr<2>(); |
135 | 0 | } |
136 | |
|
137 | 0 | theta(A0, A1, A2, A3, K0, K1, K2, K3); |
138 | 0 | A0 ^= SIMD_4x32::splat(RC[0]); |
139 | |
|
140 | 0 | SIMD_4x32::transpose(A0, A1, A2, A3); |
141 | |
|
142 | 0 | A0.store_be(out); |
143 | 0 | A1.store_be(out + 16); |
144 | 0 | A2.store_be(out + 32); |
145 | 0 | A3.store_be(out + 48); |
146 | 0 | } |
147 | | |
148 | | } |