/src/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Serpent (SIMD) |
3 | | * (C) 2009,2013 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/internal/serpent.h> |
9 | | #include <botan/internal/serpent_sbox.h> |
10 | | #include <botan/internal/simd_32.h> |
11 | | |
12 | | namespace Botan { |
13 | | |
/*
 * key_xor(round, B0, B1, B2, B3): XORs round-key material into the four state
 * vectors in place. Bi is XORed with SIMD_4x32::splat(m_round_key[4*round + i]),
 * so one use reads the four consecutive entries m_round_key[4*round] through
 * m_round_key[4*round + 3].
 * (splat presumably copies the one key word into every lane - confirm in simd_32.h.)
 *
 * The round argument is pasted into the index expression without parentheses;
 * every use in this listing passes a plain integer literal (0 to 32), which
 * keeps that safe. The do/while(0) wrapper makes the expansion one statement.
 */
14 | | #define key_xor(round, B0, B1, B2, B3) \ |
15 | 0 | do { \ |
16 | 0 | B0 ^= SIMD_4x32::splat(m_round_key[4*round ]); \ |
17 | 0 | B1 ^= SIMD_4x32::splat(m_round_key[4*round+1]); \ |
18 | 0 | B2 ^= SIMD_4x32::splat(m_round_key[4*round+2]); \ |
19 | 0 | B3 ^= SIMD_4x32::splat(m_round_key[4*round+3]); \ |
20 | 0 | } while(0) |
21 | | |
22 | | /* |
23 | | * Serpent's linear transformations |
24 | | */ |
/*
 * transform(B0, B1, B2, B3): forward mixing step. Updates all four vectors in
 * place using only rotl<>, shl<> and XOR.
 *
 * Statement order is significant: each line reads values produced by the lines
 * above it (for example B1 is built from the already rotated B0 and B2), so the
 * steps must not be reordered.
 */
25 | | #define transform(B0, B1, B2, B3) \ |
26 | 0 | do { \ |
27 | 0 | B0 = B0.rotl<13>(); \ |
28 | 0 | B2 = B2.rotl<3>(); \ |
29 | 0 | B1 ^= B0 ^ B2; \ |
30 | 0 | B3 ^= B2 ^ B0.shl<3>(); \ |
31 | 0 | B1 = B1.rotl<1>(); \ |
32 | 0 | B3 = B3.rotl<7>(); \ |
33 | 0 | B0 ^= B1 ^ B3; \ |
34 | 0 | B2 ^= B3 ^ B1.shl<7>(); \ |
35 | 0 | B0 = B0.rotl<5>(); \ |
36 | 0 | B2 = B2.rotl<22>(); \ |
37 | 0 | } while(0) |
38 | | |
/*
 * i_transform(B0, B1, B2, B3): the steps of transform replayed in the opposite
 * order, with every rotl<n> replaced by rotr<n> and every XOR step repeated
 * unchanged (an XOR with the same operand cancels itself).
 * Assuming rotr<n> is the exact inverse of rotl<n>, this undoes transform.
 *
 * As with transform, the statement order is significant and must be kept.
 */
39 | | #define i_transform(B0, B1, B2, B3) \ |
40 | 0 | do { \ |
41 | 0 | B2 = B2.rotr<22>(); \ |
42 | 0 | B0 = B0.rotr<5>(); \ |
43 | 0 | B2 ^= B3 ^ B1.shl<7>(); \ |
44 | 0 | B0 ^= B1 ^ B3; \ |
45 | 0 | B3 = B3.rotr<7>(); \ |
46 | 0 | B1 = B1.rotr<1>(); \ |
47 | 0 | B3 ^= B2 ^ B0.shl<3>(); \ |
48 | 0 | B1 ^= B0 ^ B2; \ |
49 | 0 | B2 = B2.rotr<3>(); \ |
50 | 0 | B0 = B0.rotr<13>(); \ |
51 | 0 | } while(0) |
52 | | |
53 | | /* |
54 | | * SIMD Serpent Encryption of 4 blocks in parallel |
55 | | */ |
/*
 * in:  64 bytes, read as four 16-byte chunks at offsets 0, 16, 32 and 48.
 * out: 64 bytes, written at the same four offsets.
 *
 * Round r for r = 0 to 30 is: key_xor(r), SBoxE(r mod 8), transform.
 * Round 31 differs: it has no transform and is followed by key_xor(32).
 * Round keys 0 to 32 are therefore consumed, i.e. m_round_key[0] through
 * m_round_key[131].
 *
 * NOTE(review): the Count column is 0 on every executable row below, so per
 * this report no run executed this function - confirm the SIMD path is tested.
 */
56 | | void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const |
57 | 0 | { |
// Load the four 16-byte input chunks.
// (load_le presumably reads little-endian 32-bit words - confirm in simd_32.h.)
58 | 0 | SIMD_4x32 B0 = SIMD_4x32::load_le(in); |
59 | 0 | SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16); |
60 | 0 | SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32); |
61 | 0 | SIMD_4x32 B3 = SIMD_4x32::load_le(in + 48); |
62 | |

// NOTE(review): transpose presumably regroups the data so that each vector
// holds the same word position of all four blocks - confirm in simd_32.h.
63 | 0 | SIMD_4x32::transpose(B0, B1, B2, B3); |
64 | |

// Rounds 0-7: S-boxes E0 to E7 in order. The same E0 to E7 cycle repeats in
// each of the three groups of eight rounds that follow.
65 | 0 | key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
66 | 0 | key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
67 | 0 | key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
68 | 0 | key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
69 | 0 | key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
70 | 0 | key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
71 | 0 | key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
72 | 0 | key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
73 | |

// Rounds 8-15.
74 | 0 | key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
75 | 0 | key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
76 | 0 | key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
77 | 0 | key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
78 | 0 | key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
79 | 0 | key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
80 | 0 | key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
81 | 0 | key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
82 | |

// Rounds 16-23.
83 | 0 | key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
84 | 0 | key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
85 | 0 | key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
86 | 0 | key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
87 | 0 | key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
88 | 0 | key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
89 | 0 | key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
90 | 0 | key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
91 | |

// Rounds 24-31.
92 | 0 | key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
93 | 0 | key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
94 | 0 | key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
95 | 0 | key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
96 | 0 | key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
97 | 0 | key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
98 | 0 | key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); |
// Final round: no transform; a second key_xor, with round key 32, takes its place.
99 | 0 | key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); |
100 | |

// Second transpose before storing (presumably restores the per-block layout
// that the loads produced - confirm).
101 | 0 | SIMD_4x32::transpose(B0, B1, B2, B3); |
102 | |

// Write the four 16-byte results at offsets 0, 16, 32 and 48 of out.
103 | 0 | B0.store_le(out); |
104 | 0 | B1.store_le(out + 16); |
105 | 0 | B2.store_le(out + 32); |
106 | 0 | B3.store_le(out + 48); |
107 | 0 | } |
108 | | |
109 | | /* |
110 | | * SIMD Serpent Decryption of 4 blocks in parallel |
111 | | */ |
/*
 * in:  64 bytes, read as four 16-byte chunks at offsets 0, 16, 32 and 48.
 * out: 64 bytes, written at the same four offsets.
 *
 * Walks the round keys downward from 32 to 0. The first row is
 * key_xor(32), SBoxD7, key_xor(31) with no i_transform; every later row is
 * i_transform, SBoxD(k mod 8), key_xor(k) for k = 30 down to 0. The S-boxes
 * run D7 down to D0, four times over.
 *
 * NOTE(review): the Count column is 0 on every executable row below, so per
 * this report no run executed this function - confirm the SIMD path is tested.
 */
112 | | void Serpent::simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const |
113 | 0 | { |
// Load the four 16-byte input chunks.
// (load_le presumably reads little-endian 32-bit words - confirm in simd_32.h.)
114 | 0 | SIMD_4x32 B0 = SIMD_4x32::load_le(in); |
115 | 0 | SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16); |
116 | 0 | SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32); |
117 | 0 | SIMD_4x32 B3 = SIMD_4x32::load_le(in + 48); |
118 | |

// NOTE(review): transpose presumably regroups the data so that each vector
// holds the same word position of all four blocks - confirm in simd_32.h.
119 | 0 | SIMD_4x32::transpose(B0, B1, B2, B3); |
120 | |

// First row has no i_transform: it starts with key_xor(32) instead.
121 | 0 | key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); |
122 | 0 | i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); |
123 | 0 | i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); |
124 | 0 | i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3); |
125 | 0 | i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3); |
126 | 0 | i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); |
127 | 0 | i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); |
128 | 0 | i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3); |
129 | |

// Round keys 23 down to 16.
130 | 0 | i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); |
131 | 0 | i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); |
132 | 0 | i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3); |
133 | 0 | i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3); |
134 | 0 | i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3); |
135 | 0 | i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); |
136 | 0 | i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); |
137 | 0 | i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3); |
138 | |

// Round keys 15 down to 8.
139 | 0 | i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); |
140 | 0 | i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); |
141 | 0 | i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3); |
142 | 0 | i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3); |
143 | 0 | i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3); |
144 | 0 | i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); |
145 | 0 | i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); |
146 | 0 | i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); |
147 | |

// Round keys 7 down to 0; key_xor(0) is the last operation on the state.
148 | 0 | i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); |
149 | 0 | i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); |
150 | 0 | i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3); |
151 | 0 | i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3); |
152 | 0 | i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3); |
153 | 0 | i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); |
154 | 0 | i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); |
155 | 0 | i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); |
156 | |

// Second transpose before storing (presumably restores the per-block layout
// that the loads produced - confirm).
157 | 0 | SIMD_4x32::transpose(B0, B1, B2, B3); |
158 | |

// Write the four 16-byte results at offsets 0, 16, 32 and 48 of out.
159 | 0 | B0.store_le(out); |
160 | 0 | B1.store_le(out + 16); |
161 | 0 | B2.store_le(out + 32); |
162 | 0 | B3.store_le(out + 48); |
163 | 0 | } |
164 | | |
/*
 * Remove the three helper macros defined at the top of this file so that their
 * short, generic names are no longer defined after this point.
 */
165 | | #undef key_xor |
166 | | #undef transform |
167 | | #undef i_transform |
168 | | |
169 | | } |