Coverage Report

Created: 2023-06-07 07:00

/src/botan/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/internal/chacha.h>
8
9
#include <botan/internal/simd_avx2.h>
10
11
namespace Botan {
12
13
/*
* Run the ChaCha core on eight consecutive block counters at once using
* 8-lane 32-bit SIMD vectors (SIMD_8x32).
*
* Each of the 16 state words is broadcast into its own vector; lane i works
* on the block whose counter is the input counter plus i, where state[12] is
* treated as the low counter word and state[13] as the high counter word.
*
* output: destination buffer, declared as 64 * 8 bytes; filled by sixteen
*         vector stores at 32-byte strides
* state:  16-word input state; only state[12] and state[13] are modified
*         (the counter is advanced by 8 before returning)
* rounds: number of rounds, must be even; the loop below executes
*         rounds / 2 iterations, each one column round plus one diagonal round
*/
//static
14
BOTAN_AVX2_FN
15
8.27k
void ChaCha::chacha_avx2_x8(uint8_t output[64 * 8], uint32_t state[16], size_t rounds) {
16
8.27k
   // NOTE(review): helper comes from simd_avx2.h (not visible here); presumably
   // puts the AVX2 registers into a clean state before use - confirm
   SIMD_8x32::reset_registers();
17
18
8.27k
   // Each loop iteration consumes two rounds, so an odd count cannot be honoured
   BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
19
8.27k
   // Per-lane offsets 0..7 added to the low counter word
   const SIMD_8x32 CTR0 = SIMD_8x32(0, 1, 2, 3, 4, 5, 6, 7);
20
21
8.27k
   // C is the distance from state[12] to the 32-bit maximum; (C < i) is 1
   // exactly when state[12] + i wraps, i.e. when lane i needs a carry into
   // the high counter word
   const uint32_t C = 0xFFFFFFFF - state[12];
22
8.27k
   const SIMD_8x32 CTR1 = SIMD_8x32(0, C < 1, C < 2, C < 3, C < 4, C < 5, C < 6, C < 7);
23
24
8.27k
   // Broadcast every state word across all eight lanes; only the two counter
   // words (R12, R13) differ between lanes
   SIMD_8x32 R00 = SIMD_8x32::splat(state[0]);
25
8.27k
   SIMD_8x32 R01 = SIMD_8x32::splat(state[1]);
26
8.27k
   SIMD_8x32 R02 = SIMD_8x32::splat(state[2]);
27
8.27k
   SIMD_8x32 R03 = SIMD_8x32::splat(state[3]);
28
8.27k
   SIMD_8x32 R04 = SIMD_8x32::splat(state[4]);
29
8.27k
   SIMD_8x32 R05 = SIMD_8x32::splat(state[5]);
30
8.27k
   SIMD_8x32 R06 = SIMD_8x32::splat(state[6]);
31
8.27k
   SIMD_8x32 R07 = SIMD_8x32::splat(state[7]);
32
8.27k
   SIMD_8x32 R08 = SIMD_8x32::splat(state[8]);
33
8.27k
   SIMD_8x32 R09 = SIMD_8x32::splat(state[9]);
34
8.27k
   SIMD_8x32 R10 = SIMD_8x32::splat(state[10]);
35
8.27k
   SIMD_8x32 R11 = SIMD_8x32::splat(state[11]);
36
8.27k
   SIMD_8x32 R12 = SIMD_8x32::splat(state[12]) + CTR0;
37
8.27k
   SIMD_8x32 R13 = SIMD_8x32::splat(state[13]) + CTR1;
38
8.27k
   SIMD_8x32 R14 = SIMD_8x32::splat(state[14]);
39
8.27k
   SIMD_8x32 R15 = SIMD_8x32::splat(state[15]);
40
41
91.0k
   // One iteration = one double round (column round followed by diagonal round)
   for(size_t r = 0; r != rounds / 2; ++r) {
42
82.7k
      // Column round: four quarter-rounds, interleaved step by step, on
      // (R00,R04,R08,R12) (R01,R05,R09,R13) (R02,R06,R10,R14) (R03,R07,R11,R15).
      // Each quarter-round is add/xor/rotl with rotation amounts 16, 12, 8, 7.
      R00 += R04;
43
82.7k
      R01 += R05;
44
82.7k
      R02 += R06;
45
82.7k
      R03 += R07;
46
47
82.7k
      R12 ^= R00;
48
82.7k
      R13 ^= R01;
49
82.7k
      R14 ^= R02;
50
82.7k
      R15 ^= R03;
51
52
82.7k
      R12 = R12.rotl<16>();
53
82.7k
      R13 = R13.rotl<16>();
54
82.7k
      R14 = R14.rotl<16>();
55
82.7k
      R15 = R15.rotl<16>();
56
57
82.7k
      R08 += R12;
58
82.7k
      R09 += R13;
59
82.7k
      R10 += R14;
60
82.7k
      R11 += R15;
61
62
82.7k
      R04 ^= R08;
63
82.7k
      R05 ^= R09;
64
82.7k
      R06 ^= R10;
65
82.7k
      R07 ^= R11;
66
67
82.7k
      R04 = R04.rotl<12>();
68
82.7k
      R05 = R05.rotl<12>();
69
82.7k
      R06 = R06.rotl<12>();
70
82.7k
      R07 = R07.rotl<12>();
71
72
82.7k
      R00 += R04;
73
82.7k
      R01 += R05;
74
82.7k
      R02 += R06;
75
82.7k
      R03 += R07;
76
77
82.7k
      R12 ^= R00;
78
82.7k
      R13 ^= R01;
79
82.7k
      R14 ^= R02;
80
82.7k
      R15 ^= R03;
81
82
82.7k
      R12 = R12.rotl<8>();
83
82.7k
      R13 = R13.rotl<8>();
84
82.7k
      R14 = R14.rotl<8>();
85
82.7k
      R15 = R15.rotl<8>();
86
87
82.7k
      R08 += R12;
88
82.7k
      R09 += R13;
89
82.7k
      R10 += R14;
90
82.7k
      R11 += R15;
91
92
82.7k
      R04 ^= R08;
93
82.7k
      R05 ^= R09;
94
82.7k
      R06 ^= R10;
95
82.7k
      R07 ^= R11;
96
97
82.7k
      R04 = R04.rotl<7>();
98
82.7k
      R05 = R05.rotl<7>();
99
82.7k
      R06 = R06.rotl<7>();
100
82.7k
      R07 = R07.rotl<7>();
101
102
82.7k
      // Diagonal round: the same quarter-round steps, now on
      // (R00,R05,R10,R15) (R01,R06,R11,R12) (R02,R07,R08,R13) (R03,R04,R09,R14).
      // The diagonals are selected by operand choice; no data is shuffled.
      R00 += R05;
103
82.7k
      R01 += R06;
104
82.7k
      R02 += R07;
105
82.7k
      R03 += R04;
106
107
82.7k
      R15 ^= R00;
108
82.7k
      R12 ^= R01;
109
82.7k
      R13 ^= R02;
110
82.7k
      R14 ^= R03;
111
112
82.7k
      R15 = R15.rotl<16>();
113
82.7k
      R12 = R12.rotl<16>();
114
82.7k
      R13 = R13.rotl<16>();
115
82.7k
      R14 = R14.rotl<16>();
116
117
82.7k
      R10 += R15;
118
82.7k
      R11 += R12;
119
82.7k
      R08 += R13;
120
82.7k
      R09 += R14;
121
122
82.7k
      R05 ^= R10;
123
82.7k
      R06 ^= R11;
124
82.7k
      R07 ^= R08;
125
82.7k
      R04 ^= R09;
126
127
82.7k
      R05 = R05.rotl<12>();
128
82.7k
      R06 = R06.rotl<12>();
129
82.7k
      R07 = R07.rotl<12>();
130
82.7k
      R04 = R04.rotl<12>();
131
132
82.7k
      R00 += R05;
133
82.7k
      R01 += R06;
134
82.7k
      R02 += R07;
135
82.7k
      R03 += R04;
136
137
82.7k
      R15 ^= R00;
138
82.7k
      R12 ^= R01;
139
82.7k
      R13 ^= R02;
140
82.7k
      R14 ^= R03;
141
142
82.7k
      R15 = R15.rotl<8>();
143
82.7k
      R12 = R12.rotl<8>();
144
82.7k
      R13 = R13.rotl<8>();
145
82.7k
      R14 = R14.rotl<8>();
146
147
82.7k
      R10 += R15;
148
82.7k
      R11 += R12;
149
82.7k
      R08 += R13;
150
82.7k
      R09 += R14;
151
152
82.7k
      R05 ^= R10;
153
82.7k
      R06 ^= R11;
154
82.7k
      R07 ^= R08;
155
82.7k
      R04 ^= R09;
156
157
82.7k
      R05 = R05.rotl<7>();
158
82.7k
      R06 = R06.rotl<7>();
159
82.7k
      R07 = R07.rotl<7>();
160
82.7k
      R04 = R04.rotl<7>();
161
82.7k
   }
162
163
8.27k
   // Feed-forward: add the original input words back onto the permuted state.
   // R12/R13 get the same per-lane counter values they were initialised with.
   R00 += SIMD_8x32::splat(state[0]);
164
8.27k
   R01 += SIMD_8x32::splat(state[1]);
165
8.27k
   R02 += SIMD_8x32::splat(state[2]);
166
8.27k
   R03 += SIMD_8x32::splat(state[3]);
167
8.27k
   R04 += SIMD_8x32::splat(state[4]);
168
8.27k
   R05 += SIMD_8x32::splat(state[5]);
169
8.27k
   R06 += SIMD_8x32::splat(state[6]);
170
8.27k
   R07 += SIMD_8x32::splat(state[7]);
171
8.27k
   R08 += SIMD_8x32::splat(state[8]);
172
8.27k
   R09 += SIMD_8x32::splat(state[9]);
173
8.27k
   R10 += SIMD_8x32::splat(state[10]);
174
8.27k
   R11 += SIMD_8x32::splat(state[11]);
175
8.27k
   R12 += SIMD_8x32::splat(state[12]) + CTR0;
176
8.27k
   R13 += SIMD_8x32::splat(state[13]) + CTR1;
177
8.27k
   R14 += SIMD_8x32::splat(state[14]);
178
8.27k
   R15 += SIMD_8x32::splat(state[15]);
179
180
8.27k
   // NOTE(review): transpose() is defined in simd_avx2.h (not visible here);
   // presumably it converts the word-per-register layout into block-ordered
   // data, done separately for words 0-7 and words 8-15 - confirm
   SIMD_8x32::transpose(R00, R01, R02, R03, R04, R05, R06, R07);
181
8.27k
   SIMD_8x32::transpose(R08, R09, R10, R11, R12, R13, R14, R15);
182
183
8.27k
   // Stores alternate between the two transposed groups (R00, R08, R01, R09, ...)
   // so that each 64-byte span of output is written as one register from the
   // first group followed by the matching register from the second group
   R00.store_le(output);
184
8.27k
   R08.store_le(output + 32 * 1);
185
8.27k
   R01.store_le(output + 32 * 2);
186
8.27k
   R09.store_le(output + 32 * 3);
187
8.27k
   R02.store_le(output + 32 * 4);
188
8.27k
   R10.store_le(output + 32 * 5);
189
8.27k
   R03.store_le(output + 32 * 6);
190
8.27k
   R11.store_le(output + 32 * 7);
191
8.27k
   R04.store_le(output + 32 * 8);
192
8.27k
   R12.store_le(output + 32 * 9);
193
8.27k
   R05.store_le(output + 32 * 10);
194
8.27k
   R13.store_le(output + 32 * 11);
195
8.27k
   R06.store_le(output + 32 * 12);
196
8.27k
   R14.store_le(output + 32 * 13);
197
8.27k
   R07.store_le(output + 32 * 14);
198
8.27k
   R15.store_le(output + 32 * 15);
199
200
8.27k
   // NOTE(review): helper from simd_avx2.h (not visible here); presumably clears
   // the vector registers so no working state is left behind - confirm
   SIMD_8x32::zero_registers();
201
202
8.27k
   // Advance the counter past the eight blocks just produced. If the low word
   // is now smaller than 8 the addition wrapped, so carry into the high word.
   state[12] += 8;
203
8.27k
   if(state[12] < 8) {
204
0
      state[13]++;
205
0
   }
206
8.27k
}
207
}  // namespace Botan