Coverage Report

Created: 2020-09-16 07:52

/src/botan/src/lib/stream/chacha/chacha_simd32/chacha_simd32.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/chacha.h>
8
#include <botan/internal/simd_32.h>
9
10
namespace Botan {
11
12
//static
13
/*
* Fill `output` with 4*64 bytes derived from the 16-word `state`, handling
* four inputs at once: one per lane of each SIMD_4x32 register.
*
* All four lanes share every state word except state[12], which is offset
* by 0, 1, 2, 3 per lane, and state[13], which gets an extra 1 in any lane
* whose offset makes state[12] wrap past 0xFFFFFFFF.
*
* output: written by 16 stores placed at 16-byte strides (offsets 0..15*16)
* state:  all 16 words are read; before returning, state[12] is increased
*         by 4 and state[13] by 1 if that increase wrapped
* rounds: must be even (asserted below); the loop body runs rounds/2 times
*/
void ChaCha::chacha_simd32_x4(uint8_t output[64*4], uint32_t state[16], size_t rounds)
14
0
   {
15
0
   BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
16
0
   // Per-lane offsets that are added to state[12]
   const SIMD_4x32 CTR0 = SIMD_4x32(0, 1, 2, 3);
17
0
18
0
   // C is how far state[12] can still be incremented before it wraps
   const uint32_t C = 0xFFFFFFFF - state[12];
19
0
   // Offset i wraps state[12] exactly when C < i; each comparison contributes
   // 0 or 1, which is the per-lane carry added to state[13]
   const SIMD_4x32 CTR1 = SIMD_4x32(0, C < 1, C < 2, C < 3);
20
0
21
0
   // One register per state word.
   // NOTE(review): splat presumably copies the word into every lane - confirm in simd_32.h
   SIMD_4x32 R00 = SIMD_4x32::splat(state[ 0]);
22
0
   SIMD_4x32 R01 = SIMD_4x32::splat(state[ 1]);
23
0
   SIMD_4x32 R02 = SIMD_4x32::splat(state[ 2]);
24
0
   SIMD_4x32 R03 = SIMD_4x32::splat(state[ 3]);
25
0
   SIMD_4x32 R04 = SIMD_4x32::splat(state[ 4]);
26
0
   SIMD_4x32 R05 = SIMD_4x32::splat(state[ 5]);
27
0
   SIMD_4x32 R06 = SIMD_4x32::splat(state[ 6]);
28
0
   SIMD_4x32 R07 = SIMD_4x32::splat(state[ 7]);
29
0
   SIMD_4x32 R08 = SIMD_4x32::splat(state[ 8]);
30
0
   SIMD_4x32 R09 = SIMD_4x32::splat(state[ 9]);
31
0
   SIMD_4x32 R10 = SIMD_4x32::splat(state[10]);
32
0
   SIMD_4x32 R11 = SIMD_4x32::splat(state[11]);
33
0
   // Words 12 and 13 are the only ones that differ between lanes
   SIMD_4x32 R12 = SIMD_4x32::splat(state[12]) + CTR0;
34
0
   SIMD_4x32 R13 = SIMD_4x32::splat(state[13]) + CTR1;
35
0
   SIMD_4x32 R14 = SIMD_4x32::splat(state[14]);
36
0
   SIMD_4x32 R15 = SIMD_4x32::splat(state[15]);
37
0
38
0
   // Each iteration makes two passes of add / xor / rotate steps, using the
   // rotation amounts 16, 12, 8, 7 in that order within each pass
   for(size_t r = 0; r != rounds / 2; ++r)
39
0
      {
40
0
      // First pass: the groups (R00,R04,R08,R12), (R01,R05,R09,R13),
      // (R02,R06,R10,R14), (R03,R07,R11,R15) are updated side by side,
      // one step at a time
      R00 += R04;
41
0
      R01 += R05;
42
0
      R02 += R06;
43
0
      R03 += R07;
44
0
45
0
      R12 ^= R00;
46
0
      R13 ^= R01;
47
0
      R14 ^= R02;
48
0
      R15 ^= R03;
49
0
50
0
      R12 = R12.rotl<16>();
51
0
      R13 = R13.rotl<16>();
52
0
      R14 = R14.rotl<16>();
53
0
      R15 = R15.rotl<16>();
54
0
55
0
      R08 += R12;
56
0
      R09 += R13;
57
0
      R10 += R14;
58
0
      R11 += R15;
59
0
60
0
      R04 ^= R08;
61
0
      R05 ^= R09;
62
0
      R06 ^= R10;
63
0
      R07 ^= R11;
64
0
65
0
      R04 = R04.rotl<12>();
66
0
      R05 = R05.rotl<12>();
67
0
      R06 = R06.rotl<12>();
68
0
      R07 = R07.rotl<12>();
69
0
70
0
      R00 += R04;
71
0
      R01 += R05;
72
0
      R02 += R06;
73
0
      R03 += R07;
74
0
75
0
      R12 ^= R00;
76
0
      R13 ^= R01;
77
0
      R14 ^= R02;
78
0
      R15 ^= R03;
79
0
80
0
      R12 = R12.rotl<8>();
81
0
      R13 = R13.rotl<8>();
82
0
      R14 = R14.rotl<8>();
83
0
      R15 = R15.rotl<8>();
84
0
85
0
      R08 += R12;
86
0
      R09 += R13;
87
0
      R10 += R14;
88
0
      R11 += R15;
89
0
90
0
      R04 ^= R08;
91
0
      R05 ^= R09;
92
0
      R06 ^= R10;
93
0
      R07 ^= R11;
94
0
95
0
      R04 = R04.rotl<7>();
96
0
      R05 = R05.rotl<7>();
97
0
      R06 = R06.rotl<7>();
98
0
      R07 = R07.rotl<7>();
99
0
100
0
      // Second pass: the same step sequence, now on the groups
      // (R00,R05,R10,R15), (R01,R06,R11,R12), (R02,R07,R08,R13),
      // (R03,R04,R09,R14)
      R00 += R05;
101
0
      R01 += R06;
102
0
      R02 += R07;
103
0
      R03 += R04;
104
0
105
0
      R15 ^= R00;
106
0
      R12 ^= R01;
107
0
      R13 ^= R02;
108
0
      R14 ^= R03;
109
0
110
0
      R15 = R15.rotl<16>();
111
0
      R12 = R12.rotl<16>();
112
0
      R13 = R13.rotl<16>();
113
0
      R14 = R14.rotl<16>();
114
0
115
0
      R10 += R15;
116
0
      R11 += R12;
117
0
      R08 += R13;
118
0
      R09 += R14;
119
0
120
0
      R05 ^= R10;
121
0
      R06 ^= R11;
122
0
      R07 ^= R08;
123
0
      R04 ^= R09;
124
0
125
0
      R05 = R05.rotl<12>();
126
0
      R06 = R06.rotl<12>();
127
0
      R07 = R07.rotl<12>();
128
0
      R04 = R04.rotl<12>();
129
0
130
0
      R00 += R05;
131
0
      R01 += R06;
132
0
      R02 += R07;
133
0
      R03 += R04;
134
0
135
0
      R15 ^= R00;
136
0
      R12 ^= R01;
137
0
      R13 ^= R02;
138
0
      R14 ^= R03;
139
0
140
0
      R15 = R15.rotl<8>();
141
0
      R12 = R12.rotl<8>();
142
0
      R13 = R13.rotl<8>();
143
0
      R14 = R14.rotl<8>();
144
0
145
0
      R10 += R15;
146
0
      R11 += R12;
147
0
      R08 += R13;
148
0
      R09 += R14;
149
0
150
0
      R05 ^= R10;
151
0
      R06 ^= R11;
152
0
      R07 ^= R08;
153
0
      R04 ^= R09;
154
0
155
0
      R05 = R05.rotl<7>();
156
0
      R06 = R06.rotl<7>();
157
0
      R07 = R07.rotl<7>();
158
0
      R04 = R04.rotl<7>();
159
0
      }
160
0
161
0
   // Add the values the registers started with back onto the mixed result;
   // R12 and R13 again include their per-lane offsets CTR0 and CTR1
   R00 += SIMD_4x32::splat(state[0]);
162
0
   R01 += SIMD_4x32::splat(state[1]);
163
0
   R02 += SIMD_4x32::splat(state[2]);
164
0
   R03 += SIMD_4x32::splat(state[3]);
165
0
   R04 += SIMD_4x32::splat(state[4]);
166
0
   R05 += SIMD_4x32::splat(state[5]);
167
0
   R06 += SIMD_4x32::splat(state[6]);
168
0
   R07 += SIMD_4x32::splat(state[7]);
169
0
   R08 += SIMD_4x32::splat(state[8]);
170
0
   R09 += SIMD_4x32::splat(state[9]);
171
0
   R10 += SIMD_4x32::splat(state[10]);
172
0
   R11 += SIMD_4x32::splat(state[11]);
173
0
   R12 += SIMD_4x32::splat(state[12]) + CTR0;
174
0
   R13 += SIMD_4x32::splat(state[13]) + CTR1;
175
0
   R14 += SIMD_4x32::splat(state[14]);
176
0
   R15 += SIMD_4x32::splat(state[15]);
177
0
178
0
   // NOTE(review): transpose presumably rearranges each group of four
   // registers so that a register ends up holding four consecutive words of
   // a single lane - confirm against SIMD_4x32::transpose
   SIMD_4x32::transpose(R00, R01, R02, R03);
179
0
   SIMD_4x32::transpose(R04, R05, R06, R07);
180
0
   SIMD_4x32::transpose(R08, R09, R10, R11);
181
0
   SIMD_4x32::transpose(R12, R13, R14, R15);
182
0
183
0
   // Each consecutive 64 bytes of output takes one register from each of
   // the four transposed groups: (R00,R04,R08,R12), then (R01,R05,R09,R13),
   // then (R02,R06,R10,R14), then (R03,R07,R11,R15)
   R00.store_le(output + 0*16);
184
0
   R04.store_le(output + 1*16);
185
0
   R08.store_le(output + 2*16);
186
0
   R12.store_le(output + 3*16);
187
0
   R01.store_le(output + 4*16);
188
0
   R05.store_le(output + 5*16);
189
0
   R09.store_le(output + 6*16);
190
0
   R13.store_le(output + 7*16);
191
0
   R02.store_le(output + 8*16);
192
0
   R06.store_le(output + 9*16);
193
0
   R10.store_le(output + 10*16);
194
0
   R14.store_le(output + 11*16);
195
0
   R03.store_le(output + 12*16);
196
0
   R07.store_le(output + 13*16);
197
0
   R11.store_le(output + 14*16);
198
0
   R15.store_le(output + 15*16);
199
0
200
0
   // Advance state[12] past the four offsets just used; if the unsigned
   // addition wrapped, the result is below 4 and the carry goes to state[13]
   state[12] += 4;
201
0
   if(state[12] < 4)
202
0
      state[13]++;
203
0
   }
204
205
}