/work/workdir/UnpackedTarball/argon2/src/opt.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Argon2 reference source code package - reference C implementations |
3 | | * |
4 | | * Copyright 2015 |
5 | | * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves |
6 | | * |
7 | | * You may use this work under the terms of a Creative Commons CC0 1.0 |
8 | | * License/Waiver or the Apache Public License 2.0, at your option. The terms of |
9 | | * these licenses can be found at: |
10 | | * |
11 | | * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 |
12 | | * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 |
13 | | * |
14 | | * You should have received a copy of both of these licenses along with this |
15 | | * software. If not, they may be obtained at the above URLs. |
16 | | */ |
17 | | |
18 | | #include <stdint.h> |
19 | | #include <string.h> |
20 | | #include <stdlib.h> |
21 | | |
22 | | #include "argon2.h" |
23 | | #include "core.h" |
24 | | |
25 | | #include "blake2/blake2.h" |
26 | | #include "blake2/blamka-round-opt.h" |
27 | | |
28 | | /* |
29 | | * Function fills a new memory block and optionally XORs the old block over the new one. |
30 | | * Memory must be initialized. |
31 | | * @param state Pointer to the just produced block. Content will be updated(!) |
32 | | * @param ref_block Pointer to the reference block |
33 | | * @param next_block Pointer to the block to be XORed over. May coincide with @ref_block |
34 | | * @param with_xor Whether to XOR into the new block (1) or just overwrite (0) |
35 | | * @pre all block pointers must be valid |
36 | | */ |
37 | | #if defined(__AVX512F__) |
38 | | static void fill_block(__m512i *state, const block *ref_block, |
39 | | block *next_block, int with_xor) { |
40 | | __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK]; |
41 | | unsigned int i; |
42 | | |
43 | | if (with_xor) { |
44 | | for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { |
45 | | state[i] = _mm512_xor_si512( |
46 | | state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); |
47 | | block_XY[i] = _mm512_xor_si512( |
48 | | state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i)); |
49 | | } |
50 | | } else { |
51 | | for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { |
52 | | block_XY[i] = state[i] = _mm512_xor_si512( |
53 | | state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); |
54 | | } |
55 | | } |
56 | | |
57 | | for (i = 0; i < 2; ++i) { |
58 | | BLAKE2_ROUND_1( |
59 | | state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], |
60 | | state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); |
61 | | } |
62 | | |
63 | | for (i = 0; i < 2; ++i) { |
64 | | BLAKE2_ROUND_2( |
65 | | state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i], |
66 | | state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]); |
67 | | } |
68 | | |
69 | | for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { |
70 | | state[i] = _mm512_xor_si512(state[i], block_XY[i]); |
71 | | _mm512_storeu_si512((__m512i *)next_block->v + i, state[i]); |
72 | | } |
73 | | } |
74 | | #elif defined(__AVX2__) |
/*
 * AVX2 variant of the Argon2 compression function G.
 * Same contract as the other fill_block variants: R = state ^ ref_block
 * is permuted via BLAKE2_ROUND_1/_2 and then XORed with the saved
 * pre-permutation value; result goes to next_block and stays in state.
 */
static void fill_block(__m256i *state, const block *ref_block,
                       block *next_block, int with_xor) {
    /* Pre-permutation snapshot used for the final feedback XOR. */
    __m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
    unsigned int i;

    if (with_xor) {
        /* XOR mode: also fold the current contents of next_block
           into the feedback value (v1.3+ passes after the first). */
        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
            state[i] = _mm256_xor_si256(
                state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
            block_XY[i] = _mm256_xor_si256(
                state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
        }
    } else {
        /* Overwrite mode: feedback value equals the permutation input. */
        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
            block_XY[i] = state[i] = _mm256_xor_si256(
                state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
        }
    }

    /* First round pass; the interleaved operand order (0,4,1,5,...) is
       dictated by the AVX2 lane layout expected by BLAKE2_ROUND_1. */
    for (i = 0; i < 4; ++i) {
        BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
                       state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
    }

    /* Second round pass over the orthogonal (stride-4) grouping. */
    for (i = 0; i < 4; ++i) {
        BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
                       state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
    }

    /* Feedback XOR and store: next = P(R) ^ XY; state keeps the result. */
    for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
        state[i] = _mm256_xor_si256(state[i], block_XY[i]);
        _mm256_storeu_si256((__m256i *)next_block->v + i, state[i]);
    }
}
109 | | #else |
/*
 * SSE (128-bit) fallback variant of the Argon2 compression function G,
 * compiled when neither AVX-512 nor AVX2 is available. Same contract as
 * the wider variants: permute R = state ^ ref_block, XOR the saved
 * pre-permutation value back in, write to next_block, keep in state.
 */
static void fill_block(__m128i *state, const block *ref_block,
                       block *next_block, int with_xor) {
    /* Pre-permutation snapshot used for the final feedback XOR. */
    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
    unsigned int i;

    if (with_xor) {
        /* XOR mode: also fold the old contents of next_block into the
           feedback value (used on later passes, v1.3+). */
        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
            state[i] = _mm_xor_si128(
                state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
            block_XY[i] = _mm_xor_si128(
                state[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
        }
    } else {
        /* Overwrite mode: feedback value equals the permutation input. */
        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
            block_XY[i] = state[i] = _mm_xor_si128(
                state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
        }
    }

    /* Row pass: 8 Blake2b rounds, each over 8 consecutive xmm words. */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
                     state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
                     state[8 * i + 6], state[8 * i + 7]);
    }

    /* Column pass: 8 rounds over the stride-8 (transposed) grouping. */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
                     state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
                     state[8 * 6 + i], state[8 * 7 + i]);
    }

    /* Feedback XOR and store: next = P(R) ^ XY; state keeps the result. */
    for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
        state[i] = _mm_xor_si128(state[i], block_XY[i]);
        _mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
    }
}
146 | | #endif |
147 | | |
/*
 * Generates the next block of data-independent reference addresses
 * (Argon2i / first half of Argon2id) by applying the compression
 * function G twice: address_block = G(0, G(0, input_block)).
 * Increments the counter word input_block->v[6] before each generation.
 * @param address_block  out: block of pseudo-random reference addresses
 * @param input_block    in/out: per-segment input block; v[6] is bumped
 */
static void next_addresses(block *address_block, block *input_block) {
    /* Temporary zero-initialized blocks, sized for whichever SIMD
       width this translation unit was compiled with (must match the
       fill_block variant selected above). */
#if defined(__AVX512F__)
    __m512i zero_block[ARGON2_512BIT_WORDS_IN_BLOCK];
    __m512i zero2_block[ARGON2_512BIT_WORDS_IN_BLOCK];
#elif defined(__AVX2__)
    __m256i zero_block[ARGON2_HWORDS_IN_BLOCK];
    __m256i zero2_block[ARGON2_HWORDS_IN_BLOCK];
#else
    __m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
    __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
#endif

    memset(zero_block, 0, sizeof(zero_block));
    memset(zero2_block, 0, sizeof(zero2_block));

    /* Increasing index counter (word 6 of the address-input block). */
    input_block->v[6]++;

    /* First iteration of G: address_block = G(0, input_block). */
    fill_block(zero_block, input_block, address_block, 0);

    /* Second iteration of G: address_block = G(0, address_block).
       Note ref and next blocks coincide here, which fill_block permits. */
    fill_block(zero2_block, address_block, address_block, 0);
}
173 | | |
/*
 * Fills one segment (a quarter of a lane for one pass) of Argon2 memory.
 * For each block in the segment: derive a pseudo-random value (from an
 * address block for data-independent addressing, or from the previous
 * block otherwise), map it to a reference block (lane + index), and
 * compress previous+reference into the current block via fill_block.
 * @param instance  global Argon2 state (memory, geometry, type, version)
 * @param position  (pass, lane, slice) identifying the segment; by value,
 *                  so the index field can be updated locally per block
 */
void fill_segment(const argon2_instance_t *instance,
                  argon2_position_t position) {
    block *ref_block = NULL, *curr_block = NULL;
    block address_block, input_block;
    uint64_t pseudo_rand, ref_index, ref_lane;
    uint32_t prev_offset, curr_offset;
    uint32_t starting_index, i;
    /* Running "previous block" register state, sized for the SIMD width
       this file was compiled for (must match fill_block's signature). */
#if defined(__AVX512F__)
    __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
#elif defined(__AVX2__)
    __m256i state[ARGON2_HWORDS_IN_BLOCK];
#else
    __m128i state[ARGON2_OWORDS_IN_BLOCK];
#endif
    int data_independent_addressing;

    if (instance == NULL) {
        return;
    }

    /* Argon2i always uses data-independent addressing; Argon2id uses it
       only for the first half of the first pass. */
    data_independent_addressing =
        (instance->type == Argon2_i) ||
        (instance->type == Argon2_id && (position.pass == 0) &&
         (position.slice < ARGON2_SYNC_POINTS / 2));

    if (data_independent_addressing) {
        init_block_value(&input_block, 0);

        /* Address-generation input block per the Argon2 spec: words 0-5
           encode position and parameters; word 6 is the counter bumped
           by next_addresses(). */
        input_block.v[0] = position.pass;
        input_block.v[1] = position.lane;
        input_block.v[2] = position.slice;
        input_block.v[3] = instance->memory_blocks;
        input_block.v[4] = instance->passes;
        input_block.v[5] = instance->type;
    }

    starting_index = 0;

    if ((0 == position.pass) && (0 == position.slice)) {
        starting_index = 2; /* we have already generated the first two blocks */

        /* Don't forget to generate the first block of addresses: */
        if (data_independent_addressing) {
            next_addresses(&address_block, &input_block);
        }
    }

    /* Offset of the current block */
    curr_offset = position.lane * instance->lane_length +
                  position.slice * instance->segment_length + starting_index;

    if (0 == curr_offset % instance->lane_length) {
        /* Last block in this lane (the lane wraps around) */
        prev_offset = curr_offset + instance->lane_length - 1;
    } else {
        /* Previous block */
        prev_offset = curr_offset - 1;
    }

    /* Seed the SIMD state with the previous block's contents. */
    memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);

    for (i = starting_index; i < instance->segment_length;
         ++i, ++curr_offset, ++prev_offset) {
        /*1.1 Rotating prev_offset if needed */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }

        /* 1.2 Computing the index of the reference block */
        /* 1.2.1 Taking pseudo-random value from the previous block */
        if (data_independent_addressing) {
            /* Refresh the address block every ARGON2_ADDRESSES_IN_BLOCK
               iterations; i is segment-relative, matching the refresh
               done above for the start-of-memory case. */
            if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
                next_addresses(&address_block, &input_block);
            }
            pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
        } else {
            pseudo_rand = instance->memory[prev_offset].v[0];
        }

        /* 1.2.2 Computing the lane of the reference block
           (high 32 bits select the lane). */
        ref_lane = ((pseudo_rand >> 32)) % instance->lanes;

        if ((position.pass == 0) && (position.slice == 0)) {
            /* Can not reference other lanes yet */
            ref_lane = position.lane;
        }

        /* 1.2.3 Computing the number of possible reference block within the
         * lane. Low 32 bits of pseudo_rand select the index via
         * index_alpha's non-uniform mapping.
         */
        position.index = i;
        ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
                                ref_lane == position.lane);

        /* 2 Creating a new block */
        ref_block =
            instance->memory + instance->lane_length * ref_lane + ref_index;
        curr_block = instance->memory + curr_offset;
        if (ARGON2_VERSION_10 == instance->version) {
            /* version 1.2.1 and earlier: overwrite, not XOR */
            fill_block(state, ref_block, curr_block, 0);
        } else {
            /* v1.3+: overwrite on the first pass, XOR on later passes. */
            if(0 == position.pass) {
                fill_block(state, ref_block, curr_block, 0);
            } else {
                fill_block(state, ref_block, curr_block, 1);
            }
        }
    }
}