Line | Count | Source (jump to first uncovered line) |
1 | | // scrypt.cpp - written and placed in public domain by Jeffrey Walton. |
2 | | // Based on reference source code by Colin Percival for |
3 | | // Scrypt and Daniel Bernstein for Salsa20 core. |
4 | | |
5 | | #include "pch.h" |
6 | | |
7 | | #include "scrypt.h" |
8 | | #include "algparam.h" |
9 | | #include "argnames.h" |
10 | | #include "pwdbased.h" |
11 | | #include "stdcpp.h" |
12 | | #include "salsa.h" |
13 | | #include "misc.h" |
14 | | #include "sha.h" |
15 | | |
16 | | #include <sstream> |
17 | | #include <limits> |
18 | | |
19 | | #ifdef _OPENMP |
20 | | # include <omp.h> |
21 | | #endif |
22 | | |
23 | | // https://github.com/weidai11/cryptopp/issues/777 |
24 | | #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE |
25 | | # if defined(__clang__) |
26 | | # pragma GCC diagnostic ignored "-Wtautological-compare" |
27 | | # elif defined(__GNUC__) |
28 | | # pragma GCC diagnostic ignored "-Wtype-limits" |
29 | | # endif |
30 | | #endif |
31 | | |
32 | | ANONYMOUS_NAMESPACE_BEGIN |
33 | | |
34 | | using CryptoPP::byte; |
35 | | using CryptoPP::word32; |
36 | | using CryptoPP::word64; |
37 | | using CryptoPP::GetWord; |
38 | | using CryptoPP::PutWord; |
39 | | using CryptoPP::Salsa20_Core; |
40 | | using CryptoPP::rotlConstant; |
41 | | using CryptoPP::LITTLE_ENDIAN_ORDER; |
42 | | using CryptoPP::AlignedSecByteBlock; |
43 | | |
44 | | inline void LE32ENC(byte* out, word32 in) |
45 | 1.23M | { |
46 | 1.23M | PutWord(false, LITTLE_ENDIAN_ORDER, out, in); |
47 | 1.23M | } |
48 | | |
49 | | inline word32 LE32DEC(const byte* in) |
50 | 1.23M | { |
51 | 1.23M | return GetWord<word32>(false, LITTLE_ENDIAN_ORDER, in); |
52 | 1.23M | } |
53 | | |
54 | | inline word64 LE64DEC(const byte* in) |
55 | 3.50k | { |
56 | 3.50k | return GetWord<word64>(false, LITTLE_ENDIAN_ORDER, in); |
57 | 3.50k | } |
58 | | |
59 | | inline void BlockCopy(byte* dest, byte* src, size_t len) |
60 | 166k | { |
61 | | // OpenMP 4.0 released July 2013. |
62 | | #if _OPENMP >= 201307 |
63 | | #pragma omp simd |
64 | | for (size_t i = 0; i < len; ++i) |
65 | | dest[i] = src[i]; |
66 | | #else |
67 | 14.5M | for (size_t i = 0; i < len; ++i) |
68 | 14.4M | dest[i] = src[i]; |
69 | 166k | #endif |
70 | 166k | } |
71 | | |
72 | | inline void BlockXOR(byte* dest, byte* src, size_t len) |
73 | 80.5k | { |
74 | | // OpenMP 4.0 released July 2013. |
75 | | #if _OPENMP >= 201307 |
76 | | #pragma omp simd |
77 | | for (size_t i = 0; i < len; ++i) |
78 | | dest[i] ^= src[i]; |
79 | | #else |
80 | 7.47M | for (size_t i = 0; i < len; ++i) |
81 | 7.39M | dest[i] ^= src[i]; |
82 | 80.5k | #endif |
83 | 80.5k | } |
84 | | |
85 | | inline void PBKDF2_SHA256(byte* buf, size_t dkLen, |
86 | | const byte* passwd, size_t passwdlen, |
87 | | const byte* salt, size_t saltlen, byte count) |
88 | 858 | { |
89 | 858 | using CryptoPP::SHA256; |
90 | 858 | using CryptoPP::PKCS5_PBKDF2_HMAC; |
91 | | |
92 | 858 | PKCS5_PBKDF2_HMAC<SHA256> pbkdf; |
93 | 858 | pbkdf.DeriveKey(buf, dkLen, 0, passwd, passwdlen, salt, saltlen, count, 0.0f); |
94 | 858 | } |
95 | | |
96 | | inline void Salsa20_8(byte B[64]) |
97 | 77.0k | { |
98 | 77.0k | word32 B32[16]; |
99 | | |
100 | 1.30M | for (size_t i = 0; i < 16; ++i) |
101 | 1.23M | B32[i] = LE32DEC(&B[i * 4]); |
102 | | |
103 | 77.0k | Salsa20_Core(B32, 8); |
104 | | |
105 | 1.30M | for (size_t i = 0; i < 16; ++i) |
106 | 1.23M | LE32ENC(&B[4 * i], B32[i]); |
107 | 77.0k | } |
108 | | |
109 | | inline void BlockMix(byte* B, byte* Y, size_t r) |
110 | 7.01k | { |
111 | 7.01k | byte X[64]; |
112 | | |
113 | | // 1: X <-- B_{2r - 1} |
114 | 7.01k | BlockCopy(X, &B[(2 * r - 1) * 64], 64); |
115 | | |
116 | | // 2: for i = 0 to 2r - 1 do |
117 | 84.0k | for (size_t i = 0; i < 2 * r; ++i) |
118 | 77.0k | { |
119 | | // 3: X <-- H(X \xor B_i) |
120 | 77.0k | BlockXOR(X, &B[i * 64], 64); |
121 | 77.0k | Salsa20_8(X); |
122 | | |
123 | | // 4: Y_i <-- X |
124 | 77.0k | BlockCopy(&Y[i * 64], X, 64); |
125 | 77.0k | } |
126 | | |
127 | | // 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) |
128 | 45.5k | for (size_t i = 0; i < r; ++i) |
129 | 38.5k | BlockCopy(&B[i * 64], &Y[(i * 2) * 64], 64); |
130 | | |
131 | 45.5k | for (size_t i = 0; i < r; ++i) |
132 | 38.5k | BlockCopy(&B[(i + r) * 64], &Y[(i * 2 + 1) * 64], 64); |
133 | 7.01k | } |
134 | | |
135 | | inline word64 Integerify(byte* B, size_t r) |
136 | 3.50k | { |
137 | 3.50k | byte* X = &B[(2 * r - 1) * 64]; |
138 | 3.50k | return LE64DEC(X); |
139 | 3.50k | } |
140 | | |
141 | | inline void Smix(byte* B, size_t r, word64 N, byte* V, byte* XY) |
142 | 1.19k | { |
143 | 1.19k | byte* X = XY; |
144 | 1.19k | byte* Y = XY+128*r; |
145 | | |
146 | | // 1: X <-- B |
147 | 1.19k | BlockCopy(X, B, 128 * r); |
148 | | |
149 | | // 2: for i = 0 to N - 1 do |
150 | 4.69k | for (word64 i = 0; i < N; ++i) |
151 | 3.50k | { |
152 | | // 3: V_i <-- X |
153 | 3.50k | BlockCopy(&V[i * (128 * r)], X, 128 * r); |
154 | | |
155 | | // 4: X <-- H(X) |
156 | 3.50k | BlockMix(X, Y, r); |
157 | 3.50k | } |
158 | | |
159 | | // 6: for i = 0 to N - 1 do |
160 | 4.69k | for (word64 i = 0; i < N; ++i) |
161 | 3.50k | { |
162 | | // 7: j <-- Integerify(X) mod N |
163 | 3.50k | word64 j = Integerify(X, r) & (N - 1); |
164 | | |
165 | | // 8: X <-- H(X \xor V_j) |
166 | 3.50k | BlockXOR(X, &V[j * (128 * r)], 128 * r); |
167 | 3.50k | BlockMix(X, Y, r); |
168 | 3.50k | } |
169 | | |
170 | | // 10: B' <-- X |
171 | 1.19k | BlockCopy(B, X, 128 * r); |
172 | 1.19k | } |
173 | | |
174 | | ANONYMOUS_NAMESPACE_END |
175 | | |
176 | | NAMESPACE_BEGIN(CryptoPP) |
177 | | |
178 | | size_t Scrypt::GetValidDerivedLength(size_t keylength) const |
179 | 581 | { |
180 | 581 | if (keylength > MaxDerivedKeyLength()) |
181 | 0 | return MaxDerivedKeyLength(); |
182 | 581 | return keylength; |
183 | 581 | } |
184 | | |
185 | | void Scrypt::ValidateParameters(size_t derivedLen, word64 cost, word64 blockSize, word64 parallelization) const |
186 | 581 | { |
187 | | // https://github.com/weidai11/cryptopp/issues/842 |
188 | 581 | CRYPTOPP_ASSERT(derivedLen != 0); |
189 | 581 | CRYPTOPP_ASSERT(cost != 0); |
190 | 581 | CRYPTOPP_ASSERT(blockSize != 0); |
191 | 581 | CRYPTOPP_ASSERT(parallelization != 0); |
192 | | |
193 | 581 | if (cost == 0) |
194 | 31 | throw InvalidArgument("Scrypt: cost cannot be 0"); |
195 | | |
196 | 550 | if (blockSize == 0) |
197 | 0 | throw InvalidArgument("Scrypt: block size cannot be 0"); |
198 | | |
199 | 550 | if (parallelization == 0) |
200 | 80 | throw InvalidArgument("Scrypt: parallelization cannot be 0"); |
201 | | |
202 | | // Optimizer should remove this on 32-bit platforms |
203 | 470 | if ((std::numeric_limits<size_t>::max)() > (std::numeric_limits<word32>::max)()) |
204 | 470 | { |
205 | 470 | const word64 maxLen = ((static_cast<word64>(1) << 32) - 1) * 32; |
206 | 470 | if (derivedLen > maxLen) { |
207 | 0 | std::ostringstream oss; |
208 | 0 | oss << "derivedLen " << derivedLen << " is larger than " << maxLen; |
209 | 0 | throw InvalidArgument("Scrypt: " + oss.str()); |
210 | 0 | } |
211 | 470 | } |
212 | | |
213 | | // https://github.com/weidai11/cryptopp/issues/787 |
214 | 470 | CRYPTOPP_ASSERT(parallelization <= static_cast<word64>((std::numeric_limits<int>::max)())); |
215 | 470 | if (parallelization > static_cast<word64>((std::numeric_limits<int>::max)())) |
216 | 0 | { |
217 | 0 | std::ostringstream oss; |
218 | 0 | oss << " parallelization " << parallelization << " is larger than "; |
219 | 0 | oss << (std::numeric_limits<int>::max)(); |
220 | 0 | throw InvalidArgument("Scrypt: " + oss.str()); |
221 | 0 | } |
222 | | |
223 | 470 | CRYPTOPP_ASSERT(IsPowerOf2(cost)); |
224 | 470 | if (IsPowerOf2(cost) == false) |
225 | 41 | throw InvalidArgument("Scrypt: cost must be a power of 2"); |
226 | | |
227 | 429 | const word64 prod = static_cast<word64>(blockSize) * parallelization; |
228 | 429 | CRYPTOPP_ASSERT(prod < (1U << 30)); |
229 | | |
230 | 429 | if (prod >= (1U << 30)) { |
231 | 0 | std::ostringstream oss; |
232 | 0 | oss << "r*p " << prod << " is larger than " << (1U << 30); |
233 | 0 | throw InvalidArgument("Scrypt: " + oss.str()); |
234 | 0 | } |
235 | | |
236 | | // Scrypt has several tests that effectively verify allocations like |
237 | | // '128 * r * N' and '128 * r * p' do not overflow. They are the tests |
238 | | // that set errno to ENOMEM. We can make the logic a little more clear |
239 | | // using word128. At first blush the word128 may seem like overkill. |
240 | | // However, this algorithm is dominated by slow moving parts, so a |
241 | | // one-time check is insignificant in the bigger picture. |
242 | 429 | #if defined(CRYPTOPP_WORD128_AVAILABLE) |
243 | 429 | const word128 maxElems = static_cast<word128>(SIZE_MAX); |
244 | 429 | bool bLimit = (maxElems >= static_cast<word128>(cost) * blockSize * 128U); |
245 | 429 | bool xyLimit = (maxElems >= static_cast<word128>(parallelization) * blockSize * 128U); |
246 | 429 | bool vLimit = (maxElems >= static_cast<word128>(blockSize) * 256U + 64U); |
247 | | #else |
248 | | const word64 maxElems = static_cast<word64>(SIZE_MAX); |
249 | | bool bLimit = (blockSize < maxElems / 128U / cost); |
250 | | bool xyLimit = (blockSize < maxElems / 128U / parallelization); |
251 | | bool vLimit = (blockSize < (maxElems - 64U) / 256U); |
252 | | #endif |
253 | | |
254 | 429 | CRYPTOPP_ASSERT(bLimit); CRYPTOPP_ASSERT(xyLimit); CRYPTOPP_ASSERT(vLimit); |
255 | 429 | if (!bLimit || !xyLimit || !vLimit) |
256 | 0 | throw std::bad_alloc(); |
257 | 429 | } |
258 | | |
259 | | size_t Scrypt::DeriveKey(byte*derived, size_t derivedLen, |
260 | | const byte*secret, size_t secretLen, const NameValuePairs& params) const |
261 | 0 | { |
262 | 0 | CRYPTOPP_ASSERT(secret /*&& secretLen*/); |
263 | 0 | CRYPTOPP_ASSERT(derived && derivedLen); |
264 | 0 | CRYPTOPP_ASSERT(derivedLen <= MaxDerivedKeyLength()); |
265 | |
|
266 | 0 | word64 cost=0, blockSize=0, parallelization=0; |
267 | 0 | if(params.GetValue("Cost", cost) == false) |
268 | 0 | cost = defaultCost; |
269 | |
|
270 | 0 | if(params.GetValue("BlockSize", blockSize) == false) |
271 | 0 | blockSize = defaultBlockSize; |
272 | |
|
273 | 0 | if(params.GetValue("Parallelization", parallelization) == false) |
274 | 0 | parallelization = defaultParallelization; |
275 | |
|
276 | 0 | ConstByteArrayParameter salt; |
277 | 0 | (void)params.GetValue("Salt", salt); |
278 | |
|
279 | 0 | return DeriveKey(derived, derivedLen, secret, secretLen, salt.begin(), salt.size(), cost, blockSize, parallelization); |
280 | 0 | } |
281 | | |
282 | | size_t Scrypt::DeriveKey(byte*derived, size_t derivedLen, const byte*secret, size_t secretLen, |
283 | | const byte*salt, size_t saltLen, word64 cost, word64 blockSize, word64 parallel) const |
284 | 581 | { |
285 | 581 | CRYPTOPP_ASSERT(secret /*&& secretLen*/); |
286 | 581 | CRYPTOPP_ASSERT(derived && derivedLen); |
287 | 581 | CRYPTOPP_ASSERT(derivedLen <= MaxDerivedKeyLength()); |
288 | | |
289 | 581 | ThrowIfInvalidDerivedKeyLength(derivedLen); |
290 | 581 | ValidateParameters(derivedLen, cost, blockSize, parallel); |
291 | | |
292 | 581 | AlignedSecByteBlock B(static_cast<size_t>(blockSize * parallel * 128U)); |
293 | | |
294 | | // 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) |
295 | 581 | PBKDF2_SHA256(B, B.size(), secret, secretLen, salt, saltLen, 1); |
296 | | |
297 | | // Visual Studio and OpenMP 2.0 fixup. We must use int, not size_t. |
298 | 581 | int maxParallel=0; |
299 | 581 | if (!SafeConvert(parallel, maxParallel)) |
300 | 0 | maxParallel = (std::numeric_limits<int>::max)(); |
301 | | |
302 | | #ifdef _OPENMP |
303 | | int threads = STDMIN(omp_get_max_threads(), maxParallel); |
304 | | #endif |
305 | | |
306 | | // http://stackoverflow.com/q/49604260/608639 |
307 | 581 | #pragma omp parallel num_threads(threads) |
308 | 581 | { |
309 | | // Each thread gets its own copy |
310 | 581 | AlignedSecByteBlock XY(static_cast<size_t>(blockSize * 256U)); |
311 | 581 | AlignedSecByteBlock V(static_cast<size_t>(blockSize * cost * 128U)); |
312 | | |
313 | | // 2: for i = 0 to p - 1 do |
314 | 581 | #pragma omp for |
315 | 1.77k | for (int i = 0; i < maxParallel; ++i) |
316 | 1.19k | { |
317 | | // 3: B_i <-- MF(B_i, N) |
318 | 1.19k | const ptrdiff_t offset = static_cast<ptrdiff_t>(blockSize*i*128); |
319 | 1.19k | Smix(B+offset, static_cast<size_t>(blockSize), cost, V, XY); |
320 | 1.19k | } |
321 | 581 | } |
322 | | |
323 | | // 5: DK <-- PBKDF2(P, B, 1, dkLen) |
324 | 581 | PBKDF2_SHA256(derived, derivedLen, secret, secretLen, B, B.size(), 1); |
325 | | |
326 | 581 | return 1; |
327 | 581 | } |
328 | | |
329 | | NAMESPACE_END |