Line | Count | Source (jump to first uncovered line) |
1 | | // misc.cpp - originally written and placed in the public domain by Wei Dai |
2 | | |
3 | | #include "pch.h" |
4 | | #include "config.h" |
5 | | |
6 | | #if CRYPTOPP_MSC_VERSION |
7 | | # pragma warning(disable: 4189) |
8 | | # if (CRYPTOPP_MSC_VERSION >= 1400) |
9 | | # pragma warning(disable: 6237) |
10 | | # endif |
11 | | #endif |
12 | | |
13 | | #ifndef CRYPTOPP_IMPORTS |
14 | | |
15 | | #include "misc.h" |
16 | | #include "trap.h" |
17 | | #include "words.h" |
18 | | #include "stdcpp.h" |
19 | | #include "integer.h" |
20 | | #include "secblock.h" |
21 | | |
22 | | // Hack for OpenBSD and GCC 4.2.1. I believe they are stuck at 4.2.1 due to GPLv3. |
23 | | #if defined(__OpenBSD__) |
24 | | # if defined (CRYPTOPP_GCC_VERSION) && (CRYPTOPP_GCC_VERSION < 43000) |
25 | | # undef CRYPTOPP_DISABLE_ASM |
26 | | # define CRYPTOPP_DISABLE_ASM 1 |
27 | | # endif |
28 | | #endif |
29 | | |
30 | | #ifndef CRYPTOPP_DISABLE_ASM |
31 | | # if defined(__SSE2__) |
32 | | # include <emmintrin.h> |
33 | | # endif |
34 | | # if defined(__AVX__) |
35 | | # include <immintrin.h> |
36 | | # endif |
37 | | |
38 | | # if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) |
39 | | # if (CRYPTOPP_ARM_NEON_HEADER) || (CRYPTOPP_ARM_ASIMD_AVAILABLE) |
40 | | # include <arm_neon.h> |
41 | | # endif |
42 | | # endif |
43 | | #endif // CRYPTOPP_DISABLE_ASM |
44 | | |
45 | | NAMESPACE_BEGIN(CryptoPP) |
46 | | |
47 | | byte* BytePtr(SecByteBlock& str) |
48 | 0 | { |
49 | | // Caller wants a writeable pointer |
50 | 0 | CRYPTOPP_ASSERT(str.empty() == false); |
51 | |
|
52 | 0 | if (str.empty()) |
53 | 0 | return NULLPTR; |
54 | 0 | return reinterpret_cast<byte*>(str.data()); |
55 | 0 | } |
56 | | |
57 | | const byte* ConstBytePtr(const SecByteBlock& str) |
58 | 0 | { |
59 | 0 | if (str.empty()) |
60 | 0 | return NULLPTR; |
61 | 0 | return reinterpret_cast<const byte*>(str.data()); |
62 | 0 | } |
63 | | |
64 | | size_t BytePtrSize(const SecByteBlock& str) |
65 | 0 | { |
66 | 0 | return str.size(); |
67 | 0 | } |
68 | | |
69 | | // xorbuf simplified at https://github.com/weidai11/cryptopp/issues/1020 |
70 | | void xorbuf(byte *buf, const byte *mask, size_t count) |
71 | 1.37M | { |
72 | 1.37M | CRYPTOPP_ASSERT(buf != NULLPTR); |
73 | 1.37M | CRYPTOPP_ASSERT(mask != NULLPTR); |
74 | 1.37M | CRYPTOPP_ASSERT(count > 0); |
75 | | |
76 | 1.37M | #ifndef CRYPTOPP_DISABLE_ASM |
77 | | # if defined(__AVX__) |
78 | | while (count >= 32) |
79 | | { |
80 | | __m256i b = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(buf)); |
81 | | __m256i m = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(mask)); |
82 | | _mm256_storeu_si256(reinterpret_cast<__m256i*>(buf), _mm256_castps_si256( |
83 | | _mm256_xor_ps(_mm256_castsi256_ps(b), _mm256_castsi256_ps(m)))); |
84 | | buf += 32; mask += 32; count -= 32; |
85 | | } |
86 | | // https://software.intel.com/en-us/articles/avoiding-avx-sse-transition-penalties |
87 | | _mm256_zeroupper(); |
88 | | # endif |
89 | 1.37M | # if defined(__SSE2__) |
90 | 10.0M | while (count >= 16) |
91 | 8.65M | { |
92 | 8.65M | __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(buf)); |
93 | 8.65M | __m128i m = _mm_loadu_si128(reinterpret_cast<const __m128i*>(mask)); |
94 | 8.65M | _mm_storeu_si128(reinterpret_cast<__m128i*>(buf), _mm_castps_si128( |
95 | 8.65M | _mm_xor_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(m)))); |
96 | 8.65M | buf += 16; mask += 16; count -= 16; |
97 | 8.65M | } |
98 | 1.37M | # endif |
99 | | # if defined(__aarch64__) || defined(__aarch32__) || defined(_M_ARM64) |
100 | | while (count >= 16) |
101 | | { |
102 | | vst1q_u8(buf, veorq_u8(vld1q_u8(buf), vld1q_u8(mask))); |
103 | | buf += 16; mask += 16; count -= 16; |
104 | | } |
105 | | # endif |
106 | 1.37M | #endif // CRYPTOPP_DISABLE_ASM |
107 | | |
108 | | #if CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64 |
109 | | // word64 and stride of 8 slows things down on x86_64. |
110 | | // word64 and stride of 8 makes no difference on ARM. |
111 | | // word64 and stride of 16 benefits PowerPC. |
112 | | while (count >= 16) |
113 | | { |
114 | | word64 r[2], b[2], m[2]; |
115 | | std::memcpy(&b, buf, 16); std::memcpy(&m, mask, 16); |
116 | | |
117 | | r[0] = b[0] ^ m[0]; |
118 | | r[1] = b[1] ^ m[1]; |
119 | | std::memcpy(buf, &r, 16); |
120 | | |
121 | | buf += 16; mask += 16; count -= 16; |
122 | | } |
123 | | #endif |
124 | | |
125 | | // One of the arch specific xor's may have cleared the request |
126 | 1.37M | if (count == 0) return; |
127 | | |
128 | 3.39M | while (count >= 4) |
129 | 2.24M | { |
130 | 2.24M | word32 r, b, m; |
131 | 2.24M | std::memcpy(&b, buf, 4); std::memcpy(&m, mask, 4); |
132 | | |
133 | 2.24M | r = b ^ m; |
134 | 2.24M | std::memcpy(buf, &r, 4); |
135 | | |
136 | 2.24M | buf += 4; mask += 4; count -= 4; |
137 | 2.24M | } |
138 | | |
139 | 1.24M | for (size_t i=0; i<count; i++) |
140 | 82.6k | buf[i] ^= mask[i]; |
141 | 1.15M | } |
142 | | |
143 | | // xorbuf simplified at https://github.com/weidai11/cryptopp/issues/1020 |
144 | | void xorbuf(byte *output, const byte *input, const byte *mask, size_t count) |
145 | 750 | { |
146 | 750 | CRYPTOPP_ASSERT(output != NULLPTR); |
147 | 750 | CRYPTOPP_ASSERT(input != NULLPTR); |
148 | 750 | CRYPTOPP_ASSERT(count > 0); |
149 | | |
150 | 750 | #ifndef CRYPTOPP_DISABLE_ASM |
151 | | # if defined(__AVX__) |
152 | | while (count >= 32) |
153 | | { |
154 | | __m256i b = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(input)); |
155 | | __m256i m = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(mask)); |
156 | | _mm256_storeu_si256(reinterpret_cast<__m256i*>(output), _mm256_castps_si256( |
157 | | _mm256_xor_ps(_mm256_castsi256_ps(b), _mm256_castsi256_ps(m)))); |
158 | | output += 32; input += 32; mask += 32; count -= 32; |
159 | | } |
160 | | // https://software.intel.com/en-us/articles/avoiding-avx-sse-transition-penalties |
161 | | _mm256_zeroupper(); |
162 | | # endif |
163 | 750 | # if defined(__SSE2__) |
164 | 2.87k | while (count >= 16) |
165 | 2.12k | { |
166 | 2.12k | __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input)); |
167 | 2.12k | __m128i m = _mm_loadu_si128(reinterpret_cast<const __m128i*>(mask)); |
168 | 2.12k | _mm_storeu_si128(reinterpret_cast<__m128i*>(output), _mm_castps_si128( |
169 | 2.12k | _mm_xor_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(m)))); |
170 | 2.12k | output += 16; input += 16; mask += 16; count -= 16; |
171 | 2.12k | } |
172 | 750 | # endif |
173 | | # if defined(__aarch64__) || defined(__aarch32__) || defined(_M_ARM64) |
174 | | while (count >= 16) |
175 | | { |
176 | | vst1q_u8(output, veorq_u8(vld1q_u8(input), vld1q_u8(mask))); |
177 | | output += 16; input += 16; mask += 16; count -= 16; |
178 | | } |
179 | | # endif |
180 | 750 | #endif // CRYPTOPP_DISABLE_ASM |
181 | | |
182 | | #if CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64 |
183 | | // word64 and stride of 8 slows things down on x86_64. |
184 | | // word64 and stride of 8 makes no difference on ARM. |
185 | | // word64 and stride of 16 benefits PowerPC. |
186 | | while (count >= 16) |
187 | | { |
188 | | word64 b[2], m[2], r[2]; |
189 | | std::memcpy(&b, input, 16); std::memcpy(&m, mask, 16); |
190 | | |
191 | | r[0] = b[0] ^ m[0]; |
192 | | r[1] = b[1] ^ m[1]; |
193 | | std::memcpy(output, &r, 16); |
194 | | |
195 | | output += 16; input += 16; mask += 16; count -= 16; |
196 | | } |
197 | | #endif |
198 | | |
199 | | // One of the arch specific xor's may have cleared the request |
200 | 750 | if (count == 0) return; |
201 | | |
202 | 643 | while (count >= 4) |
203 | 366 | { |
204 | 366 | word32 b, m, r; |
205 | 366 | std::memcpy(&b, input, 4); std::memcpy(&m, mask, 4); |
206 | | |
207 | 366 | r = b ^ m; |
208 | 366 | std::memcpy(output, &r, 4); |
209 | | |
210 | 366 | output += 4; input += 4; mask += 4; count -= 4; |
211 | 366 | } |
212 | | |
213 | 556 | for (size_t i=0; i<count; i++) |
214 | 279 | output[i] = input[i] ^ mask[i]; |
215 | 277 | } |
216 | | |
217 | | // VerifyBufsEqual simplified at https://github.com/weidai11/cryptopp/issues/1020 |
218 | | bool VerifyBufsEqual(const byte *buf, const byte *mask, size_t count) |
219 | 38 | { |
220 | 38 | CRYPTOPP_ASSERT(buf != NULLPTR); |
221 | 38 | CRYPTOPP_ASSERT(mask != NULLPTR); |
222 | | // CRYPTOPP_ASSERT(count > 0); |
223 | | |
224 | 38 | #if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_ARM64 || CRYPTOPP_BOOL_PPC64 || CRYPTOPP_BOOL_MIPS64 || CRYPTOPP_BOOL_SPARC64 |
225 | 38 | word64 acc64 = 0; |
226 | 67 | while (count >= 8) |
227 | 29 | { |
228 | 29 | word64 b, m; |
229 | 29 | std::memcpy(&b, buf, 8); std::memcpy(&m, mask, 8); |
230 | 29 | acc64 |= b ^ m; |
231 | | |
232 | 29 | buf += 8; mask += 8; count -= 8; |
233 | 29 | } |
234 | | |
235 | 38 | word32 acc8 = (acc64 >> 32) | (acc64 & 0xffffffff); |
236 | 38 | acc8 = static_cast<byte>(acc8) | static_cast<byte>(acc8 >> 8) | |
237 | 38 | static_cast<byte>(acc8 >> 16) | static_cast<byte>(acc8 >> 24); |
238 | | #else |
239 | | word32 acc32 = 0; |
240 | | while (count >= 4) |
241 | | { |
242 | | word32 b, m; |
243 | | std::memcpy(&b, buf, 4); std::memcpy(&m, mask, 4); |
244 | | acc32 |= b ^ m; |
245 | | |
246 | | buf += 4; mask += 4; count -= 4; |
247 | | } |
248 | | |
249 | | word32 acc8 = acc32; |
250 | | acc8 = static_cast<byte>(acc8) | static_cast<byte>(acc8 >> 8) | |
251 | | static_cast<byte>(acc8 >> 16) | static_cast<byte>(acc8 >> 24); |
252 | | #endif |
253 | | |
254 | 81 | for (size_t i=0; i<count; i++) |
255 | 43 | acc8 |= buf[i] ^ mask[i]; |
256 | | |
257 | | // word32 results in this tail code on x86: |
258 | | // 33a: 85 c0 test %eax, %eax |
259 | | // 33c: 0f 94 c0 sete %al |
260 | | // 33f: c3 ret |
261 | 38 | return acc8 == 0; |
262 | 38 | } |
263 | | |
264 | | std::string StringNarrow(const wchar_t *str, bool throwOnError) |
265 | 0 | { |
266 | 0 | CRYPTOPP_ASSERT(str); |
267 | 0 | std::string result; |
268 | | |
269 | | // Safer functions on Windows for C&A, https://github.com/weidai11/cryptopp/issues/55 |
270 | | #if (CRYPTOPP_MSC_VERSION >= 1400) |
271 | | size_t len=0, size=0; |
272 | | errno_t err = 0; |
273 | | |
274 | | //const wchar_t* ptr = str; |
275 | | //while (*ptr++) len++; |
276 | | len = wcslen(str)+1; |
277 | | |
278 | | err = wcstombs_s(&size, NULLPTR, 0, str, len*sizeof(wchar_t)); |
279 | | CRYPTOPP_ASSERT(err == 0); |
280 | | if (err != 0) |
281 | | { |
282 | | if (throwOnError) |
283 | | throw InvalidArgument("StringNarrow: wcstombs_s() failed with error " + IntToString(err)); |
284 | | else |
285 | | return std::string(); |
286 | | } |
287 | | |
288 | | result.resize(size); |
289 | | err = wcstombs_s(&size, &result[0], size, str, len*sizeof(wchar_t)); |
290 | | CRYPTOPP_ASSERT(err == 0); |
291 | | if (err != 0) |
292 | | { |
293 | | if (throwOnError) |
294 | | throw InvalidArgument("StringNarrow: wcstombs_s() failed with error " + IntToString(err)); |
295 | | else |
296 | | return std::string(); |
297 | | } |
298 | | |
299 | | // The safe routine's size includes the NULL. |
300 | | if (!result.empty() && result[size - 1] == '\0') |
301 | | result.erase(size - 1); |
302 | | #else |
303 | 0 | size_t size = wcstombs(NULLPTR, str, 0); |
304 | 0 | CRYPTOPP_ASSERT(size != (size_t)-1); |
305 | 0 | if (size == (size_t)-1) |
306 | 0 | { |
307 | 0 | if (throwOnError) |
308 | 0 | throw InvalidArgument("StringNarrow: wcstombs() failed"); |
309 | 0 | else |
310 | 0 | return std::string(); |
311 | 0 | } |
312 | | |
313 | 0 | result.resize(size); |
314 | 0 | size = wcstombs(&result[0], str, size); |
315 | 0 | CRYPTOPP_ASSERT(size != (size_t)-1); |
316 | 0 | if (size == (size_t)-1) |
317 | 0 | { |
318 | 0 | if (throwOnError) |
319 | 0 | throw InvalidArgument("StringNarrow: wcstombs() failed"); |
320 | 0 | else |
321 | 0 | return std::string(); |
322 | 0 | } |
323 | 0 | #endif |
324 | | |
325 | 0 | return result; |
326 | 0 | } |
327 | | |
328 | | std::wstring StringWiden(const char *str, bool throwOnError) |
329 | 0 | { |
330 | 0 | CRYPTOPP_ASSERT(str); |
331 | 0 | std::wstring result; |
332 | | |
333 | | // Safer functions on Windows for C&A, https://github.com/weidai11/cryptopp/issues/55 |
334 | | #if (CRYPTOPP_MSC_VERSION >= 1400) |
335 | | size_t len=0, size=0; |
336 | | errno_t err = 0; |
337 | | |
338 | | //const char* ptr = str; |
339 | | //while (*ptr++) len++; |
340 | | len = std::strlen(str)+1; |
341 | | |
342 | | err = mbstowcs_s(&size, NULLPTR, 0, str, len); |
343 | | CRYPTOPP_ASSERT(err == 0); |
344 | | if (err != 0) |
345 | | { |
346 | | if (throwOnError) |
347 | | throw InvalidArgument("StringWiden: wcstombs_s() failed with error " + IntToString(err)); |
348 | | else |
349 | | return std::wstring(); |
350 | | } |
351 | | |
352 | | result.resize(size); |
353 | | err = mbstowcs_s(&size, &result[0], size, str, len); |
354 | | CRYPTOPP_ASSERT(err == 0); |
355 | | if (err != 0) |
356 | | { |
357 | | if (throwOnError) |
358 | | throw InvalidArgument("StringWiden: wcstombs_s() failed with error " + IntToString(err)); |
359 | | else |
360 | | return std::wstring(); |
361 | | } |
362 | | |
363 | | // The safe routine's size includes the NULL. |
364 | | if (!result.empty() && result[size - 1] == '\0') |
365 | | result.erase(size - 1); |
366 | | #else |
367 | 0 | size_t size = mbstowcs(NULLPTR, str, 0); |
368 | 0 | CRYPTOPP_ASSERT(size != (size_t)-1); |
369 | 0 | if (size == (size_t)-1) |
370 | 0 | { |
371 | 0 | if (throwOnError) |
372 | 0 | throw InvalidArgument("StringWiden: mbstowcs() failed"); |
373 | 0 | else |
374 | 0 | return std::wstring(); |
375 | 0 | } |
376 | | |
377 | 0 | result.resize(size); |
378 | 0 | size = mbstowcs(&result[0], str, size); |
379 | 0 | CRYPTOPP_ASSERT(size != (size_t)-1); |
380 | 0 | if (size == (size_t)-1) |
381 | 0 | { |
382 | 0 | if (throwOnError) |
383 | 0 | throw InvalidArgument("StringWiden: mbstowcs() failed"); |
384 | 0 | else |
385 | 0 | return std::wstring(); |
386 | 0 | } |
387 | 0 | #endif |
388 | | |
389 | 0 | return result; |
390 | 0 | } |
391 | | |
392 | | NAMESPACE_END |
393 | | |
394 | | #endif |