/src/boost/boost/json/detail/sse2.hpp
Line | Count | Source (jump to first uncovered line) |
1 | | // |
2 | | // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com), |
3 | | // Vinnie Falco (vinnie.falco@gmail.com) |
4 | | // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com) |
5 | | // |
6 | | // Distributed under the Boost Software License, Version 1.0. (See accompanying |
7 | | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
8 | | // |
9 | | // Official repository: https://github.com/boostorg/json |
10 | | // |
11 | | |
12 | | #ifndef BOOST_JSON_DETAIL_SSE2_HPP |
13 | | #define BOOST_JSON_DETAIL_SSE2_HPP |
14 | | |
15 | | #include <boost/json/detail/config.hpp> |
16 | | #include <boost/json/detail/utf8.hpp> |
17 | | #include <cstddef> |
18 | | #include <cstring> |
19 | | #ifdef BOOST_JSON_USE_SSE2 |
20 | | # include <emmintrin.h> |
21 | | # include <xmmintrin.h> |
22 | | # ifdef _MSC_VER |
23 | | # include <intrin.h> |
24 | | # endif |
25 | | #endif |
26 | | |
27 | | namespace boost { |
28 | | namespace json { |
29 | | namespace detail { |
30 | | |
31 | | #ifdef BOOST_JSON_USE_SSE2 |
32 | | |
// Scans [p, end) and returns a pointer to the first character that is
// a double quote, a backslash, or a control character (below 0x20).
// Returns `end` when no such character exists.
//
// This primary template performs no UTF-8 validation: bytes >= 0x80
// are stepped over like any other ordinary character.
template<bool AllowBadUTF8>
inline
const char*
count_valid(
    char const* p,
    const char* end) noexcept
{
    __m128i const quotes  = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const slashes = _mm_set1_epi8( '\\' );
    __m128i const ctl_max = _mm_set1_epi8( 0x1F );

    // vectorized part: inspect 16 bytes per iteration
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(p) );

        __m128i const is_delim = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, quotes ),
            _mm_cmpeq_epi8( chunk, slashes ) );

        // unsigned min(x, 0x1F) == x holds exactly when x <= 0x1F
        __m128i const is_ctl = _mm_cmpeq_epi8(
            _mm_min_epu8( chunk, ctl_max ), chunk );

        int const mask = _mm_movemask_epi8(
            _mm_or_si128( is_delim, is_ctl ) );

        if( mask == 0 )
            continue;

        // the lowest set bit is the offset of the first stop character
#if defined(__GNUC__) || defined(__clang__)
        int const offset = __builtin_ffs( mask ) - 1;
#else
        unsigned long bit;
        _BitScanForward( &bit, mask );
        int const offset = bit;
#endif
        return p + offset;
    }

    // scalar tail: fewer than 16 bytes remain
    for( ; p != end; ++p )
    {
        unsigned char const ch = *p;
        if( ch < 0x20 || ch == '\x22' || ch == '\\' )
            break;
    }

    return p;
}
82 | | |
83 | | template<> |
84 | | inline |
85 | | const char* |
86 | | count_valid<false>( |
87 | | char const* p, |
88 | | const char* end) noexcept |
89 | 1.07M | { |
90 | 1.07M | __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"' |
91 | 1.07M | __m128i const q2 = _mm_set1_epi8( '\\' ); |
92 | 1.07M | __m128i const q3 = _mm_set1_epi8( 0x20 ); |
93 | | |
94 | 1.42M | while(end - p >= 16) |
95 | 1.41M | { |
96 | 1.41M | __m128i v1 = _mm_loadu_si128( (__m128i const*)p ); |
97 | | |
98 | 1.41M | __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); |
99 | 1.41M | __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); |
100 | 1.41M | __m128i v4 = _mm_cmplt_epi8( v1, q3 ); |
101 | | |
102 | 1.41M | __m128i v5 = _mm_or_si128( v2, v3 ); |
103 | 1.41M | __m128i v6 = _mm_or_si128( v5, v4 ); |
104 | | |
105 | 1.41M | int w = _mm_movemask_epi8( v6 ); |
106 | | |
107 | 1.41M | if( w != 0 ) |
108 | 1.06M | { |
109 | 1.06M | int m; |
110 | 1.06M | #if defined(__GNUC__) || defined(__clang__) |
111 | 1.06M | m = __builtin_ffs( w ) - 1; |
112 | | #else |
113 | | unsigned long index; |
114 | | _BitScanForward( &index, w ); |
115 | | m = index; |
116 | | #endif |
117 | 1.06M | p += m; |
118 | 1.06M | break; |
119 | 1.06M | } |
120 | | |
121 | 347k | p += 16; |
122 | 347k | } |
123 | | |
124 | 33.8M | while(p != end) |
125 | 33.8M | { |
126 | 33.8M | const unsigned char c = *p; |
127 | 33.8M | if(c == '\x22' || c == '\\' || c < 0x20) |
128 | 1.07M | break; |
129 | 32.8M | if(c < 0x80) |
130 | 32.6M | { |
131 | 32.6M | ++p; |
132 | 32.6M | continue; |
133 | 32.6M | } |
134 | | // validate utf-8 |
135 | 147k | uint16_t first = classify_utf8(c); |
136 | 147k | uint8_t len = first & 0xFF; |
137 | 147k | if(BOOST_JSON_UNLIKELY(end - p < len)) |
138 | 101 | break; |
139 | 147k | if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) |
140 | 745 | break; |
141 | 146k | p += len; |
142 | 146k | } |
143 | | |
144 | 1.07M | return p; |
145 | 1.07M | } |
146 | | |
147 | | #else |
148 | | |
// Portable version: scans [p, end) one byte at a time and returns a
// pointer to the first double quote, backslash, or control character
// (below 0x20), or `end` if there is none. No UTF-8 validation is done.
template<bool AllowBadUTF8>
char const*
count_valid(
    char const* p,
    char const* end) noexcept
{
    for( ; p != end; ++p )
    {
        unsigned char const ch = *p;
        bool const stop = ch < 0x20 || ch == '\x22' || ch == '\\';
        if( stop )
            break;
    }

    return p;
}
165 | | |
166 | | template<> |
167 | | inline |
168 | | char const* |
169 | | count_valid<false>( |
170 | | char const* p, |
171 | | char const* end) noexcept |
172 | | { |
173 | | while(p != end) |
174 | | { |
175 | | const unsigned char c = *p; |
176 | | if(c == '\x22' || c == '\\' || c < 0x20) |
177 | | break; |
178 | | if(c < 0x80) |
179 | | { |
180 | | ++p; |
181 | | continue; |
182 | | } |
183 | | // validate utf-8 |
184 | | uint16_t first = classify_utf8(c); |
185 | | uint8_t len = first & 0xFF; |
186 | | if(BOOST_JSON_UNLIKELY(end - p < len)) |
187 | | break; |
188 | | if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) |
189 | | break; |
190 | | p += len; |
191 | | } |
192 | | |
193 | | return p; |
194 | | } |
195 | | |
196 | | #endif |
197 | | |
198 | | // KRYSTIAN NOTE: does not stop to validate |
199 | | // count_unescaped |
200 | | |
201 | | #ifdef BOOST_JSON_USE_SSE2 |
202 | | |
// Returns how many leading characters of [s, s + n) can be taken without
// unescaping, as far as whole 16-byte chunks allow.
//
// The scan stops at the first double quote, backslash, or control
// character (below 0x20) and returns its offset. Only complete 16-byte
// chunks are examined: once fewer than 16 bytes remain the scan stops
// and the offset reached so far is returned, so the result may be
// smaller than the true count (and is 0 whenever n < 16).
// Bytes >= 0x80 are not validated.
inline
std::size_t
count_unescaped(
    char const* s,
    std::size_t n) noexcept
{
    __m128i const quotes  = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const slashes = _mm_set1_epi8( '\\' );
    __m128i const ctl_max = _mm_set1_epi8( 0x1F );

    char const* const first = s;

    for( ; n >= 16; s += 16, n -= 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(s) );

        __m128i const is_delim = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, quotes ),
            _mm_cmpeq_epi8( chunk, slashes ) );

        // unsigned min(x, 0x1F) == x holds exactly when x <= 0x1F
        __m128i const is_ctl = _mm_cmpeq_epi8(
            _mm_min_epu8( chunk, ctl_max ), chunk );

        int const mask = _mm_movemask_epi8(
            _mm_or_si128( is_delim, is_ctl ) );

        if( mask == 0 )
            continue;

        // the lowest set bit is the offset of the first stop character
#if defined(__GNUC__) || defined(__clang__)
        int const offset = __builtin_ffs( mask ) - 1;
#else
        unsigned long bit;
        _BitScanForward( &bit, mask );
        int const offset = static_cast<int>(bit);
#endif
        s += offset;
        break;
    }

    return static_cast<std::size_t>( s - first );
}
249 | | |
250 | | #else |
251 | | |
// Portable stand-in used when SSE2 is not enabled: it examines nothing
// and always reports that zero characters were counted.
inline
std::size_t
count_unescaped(
    char const* /* s */,
    std::size_t /* n */) noexcept
{
    std::size_t const none = 0;
    return none;
}
260 | | |
261 | | #endif |
262 | | |
263 | | // count_digits |
264 | | |
265 | | #ifdef BOOST_JSON_USE_SSE2 |
266 | | |
// Returns the number of leading decimal digits (0 to 16) among the
// 16 bytes starting at p.
// Always loads 16 bytes: assumes p..p+15 are valid.
inline int count_digits( char const* p ) noexcept
{
    __m128i const chunk = _mm_loadu_si128(
        reinterpret_cast<__m128i const*>(p) );

    // Adding 70 moves '0'..'9' (48..57) onto 118..127, the top of the
    // signed byte range; every other byte wraps to a value below 118.
    __m128i const shifted = _mm_add_epi8( chunk, _mm_set1_epi8( 70 ) );
    __m128i const not_digit = _mm_cmplt_epi8( shifted, _mm_set1_epi8( 118 ) );

    int const mask = _mm_movemask_epi8( not_digit );

    // no non-digit found: all 16 bytes are digits
    if( mask == 0 )
        return 16;

    // the lowest set bit is the offset of the first non-digit
#if defined(__GNUC__) || defined(__clang__)
    return __builtin_ffs( mask ) - 1;
#else
    unsigned long bit;
    _BitScanForward( &bit, mask );
    return static_cast<int>(bit);
#endif
}
295 | | |
296 | | #else |
297 | | |
// Returns the number of leading decimal digits (0 to 16) among the
// 16 bytes starting at p, checking one byte at a time.
// assumes p..p+15 are valid
inline int count_digits( char const* p ) noexcept
{
    int n = 0;

    while( n < 16 )
    {
        // for anything other than '0'..'9' the difference, taken as
        // an unsigned byte, is greater than 9
        unsigned char const d = p[n] - '0';
        if( d > 9 )
            break;
        ++n;
    }

    return n;
}
311 | | |
312 | | #endif |
313 | | |
314 | | // parse_unsigned |
315 | | |
// Appends the n characters at p, taken as decimal digits, to the
// running value r and returns the result: each character contributes
// r = r * 10 + (character - '0').
// The characters are not checked to be digits, and the arithmetic is
// plain unsigned 64-bit arithmetic with no overflow detection.
inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
{
    // consume the digits four at a time
    for( ; n >= 4; p += 4, n -= 4 )
    {
        // the per-character form is faster on clang for x86,
        // slower on gcc
#ifdef __clang__
        r = r * 10 + p[0] - '0';
        r = r * 10 + p[1] - '0';
        r = r * 10 + p[2] - '0';
        r = r * 10 + p[3] - '0';
#else
        uint32_t quad;
        std::memcpy( &quad, p, 4 );

        // subtract '0' from all four bytes in a single operation
        quad -= 0x30303030;

        unsigned const b0 = quad & 0xFF;
        unsigned const b1 = (quad >> 8) & 0xFF;
        unsigned const b2 = (quad >> 16) & 0xFF;
        unsigned const b3 = (quad >> 24);

        // which byte holds the first character depends on byte order
#ifdef BOOST_JSON_BIG_ENDIAN
        r = (((r * 10 + b3) * 10 + b2) * 10 + b1) * 10 + b0;
#else
        r = (((r * 10 + b0) * 10 + b1) * 10 + b2) * 10 + b3;
#endif
#endif
    }

    // at most three characters are left
    for( ; n != 0; --n, ++p )
        r = r * 10 + *p - '0';

    return r;
}
367 | | |
368 | | // KRYSTIAN: this function is unused |
369 | | // count_leading |
370 | | |
371 | | /* |
372 | | #ifdef BOOST_JSON_USE_SSE2 |
373 | | |
374 | | // assumes p..p+15 |
375 | | inline std::size_t count_leading( char const * p, char ch ) noexcept |
376 | | { |
377 | | __m128i const q1 = _mm_set1_epi8( ch ); |
378 | | |
379 | | __m128i v = _mm_loadu_si128( (__m128i const*)p ); |
380 | | |
381 | | __m128i w = _mm_cmpeq_epi8( v, q1 ); |
382 | | |
383 | | int m = _mm_movemask_epi8( w ) ^ 0xFFFF; |
384 | | |
385 | | std::size_t n; |
386 | | |
387 | | if( m == 0 ) |
388 | | { |
389 | | n = 16; |
390 | | } |
391 | | else |
392 | | { |
393 | | #if defined(__GNUC__) || defined(__clang__) |
394 | | n = __builtin_ffs( m ) - 1; |
395 | | #else |
396 | | unsigned long index; |
397 | | _BitScanForward( &index, m ); |
398 | | n = index; |
399 | | #endif |
400 | | } |
401 | | |
402 | | return n; |
403 | | } |
404 | | |
405 | | #else |
406 | | |
407 | | // assumes p..p+15 |
408 | | inline std::size_t count_leading( char const * p, char ch ) noexcept |
409 | | { |
410 | | std::size_t n = 0; |
411 | | |
412 | | for( ; n < 16 && *p == ch; ++p, ++n ); |
413 | | |
414 | | return n; |
415 | | } |
416 | | |
417 | | #endif |
418 | | */ |
419 | | |
420 | | // count_whitespace |
421 | | |
422 | | #ifdef BOOST_JSON_USE_SSE2 |
423 | | |
// Skips leading whitespace (' ', '\t', '\r', '\n') in [p, end) and
// returns a pointer to the first other character, or `end` if the
// whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    // fast exits: nothing to scan, or the first byte is above ' ' and
    // therefore cannot be one of the four whitespace characters
    if( p == end || static_cast<unsigned char>( *p ) > 0x20 )
        return p;

    __m128i const spaces   = _mm_set1_epi8( ' ' );
    __m128i const newlines = _mm_set1_epi8( '\n' );
    __m128i const bit2     = _mm_set1_epi8( 4 ); // '\t' | 4 == '\r'
    __m128i const returns  = _mm_set1_epi8( '\r' );

    // vectorized part: inspect 16 bytes per iteration
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(p) );

        __m128i const sp_or_nl = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, spaces ),
            _mm_cmpeq_epi8( chunk, newlines ) );

        // setting bit 2 folds '\t' onto '\r', so a single compare
        // recognizes both of them
        __m128i const tab_or_cr = _mm_cmpeq_epi8(
            _mm_or_si128( chunk, bit2 ), returns );

        // invert the 16-bit whitespace mask to get the non-whitespace bytes
        int const non_ws = _mm_movemask_epi8(
            _mm_or_si128( sp_or_nl, tab_or_cr ) ) ^ 0xFFFF;

        if( non_ws == 0 )
            continue;

        // the lowest set bit is the offset of the first non-whitespace byte
#if defined(__GNUC__) || defined(__clang__)
        std::size_t const skip = __builtin_ffs( non_ws ) - 1;
#else
        unsigned long bit;
        _BitScanForward( &bit, non_ws );
        std::size_t const skip = bit;
#endif
        return p + skip;
    }

    // scalar tail: fewer than 16 bytes remain
    for( ; p != end; ++p )
    {
        char const ch = *p;
        if( ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n' )
            break;
    }

    return p;
}
483 | | |
484 | | /* |
485 | | |
486 | | // slightly faster on msvc-14.2, slightly slower on clang-win |
487 | | |
488 | | inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept |
489 | | { |
490 | | char const * p0 = p; |
491 | | |
492 | | while( n > 0 ) |
493 | | { |
494 | | char ch = *p; |
495 | | |
496 | | if( ch == '\n' || ch == '\r' ) |
497 | | { |
498 | | ++p; |
499 | | --n; |
500 | | continue; |
501 | | } |
502 | | |
503 | | if( ch != ' ' && ch != '\t' ) |
504 | | { |
505 | | break; |
506 | | } |
507 | | |
508 | | ++p; |
509 | | --n; |
510 | | |
511 | | while( n >= 16 ) |
512 | | { |
513 | | std::size_t n2 = count_leading( p, ch ); |
514 | | |
515 | | p += n2; |
516 | | n -= n2; |
517 | | |
518 | | if( n2 < 16 ) |
519 | | { |
520 | | break; |
521 | | } |
522 | | } |
523 | | } |
524 | | |
525 | | return p - p0; |
526 | | } |
527 | | */ |
528 | | |
529 | | #else |
530 | | |
// Portable version: skips leading whitespace (' ', '\n', '\r', '\t')
// in [p, end) one byte at a time and returns a pointer to the first
// other character, or `end` if the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    while( p != end )
    {
        char const ch = *p;
        bool const is_ws =
            ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t';
        if( ! is_ws )
            return p;
        ++p;
    }

    return p;
}
542 | | |
543 | | #endif |
544 | | |
545 | | } // detail |
546 | | } // namespace json |
547 | | } // namespace boost |
548 | | |
549 | | #endif |