Coverage Report

Created: 2023-06-07 06:25

/src/boost/boost/json/detail/sse2.hpp
Line
Count
Source (jump to first uncovered line)
1
//
2
// Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com),
3
//                    Vinnie Falco (vinnie.falco@gmail.com)
4
// Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
5
//
6
// Distributed under the Boost Software License, Version 1.0. (See accompanying
7
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8
//
9
// Official repository: https://github.com/boostorg/json
10
//
11
12
#ifndef BOOST_JSON_DETAIL_SSE2_HPP
13
#define BOOST_JSON_DETAIL_SSE2_HPP
14
15
#include <boost/json/detail/config.hpp>
16
#include <boost/json/detail/utf8.hpp>
17
#include <cstddef>
18
#include <cstring>
19
#ifdef BOOST_JSON_USE_SSE2
20
# include <emmintrin.h>
21
# include <xmmintrin.h>
22
# ifdef _MSC_VER
23
#  include <intrin.h>
24
# endif
25
#endif
26
27
namespace boost {
28
namespace json {
29
namespace detail {
30
31
#ifdef BOOST_JSON_USE_SSE2
32
33
// Scans [p, end) and returns a pointer to the first byte that is a double
// quote, a backslash, or a control character (value below 0x20). Returns
// `end` when the range contains no such byte.
//
// This generic version performs no UTF-8 validation and does not consult
// AllowBadUTF8; bytes >= 0x80 are simply skipped.
template<bool AllowBadUTF8>
inline
const char*
count_valid(
    char const* p,
    const char* end) noexcept
{
    __m128i const quotes = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const slashes = _mm_set1_epi8( '\\' );
    __m128i const ctl_max = _mm_set1_epi8( 0x1F );

    // Vector phase: examine 16 bytes per iteration.
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>( p ) );

        // A lane is a control character exactly when clamping it to 0x1F
        // (unsigned minimum) leaves it unchanged.
        __m128i const is_ctl = _mm_cmpeq_epi8(
            _mm_min_epu8( chunk, ctl_max ), chunk );
        __m128i const is_special = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, quotes ),
            _mm_cmpeq_epi8( chunk, slashes ) );

        int const mask = _mm_movemask_epi8(
            _mm_or_si128( is_special, is_ctl ) );
        if( mask == 0 )
            continue;

        // The lowest set bit is the index of the first stopping byte.
#if defined(__GNUC__) || defined(__clang__)
        return p + ( __builtin_ffs( mask ) - 1 );
#else
        unsigned long bit;
        _BitScanForward( &bit, mask );
        return p + static_cast<int>( bit );
#endif
    }

    // Scalar phase: fewer than 16 bytes remain.
    for( ; p != end; ++p )
    {
        unsigned char const ch = *p;
        if( ch < 0x20 || ch == '\x22' || ch == '\\' )
            break;
    }

    return p;
}
82
83
template<>
84
inline
85
const char*
86
count_valid<false>(
87
    char const* p,
88
    const char* end) noexcept
89
1.07M
{
90
1.07M
    __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
91
1.07M
    __m128i const q2 = _mm_set1_epi8( '\\' );
92
1.07M
    __m128i const q3 = _mm_set1_epi8( 0x20 );
93
94
1.42M
    while(end - p >= 16)
95
1.41M
    {
96
1.41M
        __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
97
98
1.41M
        __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
99
1.41M
        __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
100
1.41M
        __m128i v4 = _mm_cmplt_epi8( v1, q3 );
101
102
1.41M
        __m128i v5 = _mm_or_si128( v2, v3 );
103
1.41M
        __m128i v6 = _mm_or_si128( v5, v4 );
104
105
1.41M
        int w = _mm_movemask_epi8( v6 );
106
107
1.41M
        if( w != 0 )
108
1.06M
        {
109
1.06M
            int m;
110
1.06M
#if defined(__GNUC__) || defined(__clang__)
111
1.06M
            m = __builtin_ffs( w ) - 1;
112
#else
113
            unsigned long index;
114
            _BitScanForward( &index, w );
115
            m = index;
116
#endif
117
1.06M
            p += m;
118
1.06M
            break;
119
1.06M
        }
120
121
347k
        p += 16;
122
347k
    }
123
124
33.8M
    while(p != end)
125
33.8M
    {
126
33.8M
        const unsigned char c = *p;
127
33.8M
        if(c == '\x22' || c == '\\' || c < 0x20)
128
1.07M
            break;
129
32.8M
        if(c < 0x80)
130
32.6M
        {
131
32.6M
            ++p;
132
32.6M
            continue;
133
32.6M
        }
134
        // validate utf-8
135
147k
        uint16_t first = classify_utf8(c);
136
147k
        uint8_t len = first & 0xFF;
137
147k
        if(BOOST_JSON_UNLIKELY(end - p < len))
138
101
            break;
139
147k
        if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
140
745
            break;
141
146k
        p += len;
142
146k
    }
143
144
1.07M
    return p;
145
1.07M
}
146
147
#else
148
149
// Portable fallback. Returns a pointer to the first byte in [p, end) that is
// a double quote, a backslash, or a control character (below 0x20), or `end`
// if there is none. No UTF-8 validation is performed here and AllowBadUTF8
// is not consulted.
template<bool AllowBadUTF8>
char const*
count_valid(
    char const* p,
    char const* end) noexcept
{
    for( ; p != end; ++p )
    {
        unsigned char const ch = *p;
        bool const stop = ch < 0x20 || ch == '\x22' || ch == '\\';
        if( stop )
            return p;
    }

    return p;
}
165
166
template<>
167
inline
168
char const*
169
count_valid<false>(
170
    char const* p,
171
    char const* end) noexcept
172
{
173
    while(p != end)
174
    {
175
        const unsigned char c = *p;
176
        if(c == '\x22' || c == '\\' || c < 0x20)
177
            break;
178
        if(c < 0x80)
179
        {
180
            ++p;
181
            continue;
182
        }
183
        // validate utf-8
184
        uint16_t first = classify_utf8(c);
185
        uint8_t len = first & 0xFF;
186
        if(BOOST_JSON_UNLIKELY(end - p < len))
187
            break;
188
        if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
189
            break;
190
        p += len;
191
    }
192
193
    return p;
194
}
195
196
#endif
197
198
// KRYSTIAN NOTE: does not stop to validate
199
// count_unescaped
200
201
#ifdef BOOST_JSON_USE_SSE2
202
203
// Returns the number of leading bytes of s[0..n) that can be copied without
// unescaping, i.e. the offset of the first double quote, backslash, or
// control character (below 0x20) found.
//
// Only complete 16-byte chunks are examined. A trailing remainder shorter
// than 16 bytes is never inspected, so when no stopping byte is found the
// result is n rounded down to a multiple of 16, and it is 0 for n < 16.
// The result is therefore a lower bound, not necessarily the full length of
// the unescaped prefix.
inline
std::size_t
count_unescaped(
    char const* s,
    std::size_t n) noexcept
{
    __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const q2 = _mm_set1_epi8( '\\' ); // '\\'
    __m128i const q3 = _mm_set1_epi8( 0x1F );

    char const* const s0 = s;

    while( n >= 16 )
    {
        __m128i const v1 = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>( s ) );
        __m128i const v2 = _mm_cmpeq_epi8( v1, q1 ); // quote
        __m128i const v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash
        __m128i const v4 = _mm_or_si128( v2, v3 ); // quote or backslash
        // A lane is a control character exactly when clamping it to 0x1F
        // (unsigned minimum) leaves it unchanged.
        __m128i const v5 = _mm_min_epu8( v1, q3 );
        __m128i const v6 = _mm_cmpeq_epi8( v5, v1 ); // controls
        __m128i const v7 = _mm_or_si128( v4, v6 ); // any stopping byte

        int const w = _mm_movemask_epi8( v7 );

        if( w != 0 )
        {
            // The lowest set bit is the index of the first stopping byte.
            int m;
#if defined(__GNUC__) || defined(__clang__)
            m = __builtin_ffs( w ) - 1;
#else
            unsigned long index;
            _BitScanForward( &index, w );
            m = static_cast<int>( index );
#endif

            s += m;
            break;
        }

        s += 16;
        n -= 16;
    }

    // s never moves backwards, so the difference is non-negative.
    return static_cast<std::size_t>( s - s0 );
}
249
250
#else
251
252
// Portable fallback: no scanning is done without SSE2, so no byte is
// reported as known-unescaped and the result is always 0.
inline
std::size_t
count_unescaped(
    char const* /*s*/,
    std::size_t /*n*/) noexcept
{
    return 0;
}
260
261
#endif
262
263
// count_digits
264
265
#ifdef BOOST_JSON_USE_SSE2
266
267
// Returns how many of the 16 bytes starting at p form a leading run of
// ASCII decimal digits (a value in 0..16).
// Precondition: p[0] through p[15] are readable.
inline int count_digits( char const* p ) noexcept
{
    __m128i const chunk = _mm_loadu_si128(
        reinterpret_cast<__m128i const*>( p ) );

    // Adding 70 moves '0'..'9' (48..57) to 118..127, the top of the signed
    // byte range. Every other byte value wraps to something below 118 when
    // compared as a signed byte, so one signed compare flags the non-digits.
    __m128i const shifted = _mm_add_epi8( chunk, _mm_set1_epi8( 70 ) );
    __m128i const non_digit = _mm_cmplt_epi8( shifted, _mm_set1_epi8( 118 ) );

    int const mask = _mm_movemask_epi8( non_digit );
    if( mask == 0 )
        return 16; // all sixteen bytes are digits

    // The lowest set bit is the index of the first non-digit.
#if defined(__GNUC__) || defined(__clang__)
    return __builtin_ffs( mask ) - 1;
#else
    unsigned long bit;
    _BitScanForward( &bit, mask );
    return static_cast<int>( bit );
#endif
}
295
296
#else
297
298
// Returns how many of the 16 bytes starting at p form a leading run of
// ASCII decimal digits (a value in 0..16). Reading stops at the first
// non-digit.
// Precondition: p[0] through p[15] are readable.
inline int count_digits( char const* p ) noexcept
{
    int count = 0;

    while( count != 16 )
    {
        // Values outside '0'..'9' wrap to something above 9 once narrowed
        // to unsigned char.
        unsigned char const digit =
            static_cast<unsigned char>( p[count] - '0' );
        if( digit > 9 )
            return count;
        ++count;
    }

    return count;
}
311
312
#endif
313
314
// parse_unsigned
315
316
// Appends the n decimal digits at p to the running value r and returns the
// result, i.e. r * 10^n + value(p[0..n)).
// Every p[i] is expected to be an ASCII digit; nothing is validated and no
// overflow is detected (the arithmetic wraps modulo 2^64).
inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
{
    // Bulk phase: consume four digits per iteration.
    for( ; n >= 4; n -= 4, p += 4 )
    {
        // The digit-by-digit form is faster on clang for x86 and slower
        // on gcc, hence the two variants.
#ifdef __clang__
        r = r * 10 + p[0] - '0';
        r = r * 10 + p[1] - '0';
        r = r * 10 + p[2] - '0';
        r = r * 10 + p[3] - '0';
#else
        // Load four characters at once and subtract '0' from each byte.
        uint32_t packed;
        std::memcpy( &packed, p, sizeof( packed ) );

        packed -= 0x30303030;

        unsigned const b0 = packed & 0xFF;
        unsigned const b1 = ( packed >> 8 ) & 0xFF;
        unsigned const b2 = ( packed >> 16 ) & 0xFF;
        unsigned const b3 = packed >> 24;

        // The first character is the most significant digit; which byte
        // of the word holds it depends on the byte order.
#ifdef BOOST_JSON_BIG_ENDIAN
        r = (((r * 10 + b3) * 10 + b2) * 10 + b1) * 10 + b0;
#else
        r = (((r * 10 + b0) * 10 + b1) * 10 + b2) * 10 + b3;
#endif
#endif
    }

    // Tail phase: at most three digits remain.
    for( ; n != 0; --n, ++p )
    {
        r = r * 10 + *p - '0';
    }

    return r;
}
367
368
// KRYSTIAN: this function is unused
369
// count_leading
370
371
/*
372
#ifdef BOOST_JSON_USE_SSE2
373
374
// assumes p..p+15
375
inline std::size_t count_leading( char const * p, char ch ) noexcept
376
{
377
    __m128i const q1 = _mm_set1_epi8( ch );
378
379
    __m128i v = _mm_loadu_si128( (__m128i const*)p );
380
381
    __m128i w = _mm_cmpeq_epi8( v, q1 );
382
383
    int m = _mm_movemask_epi8( w ) ^ 0xFFFF;
384
385
    std::size_t n;
386
387
    if( m == 0 )
388
    {
389
        n = 16;
390
    }
391
    else
392
    {
393
#if defined(__GNUC__) || defined(__clang__)
394
        n = __builtin_ffs( m ) - 1;
395
#else
396
        unsigned long index;
397
        _BitScanForward( &index, m );
398
        n = index;
399
#endif
400
    }
401
402
    return n;
403
}
404
405
#else
406
407
// assumes p..p+15
408
inline std::size_t count_leading( char const * p, char ch ) noexcept
409
{
410
    std::size_t n = 0;
411
412
    for( ; n < 16 && *p == ch; ++p, ++n );
413
414
    return n;
415
}
416
417
#endif
418
*/
419
420
// count_whitespace
421
422
#ifdef BOOST_JSON_USE_SSE2
423
424
// Returns a pointer to the first byte in [p, end) that is not one of the
// four whitespace characters ' ', '\t', '\n', '\r', or `end` if every byte
// is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    // Fast exit: empty range, or the first byte is above 0x20 and so cannot
    // be any of the four whitespace characters.
    if( p == end || static_cast<unsigned char>( *p ) > 0x20 )
    {
        return p;
    }

    __m128i const spaces = _mm_set1_epi8( ' ' );
    __m128i const newlines = _mm_set1_epi8( '\n' );
    __m128i const bit4 = _mm_set1_epi8( 4 ); // '\t' | 4 == '\r'
    __m128i const returns = _mm_set1_epi8( '\r' );

    // Vector phase: examine 16 bytes per iteration.
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>( p ) );

        __m128i const sp_or_nl = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, spaces ),
            _mm_cmpeq_epi8( chunk, newlines ));
        // OR-ing in bit 4 folds '\t' onto '\r', so a single compare
        // matches both.
        __m128i const tab_or_cr = _mm_cmpeq_epi8(
            _mm_or_si128( chunk, bit4 ), returns );

        // Invert the 16-bit whitespace mask so that set bits mark
        // non-whitespace lanes.
        int const mask = _mm_movemask_epi8(
            _mm_or_si128( sp_or_nl, tab_or_cr ) ) ^ 0xFFFF;

        if( mask == 0 )
            continue;

        // The lowest set bit is the index of the first non-whitespace byte.
#if defined(__GNUC__) || defined(__clang__)
        return p + static_cast<std::size_t>( __builtin_ffs( mask ) - 1 );
#else
        unsigned long bit;
        _BitScanForward( &bit, mask );
        return p + static_cast<std::size_t>( bit );
#endif
    }

    // Scalar phase: fewer than 16 bytes remain.
    for( ; p != end; ++p )
    {
        char const ch = *p;
        bool const is_ws =
            ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
        if( ! is_ws )
            break;
    }

    return p;
}
483
484
/*
485
486
// slightly faster on msvc-14.2, slightly slower on clang-win
487
488
inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept
489
{
490
    char const * p0 = p;
491
492
    while( n > 0 )
493
    {
494
        char ch = *p;
495
496
        if( ch == '\n' || ch == '\r' )
497
        {
498
            ++p;
499
            --n;
500
            continue;
501
        }
502
503
        if( ch != ' ' && ch != '\t' )
504
        {
505
            break;
506
        }
507
508
        ++p;
509
        --n;
510
511
        while( n >= 16 )
512
        {
513
            std::size_t n2 = count_leading( p, ch );
514
515
            p += n2;
516
            n -= n2;
517
518
            if( n2 < 16 )
519
            {
520
                break;
521
            }
522
        }
523
    }
524
525
    return p - p0;
526
}
527
*/
528
529
#else
530
531
// Portable fallback. Returns a pointer to the first byte in [p, end) that is
// not one of ' ', '\t', '\n', '\r', or `end` if every byte is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    while( p != end )
    {
        switch( *p )
        {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            ++p;
            continue; // continues the enclosing while loop

        default:
            return p;
        }
    }

    return p;
}
542
543
#endif
544
545
} // detail
546
} // namespace json
547
} // namespace boost
548
549
#endif