Coverage Report

Created: 2025-06-24 06:38

/src/boost/boost/json/detail/sse2.hpp
Line
Count
Source (jump to first uncovered line)
1
//
2
// Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com),
3
//                    Vinnie Falco (vinnie.falco@gmail.com)
4
// Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
5
//
6
// Distributed under the Boost Software License, Version 1.0. (See accompanying
7
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8
//
9
// Official repository: https://github.com/boostorg/json
10
//
11
12
#ifndef BOOST_JSON_DETAIL_SSE2_HPP
13
#define BOOST_JSON_DETAIL_SSE2_HPP
14
15
#include <boost/json/detail/config.hpp>
16
#include <boost/json/detail/utf8.hpp>
17
#include <cstddef>
18
#include <cstring>
19
#ifdef BOOST_JSON_USE_SSE2
20
# include <emmintrin.h>
21
# include <xmmintrin.h>
22
# ifdef _MSC_VER
23
#  include <intrin.h>
24
# endif
25
#endif
26
27
namespace boost {
28
namespace json {
29
namespace detail {
30
31
#ifdef BOOST_JSON_USE_SSE2
32
33
// Scans [p, end) and returns a pointer to the first byte that is a double
// quote, a backslash, or a control character below 0x20. Returns `end` when
// the range holds no such byte. Bytes >= 0x80 are passed over without any
// UTF-8 validation; AllowBadUTF8 is not consulted by this primary template.
template<bool AllowBadUTF8>
inline
const char*
count_valid(
    char const* p,
    const char* end) noexcept
{
    __m128i const quotes = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const slashes = _mm_set1_epi8( '\\' );
    __m128i const ctrl_max = _mm_set1_epi8( 0x1F );

    // Vector path: examine sixteen bytes per iteration.
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128( (__m128i const*)p );

        __m128i const is_special = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, quotes ),
            _mm_cmpeq_epi8( chunk, slashes ) );

        // unsigned min(byte, 0x1F) == byte holds exactly when byte <= 0x1F
        __m128i const is_ctrl = _mm_cmpeq_epi8(
            _mm_min_epu8( chunk, ctrl_max ), chunk );

        int const mask = _mm_movemask_epi8(
            _mm_or_si128( is_special, is_ctrl ) );

        if( mask == 0 )
            continue;

        // The lowest set bit marks the first stopping byte in the chunk.
#if defined(__GNUC__) || defined(__clang__)
        return p + ( __builtin_ffs( mask ) - 1 );
#else
        unsigned long index;
        _BitScanForward( &index, mask );
        return p + static_cast<int>( index );
#endif
    }

    // Scalar path for the remaining (fewer than sixteen) bytes.
    for( ; p != end; ++p )
    {
        unsigned char const byte = *p;
        if( byte < 0x20 || byte == '\x22' || byte == '\\' )
            return p;
    }

    return end;
}
82
83
template<>
84
inline
85
const char*
86
count_valid<false>(
87
    char const* p,
88
    const char* end) noexcept
89
1.94M
{
90
1.94M
    __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
91
1.94M
    __m128i const q2 = _mm_set1_epi8( '\\' );
92
1.94M
    __m128i const q3 = _mm_set1_epi8( 0x20 );
93
94
3.74M
    while(end - p >= 16)
95
3.73M
    {
96
3.73M
        __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
97
98
3.73M
        __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
99
3.73M
        __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
100
3.73M
        __m128i v4 = _mm_cmplt_epi8( v1, q3 );
101
102
3.73M
        __m128i v5 = _mm_or_si128( v2, v3 );
103
3.73M
        __m128i v6 = _mm_or_si128( v5, v4 );
104
105
3.73M
        int w = _mm_movemask_epi8( v6 );
106
107
3.73M
        if( w != 0 )
108
1.93M
        {
109
1.93M
            int m;
110
1.93M
#if defined(__GNUC__) || defined(__clang__)
111
1.93M
            m = __builtin_ffs( w ) - 1;
112
#else
113
            unsigned long index;
114
            _BitScanForward( &index, w );
115
            m = index;
116
#endif
117
1.93M
            p += m;
118
1.93M
            break;
119
1.93M
        }
120
121
1.80M
        p += 16;
122
1.80M
    }
123
124
47.8M
    while(p != end)
125
47.8M
    {
126
47.8M
        const unsigned char c = *p;
127
47.8M
        if(c == '\x22' || c == '\\' || c < 0x20)
128
1.94M
            break;
129
45.9M
        if(c < 0x80)
130
45.7M
        {
131
45.7M
            ++p;
132
45.7M
            continue;
133
45.7M
        }
134
        // validate utf-8
135
181k
        uint16_t first = classify_utf8(c);
136
181k
        uint8_t len = first & 0xFF;
137
181k
        if(BOOST_JSON_UNLIKELY(end - p < len))
138
76
            break;
139
181k
        if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
140
979
            break;
141
180k
        p += len;
142
180k
    }
143
144
1.94M
    return p;
145
1.94M
}
146
147
#else
148
149
// Portable scan of [p, end): returns a pointer to the first double quote,
// backslash, or control character below 0x20, or `end` if there is none.
// Bytes >= 0x80 are accepted as-is; AllowBadUTF8 is not consulted here.
template<bool AllowBadUTF8>
char const*
count_valid(
    char const* p,
    char const* end) noexcept
{
    for( ; p != end; ++p )
    {
        unsigned char const byte = *p;
        bool const plain =
            byte >= 0x20 && byte != '\x22' && byte != '\\';
        if( !plain )
            break;
    }

    return p;
}
165
166
template<>
167
inline
168
char const*
169
count_valid<false>(
170
    char const* p,
171
    char const* end) noexcept
172
{
173
    while(p != end)
174
    {
175
        const unsigned char c = *p;
176
        if(c == '\x22' || c == '\\' || c < 0x20)
177
            break;
178
        if(c < 0x80)
179
        {
180
            ++p;
181
            continue;
182
        }
183
        // validate utf-8
184
        uint16_t first = classify_utf8(c);
185
        uint8_t len = first & 0xFF;
186
        if(BOOST_JSON_UNLIKELY(end - p < len))
187
            break;
188
        if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
189
            break;
190
        p += len;
191
    }
192
193
    return p;
194
}
195
196
#endif
197
198
// KRYSTIAN NOTE: does not stop to validate
199
// count_unescaped
200
201
#ifdef BOOST_JSON_USE_SSE2
202
203
// Counts leading bytes of s[0, n) that need no escaping, looking only at
// whole sixteen-byte chunks.
//
// The scan stops at the first double quote, backslash, or control character
// (<= 0x1F) found inside a full chunk and returns its offset. If no such byte
// is found, the result is n rounded down to a multiple of sixteen: a trailing
// partial chunk is never examined, so the caller must handle the remainder.
// No UTF-8 validation is performed.
inline
std::size_t
count_unescaped(
    char const* s,
    std::size_t n) noexcept
{
    __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const q2 = _mm_set1_epi8( '\\' );   // '\\'
    __m128i const q3 = _mm_set1_epi8( 0x1F );

    char const* const s0 = s;

    while( n >= 16 )
    {
        __m128i const v1 = _mm_loadu_si128( (__m128i const*)s );
        __m128i const v2 = _mm_cmpeq_epi8( v1, q1 ); // quote
        __m128i const v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash
        __m128i const v4 = _mm_or_si128( v2, v3 );   // combine quotes and backslash
        // unsigned min(byte, 0x1F) == byte holds exactly when byte <= 0x1F
        __m128i const v5 = _mm_min_epu8( v1, q3 );
        __m128i const v6 = _mm_cmpeq_epi8( v5, v1 ); // controls
        __m128i const v7 = _mm_or_si128( v4, v6 );   // combine with control

        int const w = _mm_movemask_epi8( v7 );

        if( w != 0 )
        {
            // The lowest set bit marks the first stopping byte in the chunk.
#if defined(__GNUC__) || defined(__clang__)
            int const m = __builtin_ffs( w ) - 1;
#else
            unsigned long index;
            _BitScanForward( &index, w );
            int const m = static_cast<int>( index );
#endif
            s += m;
            break;
        }

        s += 16;
        n -= 16;
    }

    // s never moves backwards, so the difference is non-negative.
    return static_cast<std::size_t>( s - s0 );
}
249
250
#else
251
252
// Stand-in used when BOOST_JSON_USE_SSE2 is not defined: it inspects nothing
// and always reports zero bytes, whatever the arguments.
inline
std::size_t
count_unescaped(
    char const* /*s*/,
    std::size_t /*n*/) noexcept
{
    return std::size_t( 0 );
}
260
261
#endif
262
263
// count_digits
264
265
#ifdef BOOST_JSON_USE_SSE2
266
267
// Returns how many of the sixteen bytes starting at p form an unbroken run of
// ASCII digits counted from p[0]; the result lies in [0, 16].
// Assumes p..p+15 are valid (all sixteen bytes are loaded unconditionally).
inline int count_digits( char const* p ) noexcept
{
    __m128i const chunk = _mm_loadu_si128( (__m128i const*)p );

    // Adding 70 maps '0'..'9' onto 118..127, the top of the signed 8-bit
    // range. Every other byte value ends up below 118 as a signed value,
    // either directly or after wrapping around.
    __m128i const shifted = _mm_add_epi8( chunk, _mm_set1_epi8( 70 ) );
    __m128i const non_digit = _mm_cmplt_epi8( shifted, _mm_set1_epi8( 118 ) );

    int const mask = _mm_movemask_epi8( non_digit );

    // No bit set: all sixteen bytes are digits.
    if( mask == 0 )
        return 16;

    // The lowest set bit is the position of the first non-digit.
#if defined(__GNUC__) || defined(__clang__)
    return __builtin_ffs( mask ) - 1;
#else
    unsigned long index;
    _BitScanForward( &index, mask );
    return static_cast<int>(index);
#endif
}
295
296
#else
297
298
// Returns how many bytes, starting at p[0], form an unbroken run of ASCII
// digits, looking at no more than sixteen bytes; the result lies in [0, 16].
// Assumes p..p+15 are valid.
inline int count_digits( char const* p ) noexcept
{
    int n = 0;

    // Subtracting '0' and narrowing to unsigned char folds "below '0'" and
    // "above '9'" into the single test "> 9".
    while( n < 16 && static_cast<unsigned char>( p[n] - '0' ) <= 9 )
        ++n;

    return n;
}
311
312
#endif
313
314
// parse_unsigned
315
316
inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
317
12.2M
{
318
13.1M
    while( n >= 4 )
319
893k
    {
320
        // faster on on clang for x86,
321
        // slower on gcc
322
893k
#ifdef __clang__
323
893k
        r = r * 10 + p[0] - '0';
324
893k
        r = r * 10 + p[1] - '0';
325
893k
        r = r * 10 + p[2] - '0';
326
893k
        r = r * 10 + p[3] - '0';
327
#else
328
        uint32_t v;
329
        std::memcpy( &v, p, 4 );
330
        endian::native_to_little_inplace(v);
331
332
        v -= 0x30303030;
333
334
        unsigned w0 = v & 0xFF;
335
        unsigned w1 = (v >> 8) & 0xFF;
336
        unsigned w2 = (v >> 16) & 0xFF;
337
        unsigned w3 = (v >> 24);
338
339
        r = (((r * 10 + w0) * 10 + w1) * 10 + w2) * 10 + w3;
340
#endif
341
893k
        p += 4;
342
893k
        n -= 4;
343
893k
    }
344
345
12.2M
    switch( n )
346
12.2M
    {
347
212k
    case 0:
348
212k
        break;
349
11.7M
    case 1:
350
11.7M
        r = r * 10 + p[0] - '0';
351
11.7M
        break;
352
167k
    case 2:
353
167k
        r = r * 10 + p[0] - '0';
354
167k
        r = r * 10 + p[1] - '0';
355
167k
        break;
356
92.5k
    case 3:
357
92.5k
        r = r * 10 + p[0] - '0';
358
92.5k
        r = r * 10 + p[1] - '0';
359
92.5k
        r = r * 10 + p[2] - '0';
360
92.5k
        break;
361
12.2M
    }
362
12.2M
    return r;
363
12.2M
}
364
365
// KRYSTIAN: this function is unused
366
// count_leading
367
368
/*
369
#ifdef BOOST_JSON_USE_SSE2
370
371
// assumes p..p+15
372
inline std::size_t count_leading( char const * p, char ch ) noexcept
373
{
374
    __m128i const q1 = _mm_set1_epi8( ch );
375
376
    __m128i v = _mm_loadu_si128( (__m128i const*)p );
377
378
    __m128i w = _mm_cmpeq_epi8( v, q1 );
379
380
    int m = _mm_movemask_epi8( w ) ^ 0xFFFF;
381
382
    std::size_t n;
383
384
    if( m == 0 )
385
    {
386
        n = 16;
387
    }
388
    else
389
    {
390
#if defined(__GNUC__) || defined(__clang__)
391
        n = __builtin_ffs( m ) - 1;
392
#else
393
        unsigned long index;
394
        _BitScanForward( &index, m );
395
        n = index;
396
#endif
397
    }
398
399
    return n;
400
}
401
402
#else
403
404
// assumes p..p+15
405
inline std::size_t count_leading( char const * p, char ch ) noexcept
406
{
407
    std::size_t n = 0;
408
409
    for( ; n < 16 && *p == ch; ++p, ++n );
410
411
    return n;
412
}
413
414
#endif
415
*/
416
417
// count_whitespace
418
419
#ifdef BOOST_JSON_USE_SSE2
420
421
// Skips leading whitespace (space, tab, carriage return, line feed) in
// [p, end) and returns a pointer to the first byte that is none of those,
// or `end` if the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    // Cheap exits: empty range, or a first byte above 0x20, which cannot be
    // any of the four whitespace characters.
    if( p == end || static_cast<unsigned char>( *p ) > 0x20 )
        return p;

    __m128i const spaces = _mm_set1_epi8( ' ' );
    __m128i const newlines = _mm_set1_epi8( '\n' );
    __m128i const bit4 = _mm_set1_epi8( 4 ); // '\t' | 4 == '\r'
    __m128i const returns = _mm_set1_epi8( '\r' );

    // Vector path: examine sixteen bytes per iteration.
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128( (__m128i const*)p );

        __m128i const sp_or_nl = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, spaces ),
            _mm_cmpeq_epi8( chunk, newlines ));

        // OR-ing in bit 4 turns '\t' into '\r' and leaves '\r' unchanged, so
        // one comparison covers both characters.
        __m128i const tab_or_cr = _mm_cmpeq_epi8(
            _mm_or_si128( chunk, bit4 ), returns );

        // Invert the sixteen whitespace bits: set bits now mark non-whitespace.
        int const non_ws = _mm_movemask_epi8(
            _mm_or_si128( sp_or_nl, tab_or_cr ) ) ^ 0xFFFF;

        if( non_ws == 0 )
            continue;

#if defined(__GNUC__) || defined(__clang__)
        std::size_t const skip = __builtin_ffs( non_ws ) - 1;
#else
        unsigned long index;
        _BitScanForward( &index, non_ws );
        std::size_t const skip = index;
#endif

        return p + skip;
    }

    // Scalar path for the remaining (fewer than sixteen) bytes.
    for( ; p != end; ++p )
    {
        char const c = *p;
        bool const is_ws = c == ' ' || c == '\t' || c == '\r' || c == '\n';
        if( !is_ws )
            break;
    }

    return p;
}
480
481
/*
482
483
// slightly faster on msvc-14.2, slightly slower on clang-win
484
485
inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept
486
{
487
    char const * p0 = p;
488
489
    while( n > 0 )
490
    {
491
        char ch = *p;
492
493
        if( ch == '\n' || ch == '\r' )
494
        {
495
            ++p;
496
            --n;
497
            continue;
498
        }
499
500
        if( ch != ' ' && ch != '\t' )
501
        {
502
            break;
503
        }
504
505
        ++p;
506
        --n;
507
508
        while( n >= 16 )
509
        {
510
            std::size_t n2 = count_leading( p, ch );
511
512
            p += n2;
513
            n -= n2;
514
515
            if( n2 < 16 )
516
            {
517
                break;
518
            }
519
        }
520
    }
521
522
    return p - p0;
523
}
524
*/
525
526
#else
527
528
// Skips leading whitespace (space, tab, carriage return, line feed) in
// [p, end) and returns a pointer to the first byte that is none of those,
// or `end` if the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    while( p != end )
    {
        switch( *p )
        {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            ++p;
            continue;

        default:
            return p;
        }
    }

    return p;
}
539
540
#endif
541
542
} // detail
543
} // namespace json
544
} // namespace boost
545
546
#endif