/src/simdutf/src/generic/validate_utf32.h
Line | Count | Source |
1 | | namespace simdutf { |
2 | | namespace SIMDUTF_IMPLEMENTATION { |
3 | | namespace { |
4 | | namespace utf32 { |
5 | | |
6 | 4.88k | simdutf_really_inline bool validate(const char32_t *input, size_t size) { |
7 | 4.88k | if (simdutf_unlikely(size == 0)) { |
8 | | // empty input is valid UTF-32. protect the implementation from |
9 | | // handling nullptr |
10 | 46 | return true; |
11 | 46 | } |
12 | | |
13 | 4.84k | const char32_t *end = input + size; |
14 | | |
15 | 4.84k | using vector_u32 = simd32<uint32_t>; |
16 | | |
17 | 4.84k | const auto standardmax = vector_u32::splat(0x10ffff); |
18 | 4.84k | const auto offset = vector_u32::splat(0xffff2000); |
19 | 4.84k | const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); |
20 | 4.84k | auto currentmax = vector_u32::zero(); |
21 | 4.84k | auto currentoffsetmax = vector_u32::zero(); |
22 | | |
23 | 4.84k | constexpr size_t N = vector_u32::ELEMENTS; |
24 | | |
25 | 3.47M | while (input + N < end) { |
26 | 3.47M | auto in = vector_u32(input); |
27 | 3.47M | if (!match_system(endianness::BIG)) { |
28 | 3.47M | in.swap_bytes(); |
29 | 3.47M | } |
30 | | |
31 | 3.47M | currentmax = max(currentmax, in); |
32 | 3.47M | currentoffsetmax = max(currentoffsetmax, in + offset); |
33 | 3.47M | input += N; |
34 | 3.47M | } |
35 | | |
36 | 4.84k | const auto too_large = currentmax > standardmax; |
37 | 4.84k | if (too_large.any()) { |
38 | 1.80k | return false; |
39 | 1.80k | } |
40 | | |
41 | 3.03k | const auto surrogate = currentoffsetmax > standardoffsetmax; |
42 | 3.03k | if (surrogate.any()) { |
43 | 43 | return false; |
44 | 43 | } |
45 | | |
46 | 2.98k | return scalar::utf32::validate(input, end - input); |
47 | 3.03k | } simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf32::validate(char32_t const*, unsigned long) Line | Count | Source | 6 | 2.44k | simdutf_really_inline bool validate(const char32_t *input, size_t size) { | 7 | 2.44k | if (simdutf_unlikely(size == 0)) { | 8 | | // empty input is valid UTF-32. protect the implementation from | 9 | | // handling nullptr | 10 | 23 | return true; | 11 | 23 | } | 12 | | | 13 | 2.42k | const char32_t *end = input + size; | 14 | | | 15 | 2.42k | using vector_u32 = simd32<uint32_t>; | 16 | | | 17 | 2.42k | const auto standardmax = vector_u32::splat(0x10ffff); | 18 | 2.42k | const auto offset = vector_u32::splat(0xffff2000); | 19 | 2.42k | const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); | 20 | 2.42k | auto currentmax = vector_u32::zero(); | 21 | 2.42k | auto currentoffsetmax = vector_u32::zero(); | 22 | | | 23 | 2.42k | constexpr size_t N = vector_u32::ELEMENTS; | 24 | | | 25 | 1.15M | while (input + N < end) { | 26 | 1.15M | auto in = vector_u32(input); | 27 | 1.15M | if (!match_system(endianness::BIG)) { | 28 | 1.15M | in.swap_bytes(); | 29 | 1.15M | } | 30 | | | 31 | 1.15M | currentmax = max(currentmax, in); | 32 | 1.15M | currentoffsetmax = max(currentoffsetmax, in + offset); | 33 | 1.15M | input += N; | 34 | 1.15M | } | 35 | | | 36 | 2.42k | const auto too_large = currentmax > standardmax; | 37 | 2.42k | if (too_large.any()) { | 38 | 838 | return false; | 39 | 838 | } | 40 | | | 41 | 1.58k | const auto surrogate = currentoffsetmax > standardoffsetmax; | 42 | 1.58k | if (surrogate.any()) { | 43 | 20 | return false; | 44 | 20 | } | 45 | | | 46 | 1.56k | return scalar::utf32::validate(input, end - input); | 47 | 1.58k | } |
simdutf.cpp:simdutf::westmere::(anonymous namespace)::utf32::validate(char32_t const*, unsigned long) Line | Count | Source | 6 | 2.44k | simdutf_really_inline bool validate(const char32_t *input, size_t size) { | 7 | 2.44k | if (simdutf_unlikely(size == 0)) { | 8 | | // empty input is valid UTF-32. protect the implementation from | 9 | | // handling nullptr | 10 | 23 | return true; | 11 | 23 | } | 12 | | | 13 | 2.42k | const char32_t *end = input + size; | 14 | | | 15 | 2.42k | using vector_u32 = simd32<uint32_t>; | 16 | | | 17 | 2.42k | const auto standardmax = vector_u32::splat(0x10ffff); | 18 | 2.42k | const auto offset = vector_u32::splat(0xffff2000); | 19 | 2.42k | const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); | 20 | 2.42k | auto currentmax = vector_u32::zero(); | 21 | 2.42k | auto currentoffsetmax = vector_u32::zero(); | 22 | | | 23 | 2.42k | constexpr size_t N = vector_u32::ELEMENTS; | 24 | | | 25 | 2.31M | while (input + N < end) { | 26 | 2.31M | auto in = vector_u32(input); | 27 | 2.31M | if (!match_system(endianness::BIG)) { | 28 | 2.31M | in.swap_bytes(); | 29 | 2.31M | } | 30 | | | 31 | 2.31M | currentmax = max(currentmax, in); | 32 | 2.31M | currentoffsetmax = max(currentoffsetmax, in + offset); | 33 | 2.31M | input += N; | 34 | 2.31M | } | 35 | | | 36 | 2.42k | const auto too_large = currentmax > standardmax; | 37 | 2.42k | if (too_large.any()) { | 38 | 971 | return false; | 39 | 971 | } | 40 | | | 41 | 1.44k | const auto surrogate = currentoffsetmax > standardoffsetmax; | 42 | 1.44k | if (surrogate.any()) { | 43 | 23 | return false; | 44 | 23 | } | 45 | | | 46 | 1.42k | return scalar::utf32::validate(input, end - input); | 47 | 1.44k | } |
|
48 | | |
49 | | simdutf_really_inline result validate_with_errors(const char32_t *input, |
50 | 4.88k | size_t size) { |
51 | 4.88k | if (simdutf_unlikely(size == 0)) { |
52 | | // empty input is valid UTF-32. protect the implementation from |
53 | | // handling nullptr |
54 | 46 | return result(error_code::SUCCESS, 0); |
55 | 46 | } |
56 | | |
57 | 4.84k | const char32_t *start = input; |
58 | 4.84k | const char32_t *end = input + size; |
59 | | |
60 | 4.84k | using vector_u32 = simd32<uint32_t>; |
61 | | |
62 | 4.84k | const auto standardmax = vector_u32::splat(0x10ffff + 1); |
63 | 4.84k | const auto surrogate_mask = vector_u32::splat(0xfffff800); |
64 | 4.84k | const auto surrogate_byte = vector_u32::splat(0x0000d800); |
65 | | |
66 | 4.84k | constexpr size_t N = vector_u32::ELEMENTS; |
67 | | |
68 | 375k | while (input + N < end) { |
69 | 372k | auto in = vector_u32(input); |
70 | 372k | if (!match_system(endianness::BIG)) { |
71 | 372k | in.swap_bytes(); |
72 | 372k | } |
73 | | |
74 | 372k | const auto too_large = in >= standardmax; |
75 | 372k | const auto surrogate = (in & surrogate_mask) == surrogate_byte; |
76 | | |
77 | 372k | const auto combined = too_large | surrogate; |
78 | 372k | if (simdutf_unlikely(combined.any())) { |
79 | 1.85k | const size_t consumed = input - start; |
80 | 1.85k | auto sr = scalar::utf32::validate_with_errors(input, end - input); |
81 | 1.85k | sr.count += consumed; |
82 | | |
83 | 1.85k | return sr; |
84 | 1.85k | } |
85 | | |
86 | 370k | input += N; |
87 | 370k | } |
88 | | |
89 | 2.98k | const size_t consumed = input - start; |
90 | 2.98k | auto sr = scalar::utf32::validate_with_errors(input, end - input); |
91 | 2.98k | sr.count += consumed; |
92 | | |
93 | 2.98k | return sr; |
94 | 4.84k | } simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf32::validate_with_errors(char32_t const*, unsigned long) Line | Count | Source | 50 | 2.44k | size_t size) { | 51 | 2.44k | if (simdutf_unlikely(size == 0)) { | 52 | | // empty input is valid UTF-32. protect the implementation from | 53 | | // handling nullptr | 54 | 23 | return result(error_code::SUCCESS, 0); | 55 | 23 | } | 56 | | | 57 | 2.42k | const char32_t *start = input; | 58 | 2.42k | const char32_t *end = input + size; | 59 | | | 60 | 2.42k | using vector_u32 = simd32<uint32_t>; | 61 | | | 62 | 2.42k | const auto standardmax = vector_u32::splat(0x10ffff + 1); | 63 | 2.42k | const auto surrogate_mask = vector_u32::splat(0xfffff800); | 64 | 2.42k | const auto surrogate_byte = vector_u32::splat(0x0000d800); | 65 | | | 66 | 2.42k | constexpr size_t N = vector_u32::ELEMENTS; | 67 | | | 68 | 125k | while (input + N < end) { | 69 | 124k | auto in = vector_u32(input); | 70 | 124k | if (!match_system(endianness::BIG)) { | 71 | 124k | in.swap_bytes(); | 72 | 124k | } | 73 | | | 74 | 124k | const auto too_large = in >= standardmax; | 75 | 124k | const auto surrogate = (in & surrogate_mask) == surrogate_byte; | 76 | | | 77 | 124k | const auto combined = too_large | surrogate; | 78 | 124k | if (simdutf_unlikely(combined.any())) { | 79 | 858 | const size_t consumed = input - start; | 80 | 858 | auto sr = scalar::utf32::validate_with_errors(input, end - input); | 81 | 858 | sr.count += consumed; | 82 | | | 83 | 858 | return sr; | 84 | 858 | } | 85 | | | 86 | 123k | input += N; | 87 | 123k | } | 88 | | | 89 | 1.56k | const size_t consumed = input - start; | 90 | 1.56k | auto sr = scalar::utf32::validate_with_errors(input, end - input); | 91 | 1.56k | sr.count += consumed; | 92 | | | 93 | 1.56k | return sr; | 94 | 2.42k | } |
simdutf.cpp:simdutf::westmere::(anonymous namespace)::utf32::validate_with_errors(char32_t const*, unsigned long) Line | Count | Source | 50 | 2.44k | size_t size) { | 51 | 2.44k | if (simdutf_unlikely(size == 0)) { | 52 | | // empty input is valid UTF-32. protect the implementation from | 53 | | // handling nullptr | 54 | 23 | return result(error_code::SUCCESS, 0); | 55 | 23 | } | 56 | | | 57 | 2.42k | const char32_t *start = input; | 58 | 2.42k | const char32_t *end = input + size; | 59 | | | 60 | 2.42k | using vector_u32 = simd32<uint32_t>; | 61 | | | 62 | 2.42k | const auto standardmax = vector_u32::splat(0x10ffff + 1); | 63 | 2.42k | const auto surrogate_mask = vector_u32::splat(0xfffff800); | 64 | 2.42k | const auto surrogate_byte = vector_u32::splat(0x0000d800); | 65 | | | 66 | 2.42k | constexpr size_t N = vector_u32::ELEMENTS; | 67 | | | 68 | 249k | while (input + N < end) { | 69 | 248k | auto in = vector_u32(input); | 70 | 248k | if (!match_system(endianness::BIG)) { | 71 | 248k | in.swap_bytes(); | 72 | 248k | } | 73 | | | 74 | 248k | const auto too_large = in >= standardmax; | 75 | 248k | const auto surrogate = (in & surrogate_mask) == surrogate_byte; | 76 | | | 77 | 248k | const auto combined = too_large | surrogate; | 78 | 248k | if (simdutf_unlikely(combined.any())) { | 79 | 994 | const size_t consumed = input - start; | 80 | 994 | auto sr = scalar::utf32::validate_with_errors(input, end - input); | 81 | 994 | sr.count += consumed; | 82 | | | 83 | 994 | return sr; | 84 | 994 | } | 85 | | | 86 | 247k | input += N; | 87 | 247k | } | 88 | | | 89 | 1.42k | const size_t consumed = input - start; | 90 | 1.42k | auto sr = scalar::utf32::validate_with_errors(input, end - input); | 91 | 1.42k | sr.count += consumed; | 92 | | | 93 | 1.42k | return sr; | 94 | 2.42k | } |
|
95 | | |
96 | | } // namespace utf32 |
97 | | } // unnamed namespace |
98 | | } // namespace SIMDUTF_IMPLEMENTATION |
99 | | } // namespace simdutf |