/src/simdutf/include/simdutf/scalar/ascii.h
Line | Count | Source |
1 | | #ifndef SIMDUTF_ASCII_H |
2 | | #define SIMDUTF_ASCII_H |
3 | | |
4 | | namespace simdutf { |
5 | | namespace scalar { |
6 | | namespace { |
7 | | namespace ascii { |
8 | | |
/**
 * Checks whether every byte of the input is ASCII, i.e. has its most
 * significant bit clear.
 *
 * @param data input that can be indexed as byte-like elements
 * @param len  number of elements of `data` to inspect
 * @return true when none of the first `len` bytes is >= 0x80 (this includes
 *         the case len == 0), false as soon as such a byte is detected
 */
template <class InputPtr>
#if SIMDUTF_CPLUSPLUS20
  requires simdutf::detail::indexes_into_byte_like<InputPtr>
#endif
simdutf_warn_unused simdutf_constexpr23 bool validate(InputPtr data,
                                                      size_t len) noexcept {
  // The index uses the same type as `len`, so comparisons and pointer
  // arithmetic never mix integer widths.
  size_t pos = 0;

#if SIMDUTF_CPLUSPLUS23
  // avoid memcpy during constant evaluation
  if !consteval
#endif
  {
    // process in blocks of 16 bytes when possible; `len - pos` cannot wrap
    // around because pos never exceeds len
    for (; len - pos >= 16; pos += 16) {
      uint64_t v1;
      std::memcpy(&v1, data + pos, sizeof(uint64_t));
      uint64_t v2;
      std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
      // OR-ing both words keeps any set high bit, so a single mask test
      // covers all 16 bytes of the block
      uint64_t v{v1 | v2};
      if ((v & 0x8080808080808080) != 0) {
        return false;
      }
    }
  }

  // process the tail byte-by-byte (or the whole input when the block loop
  // above was skipped)
  for (; pos < len; pos++) {
    if (static_cast<std::uint8_t>(data[pos]) >= 0b10000000) {
      return false;
    }
  }
  return true;
}
43 | | template <class InputPtr> |
44 | | #if SIMDUTF_CPLUSPLUS20 |
45 | | requires simdutf::detail::indexes_into_byte_like<InputPtr> |
46 | | #endif |
47 | | simdutf_warn_unused simdutf_constexpr23 result |
48 | 760 | validate_with_errors(InputPtr data, size_t len) noexcept { |
49 | 760 | size_t pos = 0; |
50 | | #if SIMDUTF_CPLUSPLUS23 |
51 | | // avoid memcpy during constant evaluation |
52 | | if !consteval |
53 | | #endif |
54 | 760 | { |
55 | | // process in blocks of 16 bytes when possible |
56 | 220k | for (; pos + 16 <= len; pos += 16) { |
57 | 219k | uint64_t v1; |
58 | 219k | std::memcpy(&v1, data + pos, sizeof(uint64_t)); |
59 | 219k | uint64_t v2; |
60 | 219k | std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); |
61 | 219k | uint64_t v{v1 | v2}; |
62 | 219k | if ((v & 0x8080808080808080) != 0) { |
63 | 2.99k | for (; pos < len; pos++) { |
64 | 2.99k | if (static_cast<std::uint8_t>(data[pos]) >= 0b10000000) { |
65 | 585 | return result(error_code::TOO_LARGE, pos); |
66 | 585 | } |
67 | 2.99k | } |
68 | 585 | } |
69 | 219k | } |
70 | 760 | } |
71 | | |
72 | | // process the tail byte-by-byte |
73 | 725 | for (; pos < len; pos++) { |
74 | 688 | if (static_cast<std::uint8_t>(data[pos]) >= 0b10000000) { |
75 | 138 | return result(error_code::TOO_LARGE, pos); |
76 | 138 | } |
77 | 688 | } |
78 | 37 | return result(error_code::SUCCESS, pos); |
79 | 175 | } |
80 | | |
81 | | } // namespace ascii |
82 | | } // unnamed namespace |
83 | | } // namespace scalar |
84 | | } // namespace simdutf |
85 | | |
86 | | #endif |