/src/simdutf/src/scalar/ascii.h
Line | Count | Source |
1 | | #ifndef SIMDUTF_ASCII_H |
2 | | #define SIMDUTF_ASCII_H |
3 | | |
4 | | namespace simdutf { |
5 | | namespace scalar { |
6 | | namespace { |
7 | | namespace ascii { |
8 | | #if SIMDUTF_IMPLEMENTATION_FALLBACK |
9 | | // Only used by the fallback kernel. |
// Checks whether the `len` bytes starting at `buf` are all below 0x80.
// Returns true if no byte has its most significant bit set (this includes the
// empty input), and false as soon as an offending byte or block is seen.
inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept {
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
  // One 0x80 per byte lane: selects the top bit of each of the eight bytes.
  const uint64_t high_bits = 0x8080808080808080;
  uint64_t offset = 0;
  // Fast path: examine 16 bytes per iteration as two 64-bit words. memcpy is
  // used for the loads so that no alignment is required of `buf`.
  while (offset + 16 <= len) {
    uint64_t words[2];
    std::memcpy(words, bytes + offset, sizeof(words));
    // OR-ing the words folds all sixteen top bits into a single test.
    if (((words[0] | words[1]) & high_bits) != 0) {
      return false;
    }
    offset += 16;
  }
  // Remaining bytes (fewer than 16) are checked one at a time.
  while (offset < len) {
    if ((bytes[offset] & 0x80) != 0) {
      return false;
    }
    ++offset;
  }
  return true;
}
32 | | #endif |
33 | | |
34 | | inline simdutf_warn_unused result validate_with_errors(const char *buf, |
35 | 758 | size_t len) noexcept { |
36 | 758 | const uint8_t *data = reinterpret_cast<const uint8_t *>(buf); |
37 | 758 | size_t pos = 0; |
38 | | // process in blocks of 16 bytes when possible |
39 | 224k | for (; pos + 16 <= len; pos += 16) { |
40 | 224k | uint64_t v1; |
41 | 224k | std::memcpy(&v1, data + pos, sizeof(uint64_t)); |
42 | 224k | uint64_t v2; |
43 | 224k | std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); |
44 | 224k | uint64_t v{v1 | v2}; |
45 | 224k | if ((v & 0x8080808080808080) != 0) { |
46 | 3.30k | for (; pos < len; pos++) { |
47 | 3.30k | if (data[pos] >= 0b10000000) { |
48 | 591 | return result(error_code::TOO_LARGE, pos); |
49 | 591 | } |
50 | 3.30k | } |
51 | 591 | } |
52 | 224k | } |
53 | | // process the tail byte-by-byte |
54 | 511 | for (; pos < len; pos++) { |
55 | 479 | if (data[pos] >= 0b10000000) { |
56 | 135 | return result(error_code::TOO_LARGE, pos); |
57 | 135 | } |
58 | 479 | } |
59 | 32 | return result(error_code::SUCCESS, pos); |
60 | 167 | } |
61 | | |
62 | | } // namespace ascii |
63 | | } // unnamed namespace |
64 | | } // namespace scalar |
65 | | } // namespace simdutf |
66 | | |
67 | | #endif |