/src/simdutf/src/icelake/icelake_utf8_validation.inl.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // file included directly |
2 | | |
3 | | simdutf_really_inline __m512i check_special_cases(__m512i input, |
4 | 0 | const __m512i prev1) { |
5 | 0 | __m512i mask1 = _mm512_setr_epi64(0x0202020202020202, 0x4915012180808080, |
6 | 0 | 0x0202020202020202, 0x4915012180808080, |
7 | 0 | 0x0202020202020202, 0x4915012180808080, |
8 | 0 | 0x0202020202020202, 0x4915012180808080); |
9 | 0 | const __m512i v_0f = _mm512_set1_epi8(0x0f); |
10 | 0 | __m512i index1 = _mm512_and_si512(_mm512_srli_epi16(prev1, 4), v_0f); |
11 | |
|
12 | 0 | __m512i byte_1_high = _mm512_shuffle_epi8(mask1, index1); |
13 | 0 | __m512i mask2 = _mm512_setr_epi64(0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, |
14 | 0 | 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, |
15 | 0 | 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, |
16 | 0 | 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb); |
17 | 0 | __m512i index2 = _mm512_and_si512(prev1, v_0f); |
18 | |
|
19 | 0 | __m512i byte_1_low = _mm512_shuffle_epi8(mask2, index2); |
20 | 0 | __m512i mask3 = |
21 | 0 | _mm512_setr_epi64(0x101010101010101, 0x1010101babaaee6, 0x101010101010101, |
22 | 0 | 0x1010101babaaee6, 0x101010101010101, 0x1010101babaaee6, |
23 | 0 | 0x101010101010101, 0x1010101babaaee6); |
24 | 0 | __m512i index3 = _mm512_and_si512(_mm512_srli_epi16(input, 4), v_0f); |
25 | 0 | __m512i byte_2_high = _mm512_shuffle_epi8(mask3, index3); |
26 | 0 | return _mm512_ternarylogic_epi64(byte_1_high, byte_1_low, byte_2_high, 128); |
27 | 0 | } |
28 | | |
29 | | simdutf_really_inline __m512i check_multibyte_lengths(const __m512i input, |
30 | | const __m512i prev_input, |
31 | 0 | const __m512i sc) { |
32 | 0 | __m512i prev2 = prev<2>(input, prev_input); |
33 | 0 | __m512i prev3 = prev<3>(input, prev_input); |
34 | 0 | __m512i is_third_byte = _mm512_subs_epu8( |
35 | 0 | prev2, _mm512_set1_epi8(0b11100000u - 1)); // Only 111_____ will be > 0 |
36 | 0 | __m512i is_fourth_byte = _mm512_subs_epu8( |
37 | 0 | prev3, _mm512_set1_epi8(0b11110000u - 1)); // Only 1111____ will be > 0 |
38 | 0 | __m512i is_third_or_fourth_byte = |
39 | 0 | _mm512_or_si512(is_third_byte, is_fourth_byte); |
40 | 0 | const __m512i v_7f = _mm512_set1_epi8(char(0x7f)); |
41 | 0 | is_third_or_fourth_byte = _mm512_adds_epu8(v_7f, is_third_or_fourth_byte); |
42 | | // We want to compute (is_third_or_fourth_byte AND v80) XOR sc. |
43 | 0 | const __m512i v_80 = _mm512_set1_epi8(char(0x80)); |
44 | 0 | return _mm512_ternarylogic_epi32(is_third_or_fourth_byte, v_80, sc, |
45 | 0 | 0b1101010); |
46 | | //__m512i is_third_or_fourth_byte_mask = |
47 | | //_mm512_and_si512(is_third_or_fourth_byte, v_80); return |
48 | | // _mm512_xor_si512(is_third_or_fourth_byte_mask, sc); |
49 | 0 | } |
50 | | // |
51 | | // Return nonzero if there are incomplete multibyte characters at the end of the |
52 | | // block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. |
53 | | // |
54 | 0 | simdutf_really_inline __m512i is_incomplete(const __m512i input) { |
55 | | // If the previous input's last 3 bytes match this, they're too short (they |
56 | | // ended at EOF): |
57 | | // ... 1111____ 111_____ 11______ |
58 | 0 | __m512i max_value = _mm512_setr_epi64(0xffffffffffffffff, 0xffffffffffffffff, |
59 | 0 | 0xffffffffffffffff, 0xffffffffffffffff, |
60 | 0 | 0xffffffffffffffff, 0xffffffffffffffff, |
61 | 0 | 0xffffffffffffffff, 0xbfdfefffffffffff); |
62 | 0 | return _mm512_subs_epu8(input, max_value); |
63 | 0 | } |
64 | | |
65 | | struct avx512_utf8_checker { |
66 | | // If this is nonzero, there has been a UTF-8 error. |
67 | | __m512i error{}; |
68 | | |
69 | | // The last input we received |
70 | | __m512i prev_input_block{}; |
71 | | // Whether the last input we received was incomplete (used for ASCII fast |
72 | | // path) |
73 | | __m512i prev_incomplete{}; |
74 | | |
75 | | // |
76 | | // Check whether the current bytes are valid UTF-8. |
77 | | // |
78 | | simdutf_really_inline void check_utf8_bytes(const __m512i input, |
79 | 0 | const __m512i prev_input) { |
80 | | // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ |
81 | | // lead bytes (2, 3, 4-byte leads become large positive numbers instead of |
82 | | // small negative numbers) |
83 | 0 | __m512i prev1 = prev<1>(input, prev_input); |
84 | 0 | __m512i sc = check_special_cases(input, prev1); |
85 | 0 | this->error = _mm512_or_si512( |
86 | 0 | check_multibyte_lengths(input, prev_input, sc), this->error); |
87 | 0 | } |
88 | | |
89 | | // The only problem that can happen at EOF is that a multibyte character is |
90 | | // too short or a byte value too large in the last bytes: check_special_cases |
91 | | // only checks for bytes too large in the first of two bytes. |
92 | 0 | simdutf_really_inline void check_eof() { |
93 | | // If the previous block had incomplete UTF-8 characters at the end, an |
94 | | // ASCII block can't possibly finish them. |
95 | 0 | this->error = _mm512_or_si512(this->error, this->prev_incomplete); |
96 | 0 | } |
97 | | |
98 | | // returns true if ASCII. |
99 | 0 | simdutf_really_inline bool check_next_input(const __m512i input) { |
100 | 0 | const __m512i v_80 = _mm512_set1_epi8(char(0x80)); |
101 | 0 | const __mmask64 ascii = _mm512_test_epi8_mask(input, v_80); |
102 | 0 | if (ascii == 0) { |
103 | 0 | this->error = _mm512_or_si512(this->error, this->prev_incomplete); |
104 | 0 | return true; |
105 | 0 | } else { |
106 | 0 | this->check_utf8_bytes(input, this->prev_input_block); |
107 | 0 | this->prev_incomplete = is_incomplete(input); |
108 | 0 | this->prev_input_block = input; |
109 | 0 | return false; |
110 | 0 | } |
111 | 0 | } |
112 | | // do not forget to call check_eof! |
113 | 0 | simdutf_really_inline bool errors() const { |
114 | 0 | return _mm512_test_epi8_mask(this->error, this->error) != 0; |
115 | 0 | } |
116 | | }; // struct avx512_utf8_checker |