/src/simdutf/src/haswell/avx2_utf16fix.cpp
Line | Count | Source |
1 | | /* |
2 | | * Process one block of 16 characters. If in_place is false, |
3 | | * copy the block from in to out. If there is a sequencing |
4 | | * error in the block, overwrite the illsequenced characters |
5 | | * with the replacement character. This function reads one |
6 | | * character before the beginning of the buffer as a lookback. |
7 | | * If that character is illsequenced, it too is overwritten. |
8 | | */ |
9 | | template <endianness big_endian, bool in_place> |
10 | 0 | void utf16fix_block(char16_t *out, const char16_t *in) { |
11 | 0 | const char16_t replacement = scalar::utf16::replacement<big_endian>(); |
12 | 0 | auto swap_if_needed = [](uint16_t c) -> uint16_t { |
13 | 0 | return !simdutf::match_system(big_endian) ? scalar::u16_swap_bytes(c) : c; |
14 | 0 | }; Unexecuted instantiation: simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf16fix_block<(simdutf::endianness)0, true>(char16_t*, char16_t const*)::{lambda(unsigned short)#1}::operator()(unsigned short) constUnexecuted instantiation: simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf16fix_block<(simdutf::endianness)0, false>(char16_t*, char16_t const*)::{lambda(unsigned short)#1}::operator()(unsigned short) constUnexecuted instantiation: simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf16fix_block<(simdutf::endianness)1, true>(char16_t*, char16_t const*)::{lambda(unsigned short)#1}::operator()(unsigned short) constUnexecuted instantiation: simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf16fix_block<(simdutf::endianness)1, false>(char16_t*, char16_t const*)::{lambda(unsigned short)#1}::operator()(unsigned short) const |
15 | 0 | __m256i lookback, block, lb_masked, block_masked, lb_is_high, block_is_low; |
16 | 0 | __m256i illseq, lb_illseq, block_illseq, lb_illseq_shifted; |
17 | |
|
18 | 0 | lookback = _mm256_loadu_si256((const __m256i *)(in - 1)); |
19 | 0 | block = _mm256_loadu_si256((const __m256i *)in); |
20 | 0 | lb_masked = |
21 | 0 | _mm256_and_si256(lookback, _mm256_set1_epi16(swap_if_needed(0xfc00u))); |
22 | 0 | block_masked = |
23 | 0 | _mm256_and_si256(block, _mm256_set1_epi16(swap_if_needed(0xfc00u))); |
24 | 0 | lb_is_high = |
25 | 0 | _mm256_cmpeq_epi16(lb_masked, _mm256_set1_epi16(swap_if_needed(0xd800u))); |
26 | 0 | block_is_low = _mm256_cmpeq_epi16(block_masked, |
27 | 0 | _mm256_set1_epi16(swap_if_needed(0xdc00u))); |
28 | |
|
29 | 0 | illseq = _mm256_xor_si256(lb_is_high, block_is_low); |
30 | 0 | if (!_mm256_testz_si256(illseq, illseq)) { |
31 | 0 | int lb; |
32 | | |
33 | | /* compute the cause of the illegal sequencing */ |
34 | 0 | lb_illseq = _mm256_andnot_si256(block_is_low, lb_is_high); |
35 | | #if SIMDUTF_GCC9OROLDER |
36 | | // Old GCC versions are missing _mm256_zextsi128_si256, so we emulate it. |
37 | | __m128i tmp_legacygcc = |
38 | | _mm_bslli_si128(_mm256_extracti128_si256(lb_illseq, 1), 14); |
39 | | __m256i tmp_legacygcc256 = |
40 | | _mm256_set_m128i(_mm_setzero_si128(), tmp_legacygcc); |
41 | | lb_illseq_shifted = |
42 | | _mm256_or_si256(_mm256_bsrli_epi128(lb_illseq, 2), tmp_legacygcc256); |
43 | | #else |
44 | 0 | lb_illseq_shifted = |
45 | 0 | _mm256_or_si256(_mm256_bsrli_epi128(lb_illseq, 2), |
46 | 0 | _mm256_zextsi128_si256(_mm_bslli_si128( |
47 | 0 | _mm256_extracti128_si256(lb_illseq, 1), 14))); |
48 | 0 | #endif // SIMDUTF_GCC9OROLDER |
49 | 0 | block_illseq = _mm256_or_si256( |
50 | 0 | _mm256_andnot_si256(lb_is_high, block_is_low), lb_illseq_shifted); |
51 | | |
52 | | /* fix illegal sequencing in the lookback */ |
53 | | #if SIMDUTF_GCC10 || SIMDUTF_GCC9OROLDER |
54 | | // GCC 10 is missing important intrinsics. |
55 | | lb = _mm_cvtsi128_si32(_mm256_extractf128_si256(lb_illseq, 0)); |
56 | | #else |
57 | 0 | lb = _mm256_cvtsi256_si32(lb_illseq); |
58 | 0 | #endif |
59 | 0 | lb = (lb & replacement) | (~lb & out[-1]); |
60 | 0 | out[-1] = char16_t(lb); |
61 | | |
62 | | /* fix illegal sequencing in the main block */ |
63 | 0 | block = |
64 | 0 | _mm256_blendv_epi8(block, _mm256_set1_epi16(replacement), block_illseq); |
65 | 0 | _mm256_storeu_si256((__m256i *)out, block); |
66 | 0 | } else if (!in_place) { |
67 | 0 | _mm256_storeu_si256((__m256i *)out, block); |
68 | 0 | } |
69 | 0 | } Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_block<(simdutf::endianness)0, true>(char16_t*, char16_t const*) Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_block<(simdutf::endianness)0, false>(char16_t*, char16_t const*) Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_block<(simdutf::endianness)1, true>(char16_t*, char16_t const*) Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_block<(simdutf::endianness)1, false>(char16_t*, char16_t const*) |
70 | | |
71 | | template <endianness big_endian, bool in_place> |
72 | 0 | void utf16fix_block_sse(char16_t *out, const char16_t *in) { |
73 | 0 | const char16_t replacement = scalar::utf16::replacement<big_endian>(); |
74 | 0 | auto swap_if_needed = [](uint16_t c) -> uint16_t { |
75 | 0 | return !simdutf::match_system(big_endian) ? scalar::u16_swap_bytes(c) : c; |
76 | 0 | }; Unexecuted instantiation: simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf16fix_block_sse<(simdutf::endianness)0, true>(char16_t*, char16_t const*)::{lambda(unsigned short)#1}::operator()(unsigned short) constUnexecuted instantiation: simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf16fix_block_sse<(simdutf::endianness)0, false>(char16_t*, char16_t const*)::{lambda(unsigned short)#1}::operator()(unsigned short) constUnexecuted instantiation: simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf16fix_block_sse<(simdutf::endianness)1, true>(char16_t*, char16_t const*)::{lambda(unsigned short)#1}::operator()(unsigned short) constUnexecuted instantiation: simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf16fix_block_sse<(simdutf::endianness)1, false>(char16_t*, char16_t const*)::{lambda(unsigned short)#1}::operator()(unsigned short) const |
77 | |
|
78 | 0 | __m128i lookback, block, lb_masked, block_masked, lb_is_high, block_is_low; |
79 | 0 | __m128i illseq, lb_illseq, block_illseq; |
80 | |
|
81 | 0 | lookback = _mm_loadu_si128((const __m128i *)(in - 1)); |
82 | 0 | block = _mm_loadu_si128((const __m128i *)in); |
83 | 0 | lb_masked = _mm_and_si128(lookback, _mm_set1_epi16(swap_if_needed(0xfc00U))); |
84 | 0 | block_masked = _mm_and_si128(block, _mm_set1_epi16(swap_if_needed(0xfc00U))); |
85 | 0 | lb_is_high = |
86 | 0 | _mm_cmpeq_epi16(lb_masked, _mm_set1_epi16(swap_if_needed(0xd800U))); |
87 | 0 | block_is_low = |
88 | 0 | _mm_cmpeq_epi16(block_masked, _mm_set1_epi16(swap_if_needed(0xdc00U))); |
89 | |
|
90 | 0 | illseq = _mm_xor_si128(lb_is_high, block_is_low); |
91 | 0 | if (_mm_movemask_epi8(illseq) != 0) { |
92 | | /* compute the cause of the illegal sequencing */ |
93 | 0 | lb_illseq = _mm_andnot_si128(block_is_low, lb_is_high); |
94 | 0 | block_illseq = _mm_or_si128(_mm_andnot_si128(lb_is_high, block_is_low), |
95 | 0 | _mm_bsrli_si128(lb_illseq, 2)); |
96 | | /* fix illegal sequencing in the lookback */ |
97 | 0 | int lb = _mm_cvtsi128_si32(lb_illseq); |
98 | 0 | lb = (lb & replacement) | (~lb & out[-1]); |
99 | 0 | out[-1] = char16_t(lb); |
100 | | /* fix illegal sequencing in the main block */ |
101 | 0 | block = |
102 | 0 | _mm_or_si128(_mm_andnot_si128(block_illseq, block), |
103 | 0 | _mm_and_si128(block_illseq, _mm_set1_epi16(replacement))); |
104 | 0 | _mm_storeu_si128((__m128i *)out, block); |
105 | 0 | } else if (!in_place) { |
106 | 0 | _mm_storeu_si128((__m128i *)out, block); |
107 | 0 | } |
108 | 0 | } Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_block_sse<(simdutf::endianness)0, true>(char16_t*, char16_t const*) Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_block_sse<(simdutf::endianness)0, false>(char16_t*, char16_t const*) Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_block_sse<(simdutf::endianness)1, true>(char16_t*, char16_t const*) Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_block_sse<(simdutf::endianness)1, false>(char16_t*, char16_t const*) |
109 | | |
110 | | template <endianness big_endian> |
111 | 0 | void utf16fix_sse(const char16_t *in, size_t n, char16_t *out) { |
112 | 0 | const char16_t replacement = scalar::utf16::replacement<big_endian>(); |
113 | 0 | size_t i; |
114 | |
|
115 | 0 | if (n < 9) { |
116 | 0 | scalar::utf16::to_well_formed_utf16<big_endian>(in, n, out); |
117 | 0 | return; |
118 | 0 | } |
119 | | |
120 | 0 | out[0] = |
121 | 0 | scalar::utf16::is_low_surrogate<big_endian>(in[0]) ? replacement : in[0]; |
122 | | |
123 | | /* duplicate code to have the compiler specialise utf16fix_block() */ |
124 | 0 | if (in == out) { |
125 | 0 | for (i = 1; i + 8 < n; i += 8) { |
126 | 0 | utf16fix_block_sse<big_endian, true>(out + i, in + i); |
127 | 0 | } |
128 | |
|
129 | 0 | utf16fix_block_sse<big_endian, true>(out + n - 8, in + n - 8); |
130 | 0 | } else { |
131 | 0 | for (i = 1; i + 8 < n; i += 8) { |
132 | 0 | utf16fix_block_sse<big_endian, false>(out + i, in + i); |
133 | 0 | } |
134 | |
|
135 | 0 | utf16fix_block_sse<big_endian, false>(out + n - 8, in + n - 8); |
136 | 0 | } |
137 | |
|
138 | 0 | out[n - 1] = scalar::utf16::is_high_surrogate<big_endian>(out[n - 1]) |
139 | 0 | ? replacement |
140 | 0 | : out[n - 1]; |
141 | 0 | } Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_sse<(simdutf::endianness)0>(char16_t const*, unsigned long, char16_t*) Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_sse<(simdutf::endianness)1>(char16_t const*, unsigned long, char16_t*) |
142 | | |
143 | | template <endianness big_endian> |
144 | 0 | void utf16fix_avx(const char16_t *in, size_t n, char16_t *out) { |
145 | 0 | const char16_t replacement = scalar::utf16::replacement<big_endian>(); |
146 | 0 | size_t i; |
147 | |
|
148 | 0 | if (n < 17) { |
149 | 0 | utf16fix_sse<big_endian>(in, n, out); |
150 | 0 | return; |
151 | 0 | } |
152 | | |
153 | 0 | out[0] = |
154 | 0 | scalar::utf16::is_low_surrogate<big_endian>(in[0]) ? replacement : in[0]; |
155 | | |
156 | | /* duplicate code to have the compiler specialise utf16fix_block() */ |
157 | 0 | if (in == out) { |
158 | 0 | for (i = 1; i + 16 < n; i += 16) { |
159 | 0 | utf16fix_block<big_endian, true>(out + i, in + i); |
160 | 0 | } |
161 | |
|
162 | 0 | utf16fix_block<big_endian, true>(out + n - 16, in + n - 16); |
163 | 0 | } else { |
164 | 0 | for (i = 1; i + 16 < n; i += 16) { |
165 | 0 | utf16fix_block<big_endian, false>(out + i, in + i); |
166 | 0 | } |
167 | |
|
168 | 0 | utf16fix_block<big_endian, false>(out + n - 16, in + n - 16); |
169 | 0 | } |
170 | |
|
171 | 0 | out[n - 1] = scalar::utf16::is_high_surrogate<big_endian>(out[n - 1]) |
172 | 0 | ? replacement |
173 | 0 | : out[n - 1]; |
174 | 0 | } Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_avx<(simdutf::endianness)0>(char16_t const*, unsigned long, char16_t*) Unexecuted instantiation: simdutf.cpp:void simdutf::haswell::(anonymous namespace)::utf16fix_avx<(simdutf::endianness)1>(char16_t const*, unsigned long, char16_t*) |