/src/simdutf/src/implementation.cpp
Line | Count | Source |
1 | | #include "simdutf.h" |
2 | | #include <initializer_list> |
3 | | #include <climits> |
4 | | #include <type_traits> |
5 | | #if SIMDUTF_ATOMIC_REF |
6 | | #include <array> |
7 | | #include "simdutf/scalar/atomic_util.h" |
8 | | #endif |
9 | | |
10 | | static_assert(sizeof(uint8_t) == sizeof(char), |
11 | | "simdutf requires that uint8_t be a char"); |
12 | | static_assert(sizeof(uint16_t) == sizeof(char16_t), |
13 | | "simdutf requires that char16_t be 16 bits"); |
14 | | static_assert(sizeof(uint32_t) == sizeof(char32_t), |
15 | | "simdutf requires that char32_t be 32 bits"); |
16 | | // next line is redundant, but it is kept to catch defective systems. |
17 | | static_assert(CHAR_BIT == 8, "simdutf requires 8-bit bytes"); |
18 | | |
19 | | // Useful for debugging purposes |
20 | | namespace simdutf { |
21 | | namespace { |
22 | | |
/**
 * Debugging helper: renders the bits of an integer as a string of '0' and '1'
 * characters, most significant bit first. The result always contains
 * sizeof(T) * CHAR_BIT characters.
 *
 * All mask arithmetic is done in the unsigned counterpart of T. With a signed
 * T the top-bit mask would be negative, so a `mask > 0` loop would never run
 * and the function would return an empty string.
 *
 * @tparam T an integer type other than bool (signed or unsigned).
 * @param b  the value whose bit pattern is rendered.
 * @return   the bit pattern of b as text.
 */
template <typename T> std::string toBinaryString(T b) {
  static_assert(!std::is_same<T, bool>::value,
                "toBinaryString requires a non-bool integer type");
  using unsigned_type = typename std::make_unsigned<T>::type;

  // Converting to the unsigned type keeps the two's complement bit pattern.
  const unsigned_type bits = static_cast<unsigned_type>(b);

  std::string binary;
  binary.reserve(sizeof(T) * CHAR_BIT);

  // The casts undo integer promotion for types narrower than int.
  unsigned_type mask = static_cast<unsigned_type>(
      unsigned_type(1) << (sizeof(T) * CHAR_BIT - 1));
  while (mask != 0) {
    binary += ((bits & mask) == 0) ? '0' : '1';
    mask = static_cast<unsigned_type>(mask >> 1);
  }
  return binary;
}
32 | | } // namespace |
33 | | } // namespace simdutf |
34 | | |
35 | | namespace simdutf { |
36 | 8 | bool implementation::supported_by_runtime_system() const { |
37 | 8 | uint32_t required_instruction_sets = this->required_instruction_sets(); |
38 | 8 | uint32_t supported_instruction_sets = |
39 | 8 | internal::detect_supported_architectures(); |
40 | 8 | return ((supported_instruction_sets & required_instruction_sets) == |
41 | 8 | required_instruction_sets); |
42 | 8 | } |
43 | | |
44 | | #if SIMDUTF_FEATURE_DETECT_ENCODING |
45 | | simdutf_warn_unused encoding_type implementation::autodetect_encoding( |
46 | 0 | const char *input, size_t length) const noexcept { |
47 | | // If there is a BOM, then we trust it. |
48 | 0 | auto bom_encoding = simdutf::BOM::check_bom(input, length); |
49 | 0 | if (bom_encoding != encoding_type::unspecified) { |
50 | 0 | return bom_encoding; |
51 | 0 | } |
52 | | // UTF8 is common, it includes ASCII, and is commonly represented |
53 | | // without a BOM, so if it fits, go with that. Note that it is still |
54 | | // possible to get it wrong, we are only 'guessing'. If some has UTF-16 |
55 | | // data without a BOM, it could pass as UTF-8. |
56 | | // |
57 | | // An interesting twist might be to check for UTF-16 ASCII first (every |
58 | | // other byte is zero). |
59 | 0 | if (validate_utf8(input, length)) { |
60 | 0 | return encoding_type::UTF8; |
61 | 0 | } |
62 | | // The next most common encoding that might appear without BOM is probably |
63 | | // UTF-16LE, so try that next. |
64 | 0 | if ((length % 2) == 0) { |
65 | | // important: we need to divide by two |
66 | 0 | if (validate_utf16le(reinterpret_cast<const char16_t *>(input), |
67 | 0 | length / 2)) { |
68 | 0 | return encoding_type::UTF16_LE; |
69 | 0 | } |
70 | 0 | } |
71 | 0 | if ((length % 4) == 0) { |
72 | 0 | if (validate_utf32(reinterpret_cast<const char32_t *>(input), length / 4)) { |
73 | 0 | return encoding_type::UTF32_LE; |
74 | 0 | } |
75 | 0 | } |
76 | 0 | return encoding_type::unspecified; |
77 | 0 | } |
78 | | |
79 | | #ifdef SIMDUTF_INTERNAL_TESTS |
80 | | std::vector<implementation::TestProcedure> |
81 | | implementation::internal_tests() const { |
82 | | return {}; |
83 | | } |
84 | | #endif |
85 | | #endif // SIMDUTF_FEATURE_DETECT_ENCODING |
86 | | |
87 | | #if SIMDUTF_FEATURE_BASE64 |
88 | | simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( |
89 | 0 | const char *input, size_t length) const noexcept { |
90 | 0 | return scalar::base64::maximal_binary_length_from_base64(input, length); |
91 | 0 | } |
92 | | |
93 | | simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( |
94 | 0 | const char16_t *input, size_t length) const noexcept { |
95 | 0 | return scalar::base64::maximal_binary_length_from_base64(input, length); |
96 | 0 | } |
97 | | simdutf_warn_unused size_t implementation::base64_length_from_binary( |
98 | 0 | size_t length, base64_options options) const noexcept { |
99 | 0 | return scalar::base64::base64_length_from_binary(length, options); |
100 | 0 | } |
101 | | #endif // SIMDUTF_FEATURE_BASE64 |
102 | | |
103 | | namespace internal { |
104 | | // When there is a single implementation, we should not pay a price |
105 | | // for dispatching to the best implementation. We should just use the |
106 | | // one we have. This is a compile-time check. |
// Every kernel that has a singleton getter below must be counted here,
// including RVV. If a kernel were left out of the sum, a build containing that
// kernel plus one other would be classified as "single implementation" and
// the uncounted kernel could never be selected.
#define SIMDUTF_SINGLE_IMPLEMENTATION                                          \
  (SIMDUTF_IMPLEMENTATION_ICELAKE + SIMDUTF_IMPLEMENTATION_HASWELL +           \
       SIMDUTF_IMPLEMENTATION_WESTMERE + SIMDUTF_IMPLEMENTATION_ARM64 +        \
       SIMDUTF_IMPLEMENTATION_PPC64 + SIMDUTF_IMPLEMENTATION_RVV +             \
       SIMDUTF_IMPLEMENTATION_LSX + SIMDUTF_IMPLEMENTATION_LASX +              \
       SIMDUTF_IMPLEMENTATION_FALLBACK ==                                      \
   1)

// Static array of known implementations. We are hoping these get baked into the
// executable without requiring a static initializer.
//
// Each getter owns one function-local static instance of its kernel and
// returns a pointer to it.

#if SIMDUTF_IMPLEMENTATION_ICELAKE
static const icelake::implementation *get_icelake_singleton() {
  static const icelake::implementation icelake_singleton{};
  return &icelake_singleton;
}
#endif
#if SIMDUTF_IMPLEMENTATION_HASWELL
static const haswell::implementation *get_haswell_singleton() {
  static const haswell::implementation haswell_singleton{};
  return &haswell_singleton;
}
#endif
#if SIMDUTF_IMPLEMENTATION_WESTMERE
static const westmere::implementation *get_westmere_singleton() {
  static const westmere::implementation westmere_singleton{};
  return &westmere_singleton;
}
#endif
#if SIMDUTF_IMPLEMENTATION_ARM64
static const arm64::implementation *get_arm64_singleton() {
  static const arm64::implementation arm64_singleton{};
  return &arm64_singleton;
}
#endif
#if SIMDUTF_IMPLEMENTATION_PPC64
static const ppc64::implementation *get_ppc64_singleton() {
  static const ppc64::implementation ppc64_singleton{};
  return &ppc64_singleton;
}
#endif
#if SIMDUTF_IMPLEMENTATION_RVV
static const rvv::implementation *get_rvv_singleton() {
  static const rvv::implementation rvv_singleton{};
  return &rvv_singleton;
}
#endif
#if SIMDUTF_IMPLEMENTATION_LASX
static const lasx::implementation *get_lasx_singleton() {
  static const lasx::implementation lasx_singleton{};
  return &lasx_singleton;
}
#endif
#if SIMDUTF_IMPLEMENTATION_LSX
static const lsx::implementation *get_lsx_singleton() {
  static const lsx::implementation lsx_singleton{};
  return &lsx_singleton;
}
#endif
#if SIMDUTF_IMPLEMENTATION_FALLBACK
static const fallback::implementation *get_fallback_singleton() {
  static const fallback::implementation fallback_singleton{};
  return &fallback_singleton;
}
#endif

#if SIMDUTF_SINGLE_IMPLEMENTATION
/**
 * Returns the only kernel compiled into this build.
 *
 * SIMDUTF_SINGLE_IMPLEMENTATION guarantees that exactly one of the
 * conditional sections below is active, so the `return` is completed by
 * exactly one expression. The sections must stay in sync with the macro:
 * every kernel counted there needs a branch here.
 */
simdutf_really_inline static const implementation *get_single_implementation() {
  return
  #if SIMDUTF_IMPLEMENTATION_ICELAKE
      get_icelake_singleton();
  #endif
  #if SIMDUTF_IMPLEMENTATION_HASWELL
      get_haswell_singleton();
  #endif
  #if SIMDUTF_IMPLEMENTATION_WESTMERE
      get_westmere_singleton();
  #endif
  #if SIMDUTF_IMPLEMENTATION_ARM64
      get_arm64_singleton();
  #endif
  #if SIMDUTF_IMPLEMENTATION_PPC64
      get_ppc64_singleton();
  #endif
  #if SIMDUTF_IMPLEMENTATION_RVV
      get_rvv_singleton();
  #endif
  #if SIMDUTF_IMPLEMENTATION_LASX
      get_lasx_singleton();
  #endif
  #if SIMDUTF_IMPLEMENTATION_LSX
      get_lsx_singleton();
  #endif
  #if SIMDUTF_IMPLEMENTATION_FALLBACK
      get_fallback_singleton();
  #endif
}
#endif
201 | | |
202 | | /** |
203 | | * @private Detects best supported implementation on first use, and sets it |
204 | | */ |
205 | | class detect_best_supported_implementation_on_first_use final |
206 | | : public implementation { |
207 | | public: |
208 | 0 | std::string name() const noexcept final { return set_best()->name(); } |
209 | 0 | std::string description() const noexcept final { |
210 | 0 | return set_best()->description(); |
211 | 0 | } |
212 | 0 | uint32_t required_instruction_sets() const noexcept final { |
213 | 0 | return set_best()->required_instruction_sets(); |
214 | 0 | } |
215 | | |
216 | | #if SIMDUTF_FEATURE_DETECT_ENCODING |
217 | | simdutf_warn_unused int |
218 | 0 | detect_encodings(const char *input, size_t length) const noexcept override { |
219 | 0 | return set_best()->detect_encodings(input, length); |
220 | 0 | } |
221 | | #endif // SIMDUTF_FEATURE_DETECT_ENCODING |
222 | | |
223 | | #if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING |
224 | | simdutf_warn_unused bool |
225 | 0 | validate_utf8(const char *buf, size_t len) const noexcept final override { |
226 | 0 | return set_best()->validate_utf8(buf, len); |
227 | 0 | } |
228 | | #endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING |
229 | | |
230 | | #if SIMDUTF_FEATURE_UTF8 |
231 | | simdutf_warn_unused result validate_utf8_with_errors( |
232 | 0 | const char *buf, size_t len) const noexcept final override { |
233 | 0 | return set_best()->validate_utf8_with_errors(buf, len); |
234 | 0 | } |
235 | | #endif // SIMDUTF_FEATURE_UTF8 |
236 | | |
237 | | #if SIMDUTF_FEATURE_ASCII |
238 | | simdutf_warn_unused bool |
239 | 0 | validate_ascii(const char *buf, size_t len) const noexcept final override { |
240 | 0 | return set_best()->validate_ascii(buf, len); |
241 | 0 | } |
242 | | simdutf_warn_unused result validate_ascii_with_errors( |
243 | 0 | const char *buf, size_t len) const noexcept final override { |
244 | 0 | return set_best()->validate_ascii_with_errors(buf, len); |
245 | 0 | } |
246 | | #endif // SIMDUTF_FEATURE_ASCII |
247 | | |
248 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII |
249 | | simdutf_warn_unused bool |
250 | | validate_utf16le_as_ascii(const char16_t *buf, |
251 | 0 | size_t len) const noexcept final override { |
252 | 0 | return set_best()->validate_utf16le_as_ascii(buf, len); |
253 | 0 | } |
254 | | simdutf_warn_unused bool |
255 | | validate_utf16be_as_ascii(const char16_t *buf, |
256 | 0 | size_t len) const noexcept final override { |
257 | 0 | return set_best()->validate_utf16be_as_ascii(buf, len); |
258 | 0 | } |
259 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII |
260 | | |
261 | | #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING |
262 | | simdutf_warn_unused bool |
263 | | validate_utf16le(const char16_t *buf, |
264 | 0 | size_t len) const noexcept final override { |
265 | 0 | return set_best()->validate_utf16le(buf, len); |
266 | 0 | } |
267 | | #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING |
268 | | |
269 | | #if SIMDUTF_FEATURE_UTF16 |
270 | | simdutf_warn_unused bool |
271 | | validate_utf16be(const char16_t *buf, |
272 | 0 | size_t len) const noexcept final override { |
273 | 0 | return set_best()->validate_utf16be(buf, len); |
274 | 0 | } |
275 | | |
276 | | simdutf_warn_unused result validate_utf16le_with_errors( |
277 | 0 | const char16_t *buf, size_t len) const noexcept final override { |
278 | 0 | return set_best()->validate_utf16le_with_errors(buf, len); |
279 | 0 | } |
280 | | |
281 | | simdutf_warn_unused result validate_utf16be_with_errors( |
282 | 0 | const char16_t *buf, size_t len) const noexcept final override { |
283 | 0 | return set_best()->validate_utf16be_with_errors(buf, len); |
284 | 0 | } |
285 | | void to_well_formed_utf16be(const char16_t *input, size_t len, |
286 | 0 | char16_t *output) const noexcept final override { |
287 | 0 | return set_best()->to_well_formed_utf16be(input, len, output); |
288 | 0 | } |
289 | | void to_well_formed_utf16le(const char16_t *input, size_t len, |
290 | 0 | char16_t *output) const noexcept final override { |
291 | 0 | return set_best()->to_well_formed_utf16le(input, len, output); |
292 | 0 | } |
293 | | #endif // SIMDUTF_FEATURE_UTF16 |
294 | | |
295 | | #if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING |
296 | | simdutf_warn_unused bool |
297 | | validate_utf32(const char32_t *buf, |
298 | 0 | size_t len) const noexcept final override { |
299 | 0 | return set_best()->validate_utf32(buf, len); |
300 | 0 | } |
301 | | #endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING |
302 | | |
303 | | #if SIMDUTF_FEATURE_UTF32 |
304 | | simdutf_warn_unused result validate_utf32_with_errors( |
305 | 0 | const char32_t *buf, size_t len) const noexcept final override { |
306 | 0 | return set_best()->validate_utf32_with_errors(buf, len); |
307 | 0 | } |
308 | | #endif // SIMDUTF_FEATURE_UTF32 |
309 | | |
310 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
311 | | simdutf_warn_unused size_t |
312 | | convert_latin1_to_utf8(const char *buf, size_t len, |
313 | 0 | char *utf8_output) const noexcept final override { |
314 | 0 | return set_best()->convert_latin1_to_utf8(buf, len, utf8_output); |
315 | 0 | } |
316 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
317 | | |
318 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
319 | | simdutf_warn_unused size_t convert_latin1_to_utf16le( |
320 | | const char *buf, size_t len, |
321 | 0 | char16_t *utf16_output) const noexcept final override { |
322 | 0 | return set_best()->convert_latin1_to_utf16le(buf, len, utf16_output); |
323 | 0 | } |
324 | | |
325 | | simdutf_warn_unused size_t convert_latin1_to_utf16be( |
326 | | const char *buf, size_t len, |
327 | 0 | char16_t *utf16_output) const noexcept final override { |
328 | 0 | return set_best()->convert_latin1_to_utf16be(buf, len, utf16_output); |
329 | 0 | } |
330 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
331 | | |
332 | | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
333 | | simdutf_warn_unused size_t convert_latin1_to_utf32( |
334 | | const char *buf, size_t len, |
335 | 0 | char32_t *latin1_output) const noexcept final override { |
336 | 0 | return set_best()->convert_latin1_to_utf32(buf, len, latin1_output); |
337 | 0 | } |
338 | | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
339 | | |
340 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
341 | | simdutf_warn_unused size_t |
342 | | convert_utf8_to_latin1(const char *buf, size_t len, |
343 | 0 | char *latin1_output) const noexcept final override { |
344 | 0 | return set_best()->convert_utf8_to_latin1(buf, len, latin1_output); |
345 | 0 | } |
346 | | |
347 | | simdutf_warn_unused result convert_utf8_to_latin1_with_errors( |
348 | | const char *buf, size_t len, |
349 | 0 | char *latin1_output) const noexcept final override { |
350 | 0 | return set_best()->convert_utf8_to_latin1_with_errors(buf, len, |
351 | 0 | latin1_output); |
352 | 0 | } |
353 | | |
354 | | simdutf_warn_unused size_t convert_valid_utf8_to_latin1( |
355 | | const char *buf, size_t len, |
356 | 0 | char *latin1_output) const noexcept final override { |
357 | 0 | return set_best()->convert_valid_utf8_to_latin1(buf, len, latin1_output); |
358 | 0 | } |
359 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
360 | | |
361 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
362 | | simdutf_warn_unused size_t convert_utf8_to_utf16le( |
363 | | const char *buf, size_t len, |
364 | 0 | char16_t *utf16_output) const noexcept final override { |
365 | 0 | return set_best()->convert_utf8_to_utf16le(buf, len, utf16_output); |
366 | 0 | } |
367 | | |
368 | | simdutf_warn_unused size_t convert_utf8_to_utf16be( |
369 | | const char *buf, size_t len, |
370 | 0 | char16_t *utf16_output) const noexcept final override { |
371 | 0 | return set_best()->convert_utf8_to_utf16be(buf, len, utf16_output); |
372 | 0 | } |
373 | | |
374 | | simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( |
375 | | const char *buf, size_t len, |
376 | 0 | char16_t *utf16_output) const noexcept final override { |
377 | 0 | return set_best()->convert_utf8_to_utf16le_with_errors(buf, len, |
378 | 0 | utf16_output); |
379 | 0 | } |
380 | | |
381 | | simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( |
382 | | const char *buf, size_t len, |
383 | 0 | char16_t *utf16_output) const noexcept final override { |
384 | 0 | return set_best()->convert_utf8_to_utf16be_with_errors(buf, len, |
385 | 0 | utf16_output); |
386 | 0 | } |
387 | | |
388 | | simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( |
389 | | const char *buf, size_t len, |
390 | 0 | char16_t *utf16_output) const noexcept final override { |
391 | 0 | return set_best()->convert_valid_utf8_to_utf16le(buf, len, utf16_output); |
392 | 0 | } |
393 | | |
394 | | simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( |
395 | | const char *buf, size_t len, |
396 | 0 | char16_t *utf16_output) const noexcept final override { |
397 | 0 | return set_best()->convert_valid_utf8_to_utf16be(buf, len, utf16_output); |
398 | 0 | } |
399 | | simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( |
400 | 0 | const char16_t *input, size_t length) const noexcept final override { |
401 | 0 | return set_best()->utf8_length_from_utf16le_with_replacement(input, length); |
402 | 0 | } |
403 | | |
404 | | simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( |
405 | 0 | const char16_t *input, size_t length) const noexcept final override { |
406 | 0 | return set_best()->utf8_length_from_utf16be_with_replacement(input, length); |
407 | 0 | } |
408 | | |
409 | | simdutf_warn_unused size_t convert_utf16le_to_utf8_with_replacement( |
410 | | const char16_t *input, size_t length, |
411 | 0 | char *utf8_buffer) const noexcept final override { |
412 | 0 | return set_best()->convert_utf16le_to_utf8_with_replacement(input, length, |
413 | 0 | utf8_buffer); |
414 | 0 | } |
415 | | |
416 | | simdutf_warn_unused size_t convert_utf16be_to_utf8_with_replacement( |
417 | | const char16_t *input, size_t length, |
418 | 0 | char *utf8_buffer) const noexcept final override { |
419 | 0 | return set_best()->convert_utf16be_to_utf8_with_replacement(input, length, |
420 | 0 | utf8_buffer); |
421 | 0 | } |
422 | | |
423 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
424 | | |
425 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
426 | | simdutf_warn_unused size_t |
427 | | convert_utf8_to_utf32(const char *buf, size_t len, |
428 | 0 | char32_t *utf32_output) const noexcept final override { |
429 | 0 | return set_best()->convert_utf8_to_utf32(buf, len, utf32_output); |
430 | 0 | } |
431 | | |
432 | | simdutf_warn_unused result convert_utf8_to_utf32_with_errors( |
433 | | const char *buf, size_t len, |
434 | 0 | char32_t *utf32_output) const noexcept final override { |
435 | 0 | return set_best()->convert_utf8_to_utf32_with_errors(buf, len, |
436 | 0 | utf32_output); |
437 | 0 | } |
438 | | |
439 | | simdutf_warn_unused size_t convert_valid_utf8_to_utf32( |
440 | | const char *buf, size_t len, |
441 | 0 | char32_t *utf32_output) const noexcept final override { |
442 | 0 | return set_best()->convert_valid_utf8_to_utf32(buf, len, utf32_output); |
443 | 0 | } |
444 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
445 | | |
446 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
447 | | simdutf_warn_unused size_t |
448 | | convert_utf16le_to_latin1(const char16_t *buf, size_t len, |
449 | 0 | char *latin1_output) const noexcept final override { |
450 | 0 | return set_best()->convert_utf16le_to_latin1(buf, len, latin1_output); |
451 | 0 | } |
452 | | |
453 | | simdutf_warn_unused size_t |
454 | | convert_utf16be_to_latin1(const char16_t *buf, size_t len, |
455 | 0 | char *latin1_output) const noexcept final override { |
456 | 0 | return set_best()->convert_utf16be_to_latin1(buf, len, latin1_output); |
457 | 0 | } |
458 | | |
459 | | simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( |
460 | | const char16_t *buf, size_t len, |
461 | 0 | char *latin1_output) const noexcept final override { |
462 | 0 | return set_best()->convert_utf16le_to_latin1_with_errors(buf, len, |
463 | 0 | latin1_output); |
464 | 0 | } |
465 | | |
466 | | simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( |
467 | | const char16_t *buf, size_t len, |
468 | 0 | char *latin1_output) const noexcept final override { |
469 | 0 | return set_best()->convert_utf16be_to_latin1_with_errors(buf, len, |
470 | 0 | latin1_output); |
471 | 0 | } |
472 | | |
473 | | simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( |
474 | | const char16_t *buf, size_t len, |
475 | 0 | char *latin1_output) const noexcept final override { |
476 | 0 | return set_best()->convert_valid_utf16le_to_latin1(buf, len, latin1_output); |
477 | 0 | } |
478 | | |
479 | | simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( |
480 | | const char16_t *buf, size_t len, |
481 | 0 | char *latin1_output) const noexcept final override { |
482 | 0 | return set_best()->convert_valid_utf16be_to_latin1(buf, len, latin1_output); |
483 | 0 | } |
484 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
485 | | |
486 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
487 | | simdutf_warn_unused size_t |
488 | | convert_utf16le_to_utf8(const char16_t *buf, size_t len, |
489 | 0 | char *utf8_output) const noexcept final override { |
490 | 0 | return set_best()->convert_utf16le_to_utf8(buf, len, utf8_output); |
491 | 0 | } |
492 | | |
493 | | simdutf_warn_unused size_t |
494 | | convert_utf16be_to_utf8(const char16_t *buf, size_t len, |
495 | 0 | char *utf8_output) const noexcept final override { |
496 | 0 | return set_best()->convert_utf16be_to_utf8(buf, len, utf8_output); |
497 | 0 | } |
498 | | |
499 | | simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( |
500 | | const char16_t *buf, size_t len, |
501 | 0 | char *utf8_output) const noexcept final override { |
502 | 0 | return set_best()->convert_utf16le_to_utf8_with_errors(buf, len, |
503 | 0 | utf8_output); |
504 | 0 | } |
505 | | |
506 | | simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( |
507 | | const char16_t *buf, size_t len, |
508 | 0 | char *utf8_output) const noexcept final override { |
509 | 0 | return set_best()->convert_utf16be_to_utf8_with_errors(buf, len, |
510 | 0 | utf8_output); |
511 | 0 | } |
512 | | |
513 | | simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( |
514 | | const char16_t *buf, size_t len, |
515 | 0 | char *utf8_output) const noexcept final override { |
516 | 0 | return set_best()->convert_valid_utf16le_to_utf8(buf, len, utf8_output); |
517 | 0 | } |
518 | | |
519 | | simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( |
520 | | const char16_t *buf, size_t len, |
521 | 0 | char *utf8_output) const noexcept final override { |
522 | 0 | return set_best()->convert_valid_utf16be_to_utf8(buf, len, utf8_output); |
523 | 0 | } |
524 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
525 | | |
526 | | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
527 | | simdutf_warn_unused size_t |
528 | | convert_utf32_to_latin1(const char32_t *buf, size_t len, |
529 | 0 | char *latin1_output) const noexcept final override { |
530 | 0 | return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); |
531 | 0 | } |
532 | | |
533 | | simdutf_warn_unused result convert_utf32_to_latin1_with_errors( |
534 | | const char32_t *buf, size_t len, |
535 | 0 | char *latin1_output) const noexcept final override { |
536 | 0 | return set_best()->convert_utf32_to_latin1_with_errors(buf, len, |
537 | 0 | latin1_output); |
538 | 0 | } |
539 | | |
540 | | simdutf_warn_unused size_t convert_valid_utf32_to_latin1( |
541 | | const char32_t *buf, size_t len, |
542 | 0 | char *latin1_output) const noexcept final override { |
543 | 0 | return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); |
544 | 0 | } |
545 | | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
546 | | |
547 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
548 | | simdutf_warn_unused size_t |
549 | | convert_utf32_to_utf8(const char32_t *buf, size_t len, |
550 | 0 | char *utf8_output) const noexcept final override { |
551 | 0 | return set_best()->convert_utf32_to_utf8(buf, len, utf8_output); |
552 | 0 | } |
553 | | |
554 | | simdutf_warn_unused result convert_utf32_to_utf8_with_errors( |
555 | | const char32_t *buf, size_t len, |
556 | 0 | char *utf8_output) const noexcept final override { |
557 | 0 | return set_best()->convert_utf32_to_utf8_with_errors(buf, len, utf8_output); |
558 | 0 | } |
559 | | |
560 | | simdutf_warn_unused size_t |
561 | | convert_valid_utf32_to_utf8(const char32_t *buf, size_t len, |
562 | 0 | char *utf8_output) const noexcept final override { |
563 | 0 | return set_best()->convert_valid_utf32_to_utf8(buf, len, utf8_output); |
564 | 0 | } |
565 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
566 | | |
567 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
568 | | simdutf_warn_unused size_t convert_utf32_to_utf16le( |
569 | | const char32_t *buf, size_t len, |
570 | 0 | char16_t *utf16_output) const noexcept final override { |
571 | 0 | return set_best()->convert_utf32_to_utf16le(buf, len, utf16_output); |
572 | 0 | } |
573 | | |
574 | | simdutf_warn_unused size_t convert_utf32_to_utf16be( |
575 | | const char32_t *buf, size_t len, |
576 | 0 | char16_t *utf16_output) const noexcept final override { |
577 | 0 | return set_best()->convert_utf32_to_utf16be(buf, len, utf16_output); |
578 | 0 | } |
579 | | |
580 | | simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( |
581 | | const char32_t *buf, size_t len, |
582 | 0 | char16_t *utf16_output) const noexcept final override { |
583 | 0 | return set_best()->convert_utf32_to_utf16le_with_errors(buf, len, |
584 | 0 | utf16_output); |
585 | 0 | } |
586 | | |
587 | | simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( |
588 | | const char32_t *buf, size_t len, |
589 | 0 | char16_t *utf16_output) const noexcept final override { |
590 | 0 | return set_best()->convert_utf32_to_utf16be_with_errors(buf, len, |
591 | 0 | utf16_output); |
592 | 0 | } |
593 | | |
594 | | simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( |
595 | | const char32_t *buf, size_t len, |
596 | 0 | char16_t *utf16_output) const noexcept final override { |
597 | 0 | return set_best()->convert_valid_utf32_to_utf16le(buf, len, utf16_output); |
598 | 0 | } |
599 | | |
600 | | simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( |
601 | | const char32_t *buf, size_t len, |
602 | 0 | char16_t *utf16_output) const noexcept final override { |
603 | 0 | return set_best()->convert_valid_utf32_to_utf16be(buf, len, utf16_output); |
604 | 0 | } |
605 | | |
606 | | simdutf_warn_unused size_t convert_utf16le_to_utf32( |
607 | | const char16_t *buf, size_t len, |
608 | 0 | char32_t *utf32_output) const noexcept final override { |
609 | 0 | return set_best()->convert_utf16le_to_utf32(buf, len, utf32_output); |
610 | 0 | } |
611 | | |
612 | | simdutf_warn_unused size_t convert_utf16be_to_utf32( |
613 | | const char16_t *buf, size_t len, |
614 | 0 | char32_t *utf32_output) const noexcept final override { |
615 | 0 | return set_best()->convert_utf16be_to_utf32(buf, len, utf32_output); |
616 | 0 | } |
617 | | |
618 | | simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( |
619 | | const char16_t *buf, size_t len, |
620 | 0 | char32_t *utf32_output) const noexcept final override { |
621 | 0 | return set_best()->convert_utf16le_to_utf32_with_errors(buf, len, |
622 | 0 | utf32_output); |
623 | 0 | } |
624 | | |
625 | | simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( |
626 | | const char16_t *buf, size_t len, |
627 | 0 | char32_t *utf32_output) const noexcept final override { |
628 | 0 | return set_best()->convert_utf16be_to_utf32_with_errors(buf, len, |
629 | 0 | utf32_output); |
630 | 0 | } |
631 | | |
632 | | simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( |
633 | | const char16_t *buf, size_t len, |
634 | 0 | char32_t *utf32_output) const noexcept final override { |
635 | 0 | return set_best()->convert_valid_utf16le_to_utf32(buf, len, utf32_output); |
636 | 0 | } |
637 | | |
638 | | simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( |
639 | | const char16_t *buf, size_t len, |
640 | 0 | char32_t *utf32_output) const noexcept final override { |
641 | 0 | return set_best()->convert_valid_utf16be_to_utf32(buf, len, utf32_output); |
642 | 0 | } |
643 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
644 | | |
645 | | #if SIMDUTF_FEATURE_UTF16 |
646 | | void change_endianness_utf16(const char16_t *buf, size_t len, |
647 | 0 | char16_t *output) const noexcept final override { |
648 | 0 | set_best()->change_endianness_utf16(buf, len, output); |
649 | 0 | } |
650 | | |
651 | | simdutf_warn_unused size_t |
652 | 0 | count_utf16le(const char16_t *buf, size_t len) const noexcept final override { |
653 | 0 | return set_best()->count_utf16le(buf, len); |
654 | 0 | } |
655 | | |
656 | | simdutf_warn_unused size_t |
657 | 0 | count_utf16be(const char16_t *buf, size_t len) const noexcept final override { |
658 | 0 | return set_best()->count_utf16be(buf, len); |
659 | 0 | } |
660 | | #endif // SIMDUTF_FEATURE_UTF16 |
661 | | |
662 | | #if SIMDUTF_FEATURE_UTF8 |
663 | | simdutf_warn_unused size_t |
664 | 0 | count_utf8(const char *buf, size_t len) const noexcept final override { |
665 | 0 | return set_best()->count_utf8(buf, len); |
666 | 0 | } |
667 | | #endif // SIMDUTF_FEATURE_UTF8 |
668 | | |
669 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
670 | | simdutf_warn_unused size_t |
671 | 0 | latin1_length_from_utf8(const char *buf, size_t len) const noexcept override { |
672 | 0 | return set_best()->latin1_length_from_utf8(buf, len); |
673 | 0 | } |
674 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
675 | | |
676 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
677 | | simdutf_warn_unused size_t |
678 | 0 | utf8_length_from_latin1(const char *buf, size_t len) const noexcept override { |
679 | 0 | return set_best()->utf8_length_from_latin1(buf, len); |
680 | 0 | } |
681 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
682 | | |
683 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
684 | | simdutf_warn_unused size_t utf8_length_from_utf16le( |
685 | 0 | const char16_t *buf, size_t len) const noexcept override { |
686 | 0 | return set_best()->utf8_length_from_utf16le(buf, len); |
687 | 0 | } |
688 | | |
689 | | simdutf_warn_unused size_t utf8_length_from_utf16be( |
690 | 0 | const char16_t *buf, size_t len) const noexcept override { |
691 | 0 | return set_best()->utf8_length_from_utf16be(buf, len); |
692 | 0 | } |
693 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
694 | | |
695 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
696 | | simdutf_warn_unused size_t utf32_length_from_utf16le( |
697 | 0 | const char16_t *buf, size_t len) const noexcept override { |
698 | 0 | return set_best()->utf32_length_from_utf16le(buf, len); |
699 | 0 | } |
700 | | |
701 | | simdutf_warn_unused size_t utf32_length_from_utf16be( |
702 | 0 | const char16_t *buf, size_t len) const noexcept override { |
703 | 0 | return set_best()->utf32_length_from_utf16be(buf, len); |
704 | 0 | } |
705 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
706 | | |
707 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
// Dispatch stub: resolves the implementation through set_best() and returns
// whatever its utf16_length_from_utf8 yields for the same buf and len.
708 | | simdutf_warn_unused size_t |
709 | 0 | utf16_length_from_utf8(const char *buf, size_t len) const noexcept override { |
710 | 0 | return set_best()->utf16_length_from_utf8(buf, len); |
711 | 0 | } |
712 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
713 | | |
714 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
// Dispatch stub: resolves the implementation through set_best() and returns
// whatever its utf8_length_from_utf32 yields for the same buf and len.
715 | | simdutf_warn_unused size_t utf8_length_from_utf32( |
716 | 0 | const char32_t *buf, size_t len) const noexcept override { |
717 | 0 | return set_best()->utf8_length_from_utf32(buf, len); |
718 | 0 | } |
719 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
720 | | |
721 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
// Dispatch stub: resolves the implementation through set_best() and returns
// whatever its utf16_length_from_utf32 yields for the same buf and len.
722 | | simdutf_warn_unused size_t utf16_length_from_utf32( |
723 | 0 | const char32_t *buf, size_t len) const noexcept override { |
724 | 0 | return set_best()->utf16_length_from_utf32(buf, len); |
725 | 0 | } |
726 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
727 | | |
728 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
// Dispatch stub: resolves the implementation through set_best() and returns
// whatever its utf32_length_from_utf8 yields for the same buf and len.
729 | | simdutf_warn_unused size_t |
730 | 0 | utf32_length_from_utf8(const char *buf, size_t len) const noexcept override { |
731 | 0 | return set_best()->utf32_length_from_utf8(buf, len); |
732 | 0 | } |
733 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
734 | | |
735 | | #if SIMDUTF_FEATURE_BASE64 |
// Dispatch stub for the `const char *` input overload: resolves the
// implementation through set_best() and forwards all five arguments unchanged.
// The last argument defaults to last_chunk_handling_options::loose when omitted.
736 | | simdutf_warn_unused result base64_to_binary( |
737 | | const char *input, size_t length, char *output, base64_options options, |
738 | | last_chunk_handling_options last_chunk_handling_options = |
739 | 0 | last_chunk_handling_options::loose) const noexcept override { |
740 | 0 | return set_best()->base64_to_binary(input, length, output, options, |
741 | 0 | last_chunk_handling_options); |
742 | 0 | } |
743 | | |
// Dispatch stub for the `const char *` input overload returning full_result:
// resolves the implementation through set_best() and forwards all five
// arguments unchanged. The last argument defaults to
// last_chunk_handling_options::loose when omitted.
744 | | simdutf_warn_unused full_result base64_to_binary_details( |
745 | | const char *input, size_t length, char *output, base64_options options, |
746 | | last_chunk_handling_options last_chunk_handling_options = |
747 | 0 | last_chunk_handling_options::loose) const noexcept override { |
748 | 0 | return set_best()->base64_to_binary_details(input, length, output, options, |
749 | 0 | last_chunk_handling_options); |
750 | 0 | } |
751 | | |
// Dispatch stub for the `const char16_t *` input overload: resolves the
// implementation through set_best() and forwards all five arguments unchanged.
// The last argument defaults to last_chunk_handling_options::loose when omitted.
752 | | simdutf_warn_unused result base64_to_binary( |
753 | | const char16_t *input, size_t length, char *output, |
754 | | base64_options options, |
755 | | last_chunk_handling_options last_chunk_handling_options = |
756 | 0 | last_chunk_handling_options::loose) const noexcept override { |
757 | 0 | return set_best()->base64_to_binary(input, length, output, options, |
758 | 0 | last_chunk_handling_options); |
759 | 0 | } |
760 | | |
// Dispatch stub for the `const char16_t *` input overload returning
// full_result: resolves the implementation through set_best() and forwards all
// five arguments unchanged. The last argument defaults to
// last_chunk_handling_options::loose when omitted.
761 | | simdutf_warn_unused full_result base64_to_binary_details( |
762 | | const char16_t *input, size_t length, char *output, |
763 | | base64_options options, |
764 | | last_chunk_handling_options last_chunk_handling_options = |
765 | 0 | last_chunk_handling_options::loose) const noexcept override { |
766 | 0 | return set_best()->base64_to_binary_details(input, length, output, options, |
767 | 0 | last_chunk_handling_options); |
768 | 0 | } |
769 | | |
// Dispatch stub: resolves the implementation through set_best() and forwards
// input, length, output and options unchanged to its binary_to_base64.
770 | | size_t binary_to_base64(const char *input, size_t length, char *output, |
771 | 0 | base64_options options) const noexcept override { |
772 | 0 | return set_best()->binary_to_base64(input, length, output, options); |
773 | 0 | } |
774 | | |
// Dispatch stub: resolves the implementation through set_best() and forwards
// input, length, output, line_length and options unchanged to its
// binary_to_base64_with_lines.
775 | | size_t |
776 | | binary_to_base64_with_lines(const char *input, size_t length, char *output, |
777 | | size_t line_length, |
778 | 0 | base64_options options) const noexcept override { |
779 | 0 | return set_best()->binary_to_base64_with_lines(input, length, output, |
780 | 0 | line_length, options); |
781 | 0 | } |
782 | | |
// Dispatch stub for the `char` overload: resolves the implementation through
// set_best() and returns the pointer its find(start, end, character) yields.
783 | | const char *find(const char *start, const char *end, |
784 | 0 | char character) const noexcept override { |
785 | 0 | return set_best()->find(start, end, character); |
786 | 0 | } |
787 | | |
// Dispatch stub for the `char16_t` overload: resolves the implementation
// through set_best() and returns the pointer its find(start, end, character)
// yields.
788 | | const char16_t *find(const char16_t *start, const char16_t *end, |
789 | 0 | char16_t character) const noexcept override { |
790 | 0 | return set_best()->find(start, end, character); |
791 | 0 | } |
792 | | #endif // SIMDUTF_FEATURE_BASE64 |
793 | | |
// Constructs the detector by passing a fixed name, a fixed description and 0
// to the implementation base-class constructor.
// NOTE(review): the third argument is presumably the required-instruction-set
// mask (0 = no requirement) - confirm against implementation's constructor.
794 | | simdutf_really_inline |
795 | | detect_best_supported_implementation_on_first_use() noexcept |
796 | 0 | : implementation("best_supported_detector", |
797 | 0 | "Detects the best supported implementation and sets it", |
798 | 0 | 0) {} |
799 | | |
800 | | private: |
801 | | const implementation *set_best() const noexcept; |
802 | | }; |
803 | | |
// Compile-time guard: the detector type must stay trivially destructible.
// NOTE(review): presumably required because an instance is kept as a
// function-local static in get_active_implementation() - confirm the intent.
804 | | static_assert(std::is_trivially_destructible< |
805 | | detect_best_supported_implementation_on_first_use>::value, |
806 | | "detect_best_supported_implementation_on_first_use should be " |
807 | | "trivially destructible"); |
808 | | |
// Returns a reference to a function-local static list holding one pointer per
// implementation compiled into this build. Each entry is included only when its
// SIMDUTF_IMPLEMENTATION_* macro is true.
// The entry order (icelake, haswell, westmere, arm64, ppc64, rvv, lasx, lsx,
// fallback) is significant: detect_best_supported() walks the list front to
// back and returns the first entry whose required instruction sets are
// available.
809 | | static const std::initializer_list<const implementation *> & |
810 | 2 | get_available_implementation_pointers() { |
811 | 2 | static const std::initializer_list<const implementation *> |
812 | 2 | available_implementation_pointers{ |
813 | 2 | #if SIMDUTF_IMPLEMENTATION_ICELAKE |
814 | 2 | get_icelake_singleton(), |
815 | 2 | #endif |
816 | 2 | #if SIMDUTF_IMPLEMENTATION_HASWELL |
817 | 2 | get_haswell_singleton(), |
818 | 2 | #endif |
819 | 2 | #if SIMDUTF_IMPLEMENTATION_WESTMERE |
820 | 2 | get_westmere_singleton(), |
821 | 2 | #endif |
822 | | #if SIMDUTF_IMPLEMENTATION_ARM64 |
823 | | get_arm64_singleton(), |
824 | | #endif |
825 | | #if SIMDUTF_IMPLEMENTATION_PPC64 |
826 | | get_ppc64_singleton(), |
827 | | #endif |
828 | | #if SIMDUTF_IMPLEMENTATION_RVV |
829 | | get_rvv_singleton(), |
830 | | #endif |
831 | | #if SIMDUTF_IMPLEMENTATION_LASX |
832 | | get_lasx_singleton(), |
833 | | #endif |
834 | | #if SIMDUTF_IMPLEMENTATION_LSX |
835 | | get_lsx_singleton(), |
836 | | #endif |
837 | 2 | #if SIMDUTF_IMPLEMENTATION_FALLBACK |
838 | 2 | get_fallback_singleton(), |
839 | 2 | #endif |
840 | 2 | }; // available_implementation_pointers |
841 | 2 | return available_implementation_pointers; |
842 | 2 | } |
843 | | |
844 | | // Stand-in implementation used when no usable implementation can be selected: |
845 | | // every operation reports failure instead of doing any work. |
// It is returned by detect_best_supported() when no listed implementation
// matches the detected instruction sets, and by set_best() when
// SIMDUTF_FORCE_IMPLEMENTATION names an implementation that is not found.
// Return conventions used throughout the class:
//   - detect_encodings           -> encoding_type::unspecified
//   - validate_* (bool)          -> false
//   - functions returning result -> error_code::OTHER with count 0
//   - full_result functions      -> error_code::OTHER with both counts 0
//   - size_t conversions, counters, length estimators, encoders -> 0
//   - find                       -> nullptr
//   - void functions             -> empty bodies (arguments are ignored)
846 | | class unsupported_implementation final : public implementation { |
847 | | public: |
848 | | #if SIMDUTF_FEATURE_DETECT_ENCODING |
849 | | simdutf_warn_unused int detect_encodings(const char *, |
850 | 0 | size_t) const noexcept override { |
851 | 0 | return encoding_type::unspecified; |
852 | 0 | } |
853 | | #endif // SIMDUTF_FEATURE_DETECT_ENCODING |
854 | | |
855 | | #if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING |
856 | | simdutf_warn_unused bool validate_utf8(const char *, |
857 | 0 | size_t) const noexcept final override { |
858 | 0 | return false; // Just refuse to validate. Given that we have a fallback |
859 | | // implementation |
860 | | // it seems unlikely that unsupported_implementation will ever be used. If |
861 | | // it is used, then it will flag all strings as invalid. The alternative is |
862 | | // to return an error_code from which the user has to figure out whether the |
863 | | // string is valid UTF-8... which seems like a lot of work just to handle |
864 | | // the very unlikely case that we have an unsupported implementation. And, |
865 | | // when it does happen (that we have an unsupported implementation), what |
866 | | // are the chances that the programmer has a fallback? Given that *we* |
867 | | // provide the fallback, it implies that the programmer would need a |
868 | | // fallback for our fallback. |
869 | 0 | } |
870 | | #endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING |
871 | | |
872 | | #if SIMDUTF_FEATURE_UTF8 |
873 | | simdutf_warn_unused result validate_utf8_with_errors( |
874 | 0 | const char *, size_t) const noexcept final override { |
875 | 0 | return result(error_code::OTHER, 0); |
876 | 0 | } |
877 | | #endif // SIMDUTF_FEATURE_UTF8 |
878 | | |
879 | | #if SIMDUTF_FEATURE_ASCII |
880 | | simdutf_warn_unused bool |
881 | 0 | validate_ascii(const char *, size_t) const noexcept final override { |
882 | 0 | return false; |
883 | 0 | } |
884 | | |
885 | | simdutf_warn_unused result validate_ascii_with_errors( |
886 | 0 | const char *, size_t) const noexcept final override { |
887 | 0 | return result(error_code::OTHER, 0); |
888 | 0 | } |
889 | | #endif // SIMDUTF_FEATURE_ASCII |
890 | | |
891 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII |
892 | | simdutf_warn_unused bool |
893 | | validate_utf16le_as_ascii(const char16_t *, |
894 | 0 | size_t) const noexcept final override { |
895 | 0 | return false; |
896 | 0 | } |
897 | | |
898 | | simdutf_warn_unused bool |
899 | | validate_utf16be_as_ascii(const char16_t *, |
900 | 0 | size_t) const noexcept final override { |
901 | 0 | return false; |
902 | 0 | } |
903 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII |
904 | | |
905 | | #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING |
906 | | simdutf_warn_unused bool |
907 | 0 | validate_utf16le(const char16_t *, size_t) const noexcept final override { |
908 | 0 | return false; |
909 | 0 | } |
910 | | #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING |
911 | | |
912 | | #if SIMDUTF_FEATURE_UTF16 |
913 | | simdutf_warn_unused bool |
914 | 0 | validate_utf16be(const char16_t *, size_t) const noexcept final override { |
915 | 0 | return false; |
916 | 0 | } |
917 | | |
918 | | simdutf_warn_unused result validate_utf16le_with_errors( |
919 | 0 | const char16_t *, size_t) const noexcept final override { |
920 | 0 | return result(error_code::OTHER, 0); |
921 | 0 | } |
922 | | |
923 | | simdutf_warn_unused result validate_utf16be_with_errors( |
924 | 0 | const char16_t *, size_t) const noexcept final override { |
925 | 0 | return result(error_code::OTHER, 0); |
926 | 0 | } |
// The two to_well_formed_* overrides below have empty bodies: none of the
// arguments is read or written.
927 | | void to_well_formed_utf16be(const char16_t *, size_t, |
928 | 0 | char16_t *) const noexcept final override {} |
929 | | void to_well_formed_utf16le(const char16_t *, size_t, |
930 | 0 | char16_t *) const noexcept final override {} |
931 | | #endif // SIMDUTF_FEATURE_UTF16 |
932 | | |
933 | | #if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING |
934 | | simdutf_warn_unused bool |
935 | 0 | validate_utf32(const char32_t *, size_t) const noexcept final override { |
936 | 0 | return false; |
937 | 0 | } |
938 | | #endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING |
939 | | |
940 | | #if SIMDUTF_FEATURE_UTF32 |
941 | | simdutf_warn_unused result validate_utf32_with_errors( |
942 | 0 | const char32_t *, size_t) const noexcept final override { |
943 | 0 | return result(error_code::OTHER, 0); |
944 | 0 | } |
945 | | #endif // SIMDUTF_FEATURE_UTF32 |
946 | | |
// Conversions: the size_t-returning overrides below return 0 and the
// result-returning ones return error_code::OTHER; no output is written.
947 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
948 | | simdutf_warn_unused size_t convert_latin1_to_utf8( |
949 | 0 | const char *, size_t, char *) const noexcept final override { |
950 | 0 | return 0; |
951 | 0 | } |
952 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
953 | | |
954 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
955 | | simdutf_warn_unused size_t convert_latin1_to_utf16le( |
956 | 0 | const char *, size_t, char16_t *) const noexcept final override { |
957 | 0 | return 0; |
958 | 0 | } |
959 | | |
960 | | simdutf_warn_unused size_t convert_latin1_to_utf16be( |
961 | 0 | const char *, size_t, char16_t *) const noexcept final override { |
962 | 0 | return 0; |
963 | 0 | } |
964 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
965 | | |
966 | | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
967 | | simdutf_warn_unused size_t convert_latin1_to_utf32( |
968 | 0 | const char *, size_t, char32_t *) const noexcept final override { |
969 | 0 | return 0; |
970 | 0 | } |
971 | | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
972 | | |
973 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
974 | | simdutf_warn_unused size_t convert_utf8_to_latin1( |
975 | 0 | const char *, size_t, char *) const noexcept final override { |
976 | 0 | return 0; |
977 | 0 | } |
978 | | |
979 | | simdutf_warn_unused result convert_utf8_to_latin1_with_errors( |
980 | 0 | const char *, size_t, char *) const noexcept final override { |
981 | 0 | return result(error_code::OTHER, 0); |
982 | 0 | } |
983 | | |
984 | | simdutf_warn_unused size_t convert_valid_utf8_to_latin1( |
985 | 0 | const char *, size_t, char *) const noexcept final override { |
986 | 0 | return 0; |
987 | 0 | } |
988 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
989 | | |
990 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
991 | | simdutf_warn_unused size_t convert_utf8_to_utf16le( |
992 | 0 | const char *, size_t, char16_t *) const noexcept final override { |
993 | 0 | return 0; |
994 | 0 | } |
995 | | |
996 | | simdutf_warn_unused size_t convert_utf8_to_utf16be( |
997 | 0 | const char *, size_t, char16_t *) const noexcept final override { |
998 | 0 | return 0; |
999 | 0 | } |
1000 | | |
1001 | | simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( |
1002 | 0 | const char *, size_t, char16_t *) const noexcept final override { |
1003 | 0 | return result(error_code::OTHER, 0); |
1004 | 0 | } |
1005 | | |
1006 | | simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( |
1007 | 0 | const char *, size_t, char16_t *) const noexcept final override { |
1008 | 0 | return result(error_code::OTHER, 0); |
1009 | 0 | } |
1010 | | |
1011 | | simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( |
1012 | 0 | const char *, size_t, char16_t *) const noexcept final override { |
1013 | 0 | return 0; |
1014 | 0 | } |
1015 | | |
1016 | | simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( |
1017 | 0 | const char *, size_t, char16_t *) const noexcept final override { |
1018 | 0 | return 0; |
1019 | 0 | } |
// NOTE(review): the two *_with_replacement length functions below spell the
// failure value as `{OTHER, 0}` while the other result-returning overrides in
// this class use `result(error_code::OTHER, 0)`; presumably equivalent -
// confirm and consider unifying the spelling.
1020 | | simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( |
1021 | 0 | const char16_t *, size_t) const noexcept final override { |
1022 | 0 | return {OTHER, 0}; // Not supported |
1023 | 0 | } |
1024 | | |
1025 | | simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( |
1026 | 0 | const char16_t *, size_t) const noexcept final override { |
1027 | 0 | return {OTHER, 0}; // Not supported |
1028 | 0 | } |
1029 | | |
1030 | | simdutf_warn_unused size_t convert_utf16le_to_utf8_with_replacement( |
1031 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1032 | 0 | return 0; // Not supported |
1033 | 0 | } |
1034 | | |
1035 | | simdutf_warn_unused size_t convert_utf16be_to_utf8_with_replacement( |
1036 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1037 | 0 | return 0; // Not supported |
1038 | 0 | } |
1039 | | |
1040 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1041 | | |
1042 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1043 | | simdutf_warn_unused size_t convert_utf8_to_utf32( |
1044 | 0 | const char *, size_t, char32_t *) const noexcept final override { |
1045 | 0 | return 0; |
1046 | 0 | } |
1047 | | |
1048 | | simdutf_warn_unused result convert_utf8_to_utf32_with_errors( |
1049 | 0 | const char *, size_t, char32_t *) const noexcept final override { |
1050 | 0 | return result(error_code::OTHER, 0); |
1051 | 0 | } |
1052 | | |
1053 | | simdutf_warn_unused size_t convert_valid_utf8_to_utf32( |
1054 | 0 | const char *, size_t, char32_t *) const noexcept final override { |
1055 | 0 | return 0; |
1056 | 0 | } |
1057 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1058 | | |
1059 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
1060 | | simdutf_warn_unused size_t convert_utf16le_to_latin1( |
1061 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1062 | 0 | return 0; |
1063 | 0 | } |
1064 | | |
1065 | | simdutf_warn_unused size_t convert_utf16be_to_latin1( |
1066 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1067 | 0 | return 0; |
1068 | 0 | } |
1069 | | |
1070 | | simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( |
1071 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1072 | 0 | return result(error_code::OTHER, 0); |
1073 | 0 | } |
1074 | | |
1075 | | simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( |
1076 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1077 | 0 | return result(error_code::OTHER, 0); |
1078 | 0 | } |
1079 | | |
1080 | | simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( |
1081 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1082 | 0 | return 0; |
1083 | 0 | } |
1084 | | |
1085 | | simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( |
1086 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1087 | 0 | return 0; |
1088 | 0 | } |
1089 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
1090 | | |
1091 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1092 | | simdutf_warn_unused size_t convert_utf16le_to_utf8( |
1093 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1094 | 0 | return 0; |
1095 | 0 | } |
1096 | | |
1097 | | simdutf_warn_unused size_t convert_utf16be_to_utf8( |
1098 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1099 | 0 | return 0; |
1100 | 0 | } |
1101 | | |
1102 | | simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( |
1103 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1104 | 0 | return result(error_code::OTHER, 0); |
1105 | 0 | } |
1106 | | |
1107 | | simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( |
1108 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1109 | 0 | return result(error_code::OTHER, 0); |
1110 | 0 | } |
1111 | | |
1112 | | simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( |
1113 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1114 | 0 | return 0; |
1115 | 0 | } |
1116 | | |
1117 | | simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( |
1118 | 0 | const char16_t *, size_t, char *) const noexcept final override { |
1119 | 0 | return 0; |
1120 | 0 | } |
1121 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1122 | | |
1123 | | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
1124 | | simdutf_warn_unused size_t convert_utf32_to_latin1( |
1125 | 0 | const char32_t *, size_t, char *) const noexcept final override { |
1126 | 0 | return 0; |
1127 | 0 | } |
1128 | | |
1129 | | simdutf_warn_unused result convert_utf32_to_latin1_with_errors( |
1130 | 0 | const char32_t *, size_t, char *) const noexcept final override { |
1131 | 0 | return result(error_code::OTHER, 0); |
1132 | 0 | } |
1133 | | |
1134 | | simdutf_warn_unused size_t convert_valid_utf32_to_latin1( |
1135 | 0 | const char32_t *, size_t, char *) const noexcept final override { |
1136 | 0 | return 0; |
1137 | 0 | } |
1138 | | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
1139 | | |
1140 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1141 | | simdutf_warn_unused size_t convert_utf32_to_utf8( |
1142 | 0 | const char32_t *, size_t, char *) const noexcept final override { |
1143 | 0 | return 0; |
1144 | 0 | } |
1145 | | |
1146 | | simdutf_warn_unused result convert_utf32_to_utf8_with_errors( |
1147 | 0 | const char32_t *, size_t, char *) const noexcept final override { |
1148 | 0 | return result(error_code::OTHER, 0); |
1149 | 0 | } |
1150 | | |
1151 | | simdutf_warn_unused size_t convert_valid_utf32_to_utf8( |
1152 | 0 | const char32_t *, size_t, char *) const noexcept final override { |
1153 | 0 | return 0; |
1154 | 0 | } |
1155 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1156 | | |
1157 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
1158 | | simdutf_warn_unused size_t convert_utf32_to_utf16le( |
1159 | 0 | const char32_t *, size_t, char16_t *) const noexcept final override { |
1160 | 0 | return 0; |
1161 | 0 | } |
1162 | | |
1163 | | simdutf_warn_unused size_t convert_utf32_to_utf16be( |
1164 | 0 | const char32_t *, size_t, char16_t *) const noexcept final override { |
1165 | 0 | return 0; |
1166 | 0 | } |
1167 | | |
1168 | | simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( |
1169 | 0 | const char32_t *, size_t, char16_t *) const noexcept final override { |
1170 | 0 | return result(error_code::OTHER, 0); |
1171 | 0 | } |
1172 | | |
1173 | | simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( |
1174 | 0 | const char32_t *, size_t, char16_t *) const noexcept final override { |
1175 | 0 | return result(error_code::OTHER, 0); |
1176 | 0 | } |
1177 | | |
1178 | | simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( |
1179 | 0 | const char32_t *, size_t, char16_t *) const noexcept final override { |
1180 | 0 | return 0; |
1181 | 0 | } |
1182 | | |
1183 | | simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( |
1184 | 0 | const char32_t *, size_t, char16_t *) const noexcept final override { |
1185 | 0 | return 0; |
1186 | 0 | } |
1187 | | |
1188 | | simdutf_warn_unused size_t convert_utf16le_to_utf32( |
1189 | 0 | const char16_t *, size_t, char32_t *) const noexcept final override { |
1190 | 0 | return 0; |
1191 | 0 | } |
1192 | | |
1193 | | simdutf_warn_unused size_t convert_utf16be_to_utf32( |
1194 | 0 | const char16_t *, size_t, char32_t *) const noexcept final override { |
1195 | 0 | return 0; |
1196 | 0 | } |
1197 | | |
1198 | | simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( |
1199 | 0 | const char16_t *, size_t, char32_t *) const noexcept final override { |
1200 | 0 | return result(error_code::OTHER, 0); |
1201 | 0 | } |
1202 | | |
1203 | | simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( |
1204 | 0 | const char16_t *, size_t, char32_t *) const noexcept final override { |
1205 | 0 | return result(error_code::OTHER, 0); |
1206 | 0 | } |
1207 | | |
1208 | | simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( |
1209 | 0 | const char16_t *, size_t, char32_t *) const noexcept final override { |
1210 | 0 | return 0; |
1211 | 0 | } |
1212 | | |
1213 | | simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( |
1214 | 0 | const char16_t *, size_t, char32_t *) const noexcept final override { |
1215 | 0 | return 0; |
1216 | 0 | } |
1217 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
1218 | | |
1219 | | #if SIMDUTF_FEATURE_UTF16 |
// change_endianness_utf16 has an empty body: the arguments are ignored.
1220 | | void change_endianness_utf16(const char16_t *, size_t, |
1221 | 0 | char16_t *) const noexcept final override {} |
1222 | | |
1223 | | simdutf_warn_unused size_t |
1224 | 0 | count_utf16le(const char16_t *, size_t) const noexcept final override { |
1225 | 0 | return 0; |
1226 | 0 | } |
1227 | | |
1228 | | simdutf_warn_unused size_t |
1229 | 0 | count_utf16be(const char16_t *, size_t) const noexcept final override { |
1230 | 0 | return 0; |
1231 | 0 | } |
1232 | | #endif // SIMDUTF_FEATURE_UTF16 |
1233 | | |
1234 | | #if SIMDUTF_FEATURE_UTF8 |
1235 | | simdutf_warn_unused size_t count_utf8(const char *, |
1236 | 0 | size_t) const noexcept final override { |
1237 | 0 | return 0; |
1238 | 0 | } |
1239 | | #endif // SIMDUTF_FEATURE_UTF8 |
1240 | | |
// Length estimators: every *_length_from_* override below returns 0.
1241 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
1242 | | simdutf_warn_unused size_t |
1243 | 0 | latin1_length_from_utf8(const char *, size_t) const noexcept override { |
1244 | 0 | return 0; |
1245 | 0 | } |
1246 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
1247 | | |
1248 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
1249 | | simdutf_warn_unused size_t |
1250 | 0 | utf8_length_from_latin1(const char *, size_t) const noexcept override { |
1251 | 0 | return 0; |
1252 | 0 | } |
1253 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
1254 | | |
1255 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1256 | | simdutf_warn_unused size_t |
1257 | 0 | utf8_length_from_utf16le(const char16_t *, size_t) const noexcept override { |
1258 | 0 | return 0; |
1259 | 0 | } |
1260 | | |
1261 | | simdutf_warn_unused size_t |
1262 | 0 | utf8_length_from_utf16be(const char16_t *, size_t) const noexcept override { |
1263 | 0 | return 0; |
1264 | 0 | } |
1265 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1266 | | |
1267 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
1268 | | simdutf_warn_unused size_t |
1269 | 0 | utf32_length_from_utf16le(const char16_t *, size_t) const noexcept override { |
1270 | 0 | return 0; |
1271 | 0 | } |
1272 | | |
1273 | | simdutf_warn_unused size_t |
1274 | 0 | utf32_length_from_utf16be(const char16_t *, size_t) const noexcept override { |
1275 | 0 | return 0; |
1276 | 0 | } |
1277 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
1278 | | |
1279 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1280 | | simdutf_warn_unused size_t |
1281 | 0 | utf16_length_from_utf8(const char *, size_t) const noexcept override { |
1282 | 0 | return 0; |
1283 | 0 | } |
1284 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1285 | | |
1286 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1287 | | simdutf_warn_unused size_t |
1288 | 0 | utf8_length_from_utf32(const char32_t *, size_t) const noexcept override { |
1289 | 0 | return 0; |
1290 | 0 | } |
1291 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1292 | | |
1293 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
1294 | | simdutf_warn_unused size_t |
1295 | 0 | utf16_length_from_utf32(const char32_t *, size_t) const noexcept override { |
1296 | 0 | return 0; |
1297 | 0 | } |
1298 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
1299 | | |
1300 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1301 | | simdutf_warn_unused size_t |
1302 | 0 | utf32_length_from_utf8(const char *, size_t) const noexcept override { |
1303 | 0 | return 0; |
1304 | 0 | } |
1305 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1306 | | |
// Base64: decoders return error_code::OTHER, encoders return 0 and both find
// overloads return nullptr; none of the arguments is used.
1307 | | #if SIMDUTF_FEATURE_BASE64 |
1308 | | simdutf_warn_unused result |
1309 | | base64_to_binary(const char *, size_t, char *, base64_options, |
1310 | 0 | last_chunk_handling_options) const noexcept override { |
1311 | 0 | return result(error_code::OTHER, 0); |
1312 | 0 | } |
1313 | | |
1314 | | simdutf_warn_unused full_result base64_to_binary_details( |
1315 | | const char *, size_t, char *, base64_options, |
1316 | 0 | last_chunk_handling_options) const noexcept override { |
1317 | 0 | return full_result(error_code::OTHER, 0, 0); |
1318 | 0 | } |
1319 | | |
1320 | | simdutf_warn_unused result |
1321 | | base64_to_binary(const char16_t *, size_t, char *, base64_options, |
1322 | 0 | last_chunk_handling_options) const noexcept override { |
1323 | 0 | return result(error_code::OTHER, 0); |
1324 | 0 | } |
1325 | | |
1326 | | simdutf_warn_unused full_result base64_to_binary_details( |
1327 | | const char16_t *, size_t, char *, base64_options, |
1328 | 0 | last_chunk_handling_options) const noexcept override { |
1329 | 0 | return full_result(error_code::OTHER, 0, 0); |
1330 | 0 | } |
1331 | | |
1332 | | size_t binary_to_base64(const char *, size_t, char *, |
1333 | 0 | base64_options) const noexcept override { |
1334 | 0 | return 0; |
1335 | 0 | } |
1336 | | size_t binary_to_base64_with_lines(const char *, size_t, char *, size_t, |
1337 | 0 | base64_options) const noexcept override { |
1338 | 0 | return 0; |
1339 | 0 | } |
1340 | 0 | const char *find(const char *, const char *, char) const noexcept override { |
1341 | 0 | return nullptr; |
1342 | 0 | } |
1343 | | const char16_t *find(const char16_t *, const char16_t *, |
1344 | 0 | char16_t) const noexcept override { |
1345 | 0 | return nullptr; |
1346 | 0 | } |
1347 | | #endif // SIMDUTF_FEATURE_BASE64 |
1348 | | |
// Passes the name "unsupported", a fixed description and 0 to the
// implementation base-class constructor.
1349 | | unsupported_implementation() |
1350 | 0 | : implementation("unsupported", |
1351 | 0 | "Unsupported CPU (no detected SIMD instructions)", 0) {} |
1352 | | }; |
1353 | | |
// Returns the address of a function-local static const
// unsupported_implementation; every call yields the same pointer.
1354 | 0 | const unsupported_implementation *get_unsupported_singleton() { |
1355 | 0 | static const unsupported_implementation unsupported_singleton{}; |
1356 | 0 | return &unsupported_singleton; |
1357 | 0 | } |
// Compile-time guard: unsupported_implementation must stay trivially
// destructible.
// NOTE(review): presumably required because get_unsupported_singleton() keeps
// an instance as a function-local static - confirm the intent.
1358 | | static_assert(std::is_trivially_destructible<unsupported_implementation>::value, |
1359 | | "unsupported_singleton should be trivially destructible"); |
1360 | | |
// Number of entries in the list returned by
// internal::get_available_implementation_pointers().
1361 | 0 | size_t available_implementation_list::size() const noexcept { |
1362 | 0 | return internal::get_available_implementation_pointers().size(); |
1363 | 0 | } |
// Pointer to the first entry of the list returned by
// internal::get_available_implementation_pointers().
1364 | | const implementation *const * |
1365 | 1 | available_implementation_list::begin() const noexcept { |
1366 | 1 | return internal::get_available_implementation_pointers().begin(); |
1367 | 1 | } |
// One-past-the-last pointer of the list returned by
// internal::get_available_implementation_pointers().
1368 | | const implementation *const * |
1369 | 1 | available_implementation_list::end() const noexcept { |
1370 | 1 | return internal::get_available_implementation_pointers().end(); |
1371 | 1 | } |
// Returns the first listed implementation whose required instruction sets are
// all present in the mask from internal::detect_supported_architectures().
// The mask is queried once, before the loop. If no entry matches, the
// unsupported singleton is returned instead.
1372 | | const implementation * |
1373 | 0 | available_implementation_list::detect_best_supported() const noexcept { |
1374 | | // They are prelisted in priority order, so we just go down the list |
1375 | 0 | uint32_t supported_instruction_sets = |
1376 | 0 | internal::detect_supported_architectures(); |
1377 | 0 | for (const implementation *impl : |
1378 | 0 | internal::get_available_implementation_pointers()) { |
1379 | 0 | uint32_t required_instruction_sets = impl->required_instruction_sets(); |
// Supported when every required bit is also set in the detected mask.
1380 | 0 | if ((supported_instruction_sets & required_instruction_sets) == |
1381 | 0 | required_instruction_sets) { |
1382 | 0 | return impl; |
1383 | 0 | } |
1384 | 0 | } |
1385 | 0 | return get_unsupported_singleton(); // this should never happen? |
1386 | 0 | } |
1387 | | |
// Chooses the implementation to use, stores it in get_active_implementation()
// and returns it.
//   - If the environment variable SIMDUTF_FORCE_IMPLEMENTATION is set, its value
//     is looked up by name in get_available_implementations(); a hit becomes
//     the active implementation, a miss installs the unsupported singleton.
//   - Otherwise the result of detect_best_supported() is installed.
// NOTE(review): no supported_by_runtime_system() check on the forced
// implementation is visible in this function; whether the name lookup performs
// one cannot be told from here - confirm.
1388 | | const implementation * |
1389 | 0 | detect_best_supported_implementation_on_first_use::set_best() const noexcept { |
1390 | 0 | SIMDUTF_PUSH_DISABLE_WARNINGS |
1391 | | SIMDUTF_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: |
1392 | | // manually verified this is safe |
1393 | 0 | char *force_implementation_name = getenv("SIMDUTF_FORCE_IMPLEMENTATION"); |
1394 | 0 | SIMDUTF_POP_DISABLE_WARNINGS |
1395 | 

1396 | 0 | if (force_implementation_name) { |
1397 | 0 | auto force_implementation = |
1398 | 0 | get_available_implementations()[force_implementation_name]; |
1399 | 0 | if (force_implementation) { |
1400 | 0 | return get_active_implementation() = force_implementation; |
1401 | 0 | } else { |
1402 | | // Note: abort() and stderr usage within the library is forbidden. |
1403 | 0 | return get_active_implementation() = get_unsupported_singleton(); |
1404 | 0 | } |
1405 | 0 | } |
1406 | 0 | return get_active_implementation() = |
1407 | 0 | get_available_implementations().detect_best_supported(); |
1408 | 0 | } |
1409 | | |
1410 | | } // namespace internal |
1411 | | |
1412 | | /** |
1413 | | * The list of available implementations compiled into simdutf. |
     | | * |
     | | * Returns a reference to a function-local static |
     | | * internal::available_implementation_list, so every call yields the same |
     | | * object. |
1414 | | */ |
1415 | | SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list & |
1416 | 1 | get_available_implementations() { |
1417 | 1 | static const internal::available_implementation_list |
1418 | 1 | available_implementations{}; |
1419 | 1 | return available_implementations; |
1420 | 1 | } |
1421 | | |
1422 | | /** |
1423 | | * The active implementation. |
     | | * |
     | | * Returns a reference to a function-local static atomic_ptr. |
     | | * With SIMDUTF_SINGLE_IMPLEMENTATION it is initialised from |
     | | * internal::get_single_implementation(). Otherwise it initially points at a |
     | | * static detect_best_supported_implementation_on_first_use object, whose |
     | | * methods replace the pointer via set_best() when first called. |
1424 | | */ |
1425 | | SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> & |
1426 | 0 | get_active_implementation() { |
1427 | | #if SIMDUTF_SINGLE_IMPLEMENTATION |
1428 | | // skip runtime detection |
1429 | | static internal::atomic_ptr<const implementation> active_implementation{ |
1430 | | internal::get_single_implementation()}; |
1431 | | return active_implementation; |
1432 | | #else |
1433 | 0 | static const internal::detect_best_supported_implementation_on_first_use |
1434 | 0 | detect_best_supported_implementation_on_first_use_singleton; |
1435 | 0 | static internal::atomic_ptr<const implementation> active_implementation{ |
1436 | 0 | &detect_best_supported_implementation_on_first_use_singleton}; |
1437 | 0 | return active_implementation; |
1438 | 0 | #endif |
1439 | 0 | } |
1440 | | |
1441 | | #if SIMDUTF_SINGLE_IMPLEMENTATION |
1442 | | simdutf_really_inline const implementation *get_default_implementation() { |
1443 | | return internal::get_single_implementation(); |
1444 | | } |
1445 | | #else |
1446 | | simdutf_really_inline internal::atomic_ptr<const implementation> & |
1447 | 0 | get_default_implementation() { |
1448 | 0 | return get_active_implementation(); |
1449 | 0 | } |
1450 | | #endif |
1451 | | #define SIMDUTF_GET_CURRENT_IMPLEMENTATION |
1452 | | |
1453 | | #if SIMDUTF_FEATURE_UTF8 |
1454 | 0 | simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { |
1455 | 0 | return get_default_implementation()->validate_utf8(buf, len); |
1456 | 0 | } |
1457 | | simdutf_warn_unused result validate_utf8_with_errors(const char *buf, |
1458 | 0 | size_t len) noexcept { |
1459 | 0 | return get_default_implementation()->validate_utf8_with_errors(buf, len); |
1460 | 0 | } |
1461 | | #endif // SIMDUTF_FEATURE_UTF8 |
1462 | | |
1463 | | #if SIMDUTF_FEATURE_ASCII |
1464 | 0 | simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept { |
1465 | 0 | return get_default_implementation()->validate_ascii(buf, len); |
1466 | 0 | } |
1467 | | simdutf_warn_unused result validate_ascii_with_errors(const char *buf, |
1468 | 0 | size_t len) noexcept { |
1469 | 0 | return get_default_implementation()->validate_ascii_with_errors(buf, len); |
1470 | 0 | } |
1471 | | #endif // SIMDUTF_FEATURE_ASCII |
1472 | | |
1473 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII |
1474 | | simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf, |
1475 | 0 | size_t len) noexcept { |
1476 | 0 | return get_default_implementation()->validate_utf16le_as_ascii(buf, len); |
1477 | 0 | } |
1478 | | simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf, |
1479 | 0 | size_t len) noexcept { |
1480 | 0 | return get_default_implementation()->validate_utf16be_as_ascii(buf, len); |
1481 | 0 | } |
1482 | | simdutf_warn_unused bool validate_utf16_as_ascii(const char16_t *input, |
1483 | 0 | size_t length) noexcept { |
1484 | | #if SIMDUTF_IS_BIG_ENDIAN |
1485 | | return validate_utf16be_as_ascii(input, length); |
1486 | | #else |
1487 | 0 | return validate_utf16le_as_ascii(input, length); |
1488 | 0 | #endif |
1489 | 0 | } |
1490 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII |
1491 | | |
1492 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1493 | | simdutf_warn_unused size_t convert_utf8_to_utf16( |
1494 | 0 | const char *input, size_t length, char16_t *utf16_output) noexcept { |
1495 | | #if SIMDUTF_IS_BIG_ENDIAN |
1496 | | return convert_utf8_to_utf16be(input, length, utf16_output); |
1497 | | #else |
1498 | 0 | return convert_utf8_to_utf16le(input, length, utf16_output); |
1499 | 0 | #endif |
1500 | 0 | } |
1501 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1502 | | |
1503 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
1504 | | simdutf_warn_unused size_t convert_latin1_to_utf8(const char *buf, size_t len, |
1505 | 0 | char *utf8_output) noexcept { |
1506 | 0 | return get_default_implementation()->convert_latin1_to_utf8(buf, len, |
1507 | 0 | utf8_output); |
1508 | 0 | } |
1509 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
1510 | | |
1511 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
1512 | | simdutf_warn_unused size_t convert_latin1_to_utf16le( |
1513 | 0 | const char *buf, size_t len, char16_t *utf16_output) noexcept { |
1514 | 0 | return get_default_implementation()->convert_latin1_to_utf16le(buf, len, |
1515 | 0 | utf16_output); |
1516 | 0 | } |
1517 | | simdutf_warn_unused size_t convert_latin1_to_utf16be( |
1518 | 0 | const char *buf, size_t len, char16_t *utf16_output) noexcept { |
1519 | 0 | return get_default_implementation()->convert_latin1_to_utf16be(buf, len, |
1520 | 0 | utf16_output); |
1521 | 0 | } |
1522 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
1523 | | |
1524 | | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
1525 | | simdutf_warn_unused size_t convert_latin1_to_utf32( |
1526 | 0 | const char *buf, size_t len, char32_t *latin1_output) noexcept { |
1527 | 0 | return get_default_implementation()->convert_latin1_to_utf32(buf, len, |
1528 | 0 | latin1_output); |
1529 | 0 | } |
1530 | | // moved to the header file |
1531 | | // simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept |
1532 | | // simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept |
1533 | | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
1534 | | |
1535 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
1536 | | simdutf_warn_unused size_t convert_utf8_to_latin1( |
1537 | 0 | const char *buf, size_t len, char *latin1_output) noexcept { |
1538 | 0 | return get_default_implementation()->convert_utf8_to_latin1(buf, len, |
1539 | 0 | latin1_output); |
1540 | 0 | } |
1541 | | simdutf_warn_unused result convert_utf8_to_latin1_with_errors( |
1542 | 0 | const char *buf, size_t len, char *latin1_output) noexcept { |
1543 | 0 | return get_default_implementation()->convert_utf8_to_latin1_with_errors( |
1544 | 0 | buf, len, latin1_output); |
1545 | 0 | } |
1546 | | simdutf_warn_unused size_t convert_valid_utf8_to_latin1( |
1547 | 0 | const char *buf, size_t len, char *latin1_output) noexcept { |
1548 | 0 | return get_default_implementation()->convert_valid_utf8_to_latin1( |
1549 | 0 | buf, len, latin1_output); |
1550 | 0 | } |
1551 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
1552 | | |
1553 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1554 | | simdutf_warn_unused size_t convert_utf8_to_utf16le( |
1555 | 0 | const char *input, size_t length, char16_t *utf16_output) noexcept { |
1556 | 0 | return get_default_implementation()->convert_utf8_to_utf16le(input, length, |
1557 | 0 | utf16_output); |
1558 | 0 | } |
1559 | | simdutf_warn_unused size_t convert_utf8_to_utf16be( |
1560 | 0 | const char *input, size_t length, char16_t *utf16_output) noexcept { |
1561 | 0 | return get_default_implementation()->convert_utf8_to_utf16be(input, length, |
1562 | 0 | utf16_output); |
1563 | 0 | } |
1564 | | simdutf_warn_unused result convert_utf8_to_utf16_with_errors( |
1565 | 0 | const char *input, size_t length, char16_t *utf16_output) noexcept { |
1566 | | #if SIMDUTF_IS_BIG_ENDIAN |
1567 | | return convert_utf8_to_utf16be_with_errors(input, length, utf16_output); |
1568 | | #else |
1569 | 0 | return convert_utf8_to_utf16le_with_errors(input, length, utf16_output); |
1570 | 0 | #endif |
1571 | 0 | } |
1572 | | simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( |
1573 | 0 | const char *input, size_t length, char16_t *utf16_output) noexcept { |
1574 | 0 | return get_default_implementation()->convert_utf8_to_utf16le_with_errors( |
1575 | 0 | input, length, utf16_output); |
1576 | 0 | } |
1577 | | simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( |
1578 | 0 | const char *input, size_t length, char16_t *utf16_output) noexcept { |
1579 | 0 | return get_default_implementation()->convert_utf8_to_utf16be_with_errors( |
1580 | 0 | input, length, utf16_output); |
1581 | 0 | } |
1582 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1583 | | |
1584 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1585 | | simdutf_warn_unused size_t convert_utf8_to_utf32( |
1586 | 0 | const char *input, size_t length, char32_t *utf32_output) noexcept { |
1587 | 0 | return get_default_implementation()->convert_utf8_to_utf32(input, length, |
1588 | 0 | utf32_output); |
1589 | 0 | } |
1590 | | simdutf_warn_unused result convert_utf8_to_utf32_with_errors( |
1591 | 0 | const char *input, size_t length, char32_t *utf32_output) noexcept { |
1592 | 0 | return get_default_implementation()->convert_utf8_to_utf32_with_errors( |
1593 | 0 | input, length, utf32_output); |
1594 | 0 | } |
1595 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1596 | | |
1597 | | #if SIMDUTF_FEATURE_UTF16 |
1598 | | simdutf_warn_unused bool validate_utf16(const char16_t *buf, |
1599 | 0 | size_t len) noexcept { |
1600 | | #if SIMDUTF_IS_BIG_ENDIAN |
1601 | | return validate_utf16be(buf, len); |
1602 | | #else |
1603 | 0 | return validate_utf16le(buf, len); |
1604 | 0 | #endif |
1605 | 0 | } |
1606 | | void to_well_formed_utf16be(const char16_t *input, size_t len, |
1607 | 0 | char16_t *output) noexcept { |
1608 | 0 | return get_default_implementation()->to_well_formed_utf16be(input, len, |
1609 | 0 | output); |
1610 | 0 | } |
1611 | | void to_well_formed_utf16le(const char16_t *input, size_t len, |
1612 | 0 | char16_t *output) noexcept { |
1613 | 0 | return get_default_implementation()->to_well_formed_utf16le(input, len, |
1614 | 0 | output); |
1615 | 0 | } |
1616 | | void to_well_formed_utf16(const char16_t *input, size_t len, |
1617 | 0 | char16_t *output) noexcept { |
1618 | | #if SIMDUTF_IS_BIG_ENDIAN |
1619 | | to_well_formed_utf16be(input, len, output); |
1620 | | #else |
1621 | 0 | to_well_formed_utf16le(input, len, output); |
1622 | 0 | #endif |
1623 | 0 | } |
1624 | | #endif // SIMDUTF_FEATURE_UTF16 |
1625 | | |
1626 | | #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING |
1627 | | simdutf_warn_unused bool validate_utf16le(const char16_t *buf, |
1628 | 0 | size_t len) noexcept { |
1629 | 0 | return get_default_implementation()->validate_utf16le(buf, len); |
1630 | 0 | } |
1631 | | #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING |
1632 | | |
1633 | | #if SIMDUTF_FEATURE_BASE64 |
1634 | | #if SIMDUTF_ATOMIC_REF |
1635 | | template <typename char_type> |
1636 | | simdutf_warn_unused result atomic_base64_to_binary_safe_impl( |
1637 | | const char_type *input, size_t length, char *output, size_t &outlen, |
1638 | | base64_options options, |
1639 | | last_chunk_handling_options last_chunk_handling_options, |
1640 | 0 | bool decode_up_to_bad_char) noexcept { |
1641 | 0 | #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) |
1642 | | // We use a smaller buffer during fuzzing to more easily detect bugs. |
1643 | 0 | constexpr size_t buffer_size = 128; |
1644 | | #else |
1645 | | // Arbitrary block sizes: 4KB for input. |
1646 | | constexpr size_t buffer_size = 4096; |
1647 | | #endif |
1648 | 0 | std::array<char, buffer_size> temp_buffer; |
1649 | 0 | const char_type *const input_init = input; |
1650 | 0 | size_t actual_out = 0; |
1651 | 0 | bool last_chunk = false; |
1652 | 0 | const size_t length_init = length; |
1653 | 0 | result r; |
1654 | 0 | while (!last_chunk) { |
1655 | 0 | last_chunk |= (temp_buffer.size() >= outlen - actual_out); |
1656 | 0 | size_t temp_outlen = (std::min)(temp_buffer.size(), outlen - actual_out); |
1657 | 0 | r = base64_to_binary_safe(input, length, temp_buffer.data(), temp_outlen, |
1658 | 0 | options, last_chunk_handling_options, |
1659 | 0 | decode_up_to_bad_char); |
1660 | | // We processed r.count characters of input. |
1661 | | // We wrote temp_outlen bytes to temp_buffer. |
1662 | | // If there is no ignorable characters, |
1663 | | // we should expect that values/4.0*3 == temp_outlen, |
1664 | | // except maybe at the tail end of the string. |
1665 | | |
1666 | | // |
1667 | | // We are assuming that when r.error == error_code::OUTPUT_BUFFER_TOO_SMALL, |
1668 | | // we truncate the results so that a number of base64 characters divisible |
1669 | | // by four is processed. |
1670 | | // |
1671 | | |
1672 | | // |
1673 | | // We wrote temp_outlen bytes to temp_buffer. |
1674 | | // We need to copy them to output. |
1675 | | // Copy with relaxed atomic operations to the output |
1676 | 0 | simdutf_log_assert(temp_outlen <= outlen - actual_out, |
1677 | 0 | "Output buffer is too small"); |
1678 | 0 | simdutf_log_assert(temp_outlen <= temp_buffer.size(), |
1679 | 0 | "Output buffer is too small"); |
1680 | |
|
1681 | 0 | simdutf::scalar::memcpy_atomic_write(output + actual_out, |
1682 | 0 | temp_buffer.data(), temp_outlen); |
1683 | 0 | actual_out += temp_outlen; |
1684 | 0 | length -= r.count; |
1685 | 0 | input += r.count; |
1686 | |
|
1687 | 0 | if (r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) { |
1688 | 0 | break; |
1689 | 0 | } |
1690 | 0 | } |
1691 | 0 | if (size_t(input - input_init) != length_init) { |
1692 | | // We did not process all input characters. In such case, we |
1693 | | // should not end with an ignorable character. See |
1694 | | // https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 |
1695 | 0 | while (input > input_init && base64_ignorable(*(input - 1), options)) { |
1696 | 0 | --input; |
1697 | 0 | } |
1698 | 0 | } |
1699 | 0 | outlen = actual_out; |
1700 | 0 | return {r.error, size_t(input - input_init)}; |
1701 | 0 | } Unexecuted instantiation: simdutf::result simdutf::atomic_base64_to_binary_safe_impl<char>(char const*, unsigned long, char*, unsigned long&, simdutf::base64_options, simdutf::last_chunk_handling_options, bool) Unexecuted instantiation: simdutf::result simdutf::atomic_base64_to_binary_safe_impl<char16_t>(char16_t const*, unsigned long, char*, unsigned long&, simdutf::base64_options, simdutf::last_chunk_handling_options, bool) |
1702 | | |
1703 | | simdutf_warn_unused result atomic_base64_to_binary_safe( |
1704 | | const char *input, size_t length, char *output, size_t &outlen, |
1705 | | base64_options options, |
1706 | | last_chunk_handling_options last_chunk_handling_options, |
1707 | 0 | bool decode_up_to_bad_char) noexcept { |
1708 | 0 | return atomic_base64_to_binary_safe_impl<char>( |
1709 | 0 | input, length, output, outlen, options, last_chunk_handling_options, |
1710 | 0 | decode_up_to_bad_char); |
1711 | 0 | } |
1712 | | simdutf_warn_unused result atomic_base64_to_binary_safe( |
1713 | | const char16_t *input, size_t length, char *output, size_t &outlen, |
1714 | | base64_options options, |
1715 | | last_chunk_handling_options last_chunk_handling_options, |
1716 | 0 | bool decode_up_to_bad_char) noexcept { |
1717 | 0 | return atomic_base64_to_binary_safe_impl<char16_t>( |
1718 | 0 | input, length, output, outlen, options, last_chunk_handling_options, |
1719 | 0 | decode_up_to_bad_char); |
1720 | 0 | } |
1721 | | #endif // SIMDUTF_ATOMIC_REF |
1722 | | |
1723 | | #endif // SIMDUTF_FEATURE_BASE64 |
1724 | | |
1725 | | #if SIMDUTF_FEATURE_UTF16 |
1726 | | simdutf_warn_unused bool validate_utf16be(const char16_t *buf, |
1727 | 0 | size_t len) noexcept { |
1728 | 0 | return get_default_implementation()->validate_utf16be(buf, len); |
1729 | 0 | } |
1730 | | simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, |
1731 | 0 | size_t len) noexcept { |
1732 | | #if SIMDUTF_IS_BIG_ENDIAN |
1733 | | return validate_utf16be_with_errors(buf, len); |
1734 | | #else |
1735 | 0 | return validate_utf16le_with_errors(buf, len); |
1736 | 0 | #endif |
1737 | 0 | } |
1738 | | simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, |
1739 | 0 | size_t len) noexcept { |
1740 | 0 | return get_default_implementation()->validate_utf16le_with_errors(buf, len); |
1741 | 0 | } |
1742 | | simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, |
1743 | 0 | size_t len) noexcept { |
1744 | 0 | return get_default_implementation()->validate_utf16be_with_errors(buf, len); |
1745 | 0 | } |
1746 | | #endif // SIMDUTF_FEATURE_UTF16 |
1747 | | |
1748 | | #if SIMDUTF_FEATURE_UTF32 |
1749 | | simdutf_warn_unused bool validate_utf32(const char32_t *buf, |
1750 | 0 | size_t len) noexcept { |
1751 | 0 | return get_default_implementation()->validate_utf32(buf, len); |
1752 | 0 | } |
1753 | | simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, |
1754 | 0 | size_t len) noexcept { |
1755 | 0 | return get_default_implementation()->validate_utf32_with_errors(buf, len); |
1756 | 0 | } |
1757 | | #endif // SIMDUTF_FEATURE_UTF32 |
1758 | | |
1759 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1760 | | simdutf_warn_unused size_t convert_valid_utf8_to_utf16( |
1761 | 0 | const char *input, size_t length, char16_t *utf16_buffer) noexcept { |
1762 | | #if SIMDUTF_IS_BIG_ENDIAN |
1763 | | return convert_valid_utf8_to_utf16be(input, length, utf16_buffer); |
1764 | | #else |
1765 | 0 | return convert_valid_utf8_to_utf16le(input, length, utf16_buffer); |
1766 | 0 | #endif |
1767 | 0 | } |
1768 | | simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( |
1769 | 0 | const char *input, size_t length, char16_t *utf16_buffer) noexcept { |
1770 | 0 | return get_default_implementation()->convert_valid_utf8_to_utf16le( |
1771 | 0 | input, length, utf16_buffer); |
1772 | 0 | } |
1773 | | simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( |
1774 | 0 | const char *input, size_t length, char16_t *utf16_buffer) noexcept { |
1775 | 0 | return get_default_implementation()->convert_valid_utf8_to_utf16be( |
1776 | 0 | input, length, utf16_buffer); |
1777 | 0 | } |
1778 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1779 | | |
1780 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1781 | | simdutf_warn_unused size_t convert_valid_utf8_to_utf32( |
1782 | 0 | const char *input, size_t length, char32_t *utf32_buffer) noexcept { |
1783 | 0 | return get_default_implementation()->convert_valid_utf8_to_utf32( |
1784 | 0 | input, length, utf32_buffer); |
1785 | 0 | } |
1786 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1787 | | |
1788 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1789 | | simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *buf, |
1790 | | size_t len, |
1791 | 0 | char *utf8_buffer) noexcept { |
1792 | | #if SIMDUTF_IS_BIG_ENDIAN |
1793 | | return convert_utf16be_to_utf8(buf, len, utf8_buffer); |
1794 | | #else |
1795 | 0 | return convert_utf16le_to_utf8(buf, len, utf8_buffer); |
1796 | 0 | #endif |
1797 | 0 | } |
1798 | | |
1799 | | simdutf_warn_unused size_t |
1800 | | convert_utf16_to_utf8_safe(const char16_t *buf, size_t len, char *utf8_output, |
1801 | 0 | size_t utf8_len) noexcept { |
1802 | 0 | const auto start{utf8_output}; |
1803 | | // We might be able to go faster by first scanning the input buffer to |
1804 | | // determine how many char16_t characters we can read without exceeding the |
1805 | | // utf8_len. This is a one-pass algorithm that has the benefit of not |
1806 | | // requiring a first pass to determine the length. |
1807 | 0 | while (true) { |
1808 | | // The worst case for convert_utf16_to_utf8 is when you go from 1 char16_t |
1809 | | // to 3 characters of UTF-8. So we can read at most utf8_len / 3 char16_t |
1810 | | // characters. |
1811 | 0 | auto read_len = std::min(len, utf8_len / 3); |
1812 | 0 | if (read_len <= 16) { |
1813 | 0 | break; |
1814 | 0 | } |
1815 | 0 | if (read_len < len) { |
1816 | | // If we have a high surrogate at the end of the buffer, we need to |
1817 | | // either read one more char16_t or backtrack. |
1818 | 0 | if (scalar::utf16::high_surrogate(buf[read_len - 1])) { |
1819 | 0 | read_len--; |
1820 | 0 | } |
1821 | 0 | } |
1822 | 0 | if (read_len == 0) { |
1823 | | // If we cannot read anything, we are done. |
1824 | 0 | break; |
1825 | 0 | } |
1826 | 0 | const auto write_len = |
1827 | 0 | simdutf::convert_utf16_to_utf8(buf, read_len, utf8_output); |
1828 | 0 | if (write_len == 0) { |
1829 | | // There was an error in the conversion, we cannot continue. |
1830 | 0 | return 0; // indicating failure |
1831 | 0 | } |
1832 | | |
1833 | 0 | utf8_output += write_len; |
1834 | 0 | utf8_len -= write_len; |
1835 | 0 | buf += read_len; |
1836 | 0 | len -= read_len; |
1837 | 0 | } |
1838 | | #if SIMDUTF_IS_BIG_ENDIAN |
1839 | | full_result r = |
1840 | | scalar::utf16_to_utf8::convert_with_errors<endianness::BIG, true>( |
1841 | | buf, len, utf8_output, utf8_len); |
1842 | | #else |
1843 | 0 | full_result r = |
1844 | 0 | scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE, true>( |
1845 | 0 | buf, len, utf8_output, utf8_len); |
1846 | 0 | #endif |
1847 | 0 | if (r.error != error_code::SUCCESS && |
1848 | 0 | r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) { |
1849 | | // If there was an error, we return 0 to indicate failure. |
1850 | 0 | return 0; // indicating failure |
1851 | 0 | } |
1852 | 0 | return r.output_count + (utf8_output - start); |
1853 | 0 | } |
1854 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1855 | | |
1856 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
1857 | | simdutf_warn_unused size_t convert_utf16_to_latin1( |
1858 | 0 | const char16_t *buf, size_t len, char *latin1_buffer) noexcept { |
1859 | | #if SIMDUTF_IS_BIG_ENDIAN |
1860 | | return convert_utf16be_to_latin1(buf, len, latin1_buffer); |
1861 | | #else |
1862 | 0 | return convert_utf16le_to_latin1(buf, len, latin1_buffer); |
1863 | 0 | #endif |
1864 | 0 | } |
1865 | | simdutf_warn_unused size_t convert_latin1_to_utf16( |
1866 | 0 | const char *buf, size_t len, char16_t *utf16_output) noexcept { |
1867 | | #if SIMDUTF_IS_BIG_ENDIAN |
1868 | | return convert_latin1_to_utf16be(buf, len, utf16_output); |
1869 | | #else |
1870 | 0 | return convert_latin1_to_utf16le(buf, len, utf16_output); |
1871 | 0 | #endif |
1872 | 0 | } |
1873 | | simdutf_warn_unused size_t convert_utf16be_to_latin1( |
1874 | 0 | const char16_t *buf, size_t len, char *latin1_buffer) noexcept { |
1875 | 0 | return get_default_implementation()->convert_utf16be_to_latin1(buf, len, |
1876 | 0 | latin1_buffer); |
1877 | 0 | } |
1878 | | simdutf_warn_unused size_t convert_utf16le_to_latin1( |
1879 | 0 | const char16_t *buf, size_t len, char *latin1_buffer) noexcept { |
1880 | 0 | return get_default_implementation()->convert_utf16le_to_latin1(buf, len, |
1881 | 0 | latin1_buffer); |
1882 | 0 | } |
1883 | | simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( |
1884 | 0 | const char16_t *buf, size_t len, char *latin1_buffer) noexcept { |
1885 | 0 | return get_default_implementation()->convert_valid_utf16be_to_latin1( |
1886 | 0 | buf, len, latin1_buffer); |
1887 | 0 | } |
1888 | | simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( |
1889 | 0 | const char16_t *buf, size_t len, char *latin1_buffer) noexcept { |
1890 | 0 | return get_default_implementation()->convert_valid_utf16le_to_latin1( |
1891 | 0 | buf, len, latin1_buffer); |
1892 | 0 | } |
1893 | | simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( |
1894 | 0 | const char16_t *buf, size_t len, char *latin1_buffer) noexcept { |
1895 | 0 | return get_default_implementation()->convert_utf16le_to_latin1_with_errors( |
1896 | 0 | buf, len, latin1_buffer); |
1897 | 0 | } |
1898 | | simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( |
1899 | 0 | const char16_t *buf, size_t len, char *latin1_buffer) noexcept { |
1900 | 0 | return get_default_implementation()->convert_utf16be_to_latin1_with_errors( |
1901 | 0 | buf, len, latin1_buffer); |
1902 | 0 | } |
1903 | | // moved to header file |
1904 | | // simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept |
1905 | | // simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept |
1906 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
1907 | | |
1908 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1909 | | simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *buf, |
1910 | | size_t len, |
1911 | 0 | char *utf8_buffer) noexcept { |
1912 | 0 | return get_default_implementation()->convert_utf16le_to_utf8(buf, len, |
1913 | 0 | utf8_buffer); |
1914 | 0 | } |
1915 | | simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *buf, |
1916 | | size_t len, |
1917 | 0 | char *utf8_buffer) noexcept { |
1918 | 0 | return get_default_implementation()->convert_utf16be_to_utf8(buf, len, |
1919 | 0 | utf8_buffer); |
1920 | 0 | } |
1921 | | simdutf_warn_unused result convert_utf16_to_utf8_with_errors( |
1922 | 0 | const char16_t *buf, size_t len, char *utf8_buffer) noexcept { |
1923 | | #if SIMDUTF_IS_BIG_ENDIAN |
1924 | | return convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer); |
1925 | | #else |
1926 | 0 | return convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer); |
1927 | 0 | #endif |
1928 | 0 | } |
1929 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1930 | | |
1931 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
1932 | | simdutf_warn_unused result convert_utf16_to_latin1_with_errors( |
1933 | 0 | const char16_t *buf, size_t len, char *latin1_buffer) noexcept { |
1934 | | #if SIMDUTF_IS_BIG_ENDIAN |
1935 | | return convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer); |
1936 | | #else |
1937 | 0 | return convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer); |
1938 | 0 | #endif |
1939 | 0 | } |
1940 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
1941 | | |
1942 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1943 | | simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( |
1944 | 0 | const char16_t *buf, size_t len, char *utf8_buffer) noexcept { |
1945 | 0 | return get_default_implementation()->convert_utf16le_to_utf8_with_errors( |
1946 | 0 | buf, len, utf8_buffer); |
1947 | 0 | } |
1948 | | simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( |
1949 | 0 | const char16_t *buf, size_t len, char *utf8_buffer) noexcept { |
1950 | 0 | return get_default_implementation()->convert_utf16be_to_utf8_with_errors( |
1951 | 0 | buf, len, utf8_buffer); |
1952 | 0 | } |
1953 | | simdutf_warn_unused size_t convert_valid_utf16_to_utf8( |
1954 | 0 | const char16_t *buf, size_t len, char *utf8_buffer) noexcept { |
1955 | | #if SIMDUTF_IS_BIG_ENDIAN |
1956 | | return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer); |
1957 | | #else |
1958 | 0 | return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer); |
1959 | 0 | #endif |
1960 | 0 | } |
1961 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1962 | | |
1963 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
1964 | | simdutf_warn_unused size_t convert_valid_utf16_to_latin1( |
1965 | 0 | const char16_t *buf, size_t len, char *latin1_buffer) noexcept { |
1966 | | #if SIMDUTF_IS_BIG_ENDIAN |
1967 | | return convert_valid_utf16be_to_latin1(buf, len, latin1_buffer); |
1968 | | #else |
1969 | 0 | return convert_valid_utf16le_to_latin1(buf, len, latin1_buffer); |
1970 | 0 | #endif |
1971 | 0 | } |
1972 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 |
1973 | | |
1974 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1975 | | simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( |
1976 | 0 | const char16_t *buf, size_t len, char *utf8_buffer) noexcept { |
1977 | 0 | return get_default_implementation()->convert_valid_utf16le_to_utf8( |
1978 | 0 | buf, len, utf8_buffer); |
1979 | 0 | } |
1980 | | simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( |
1981 | 0 | const char16_t *buf, size_t len, char *utf8_buffer) noexcept { |
1982 | 0 | return get_default_implementation()->convert_valid_utf16be_to_utf8( |
1983 | 0 | buf, len, utf8_buffer); |
1984 | 0 | } |
1985 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
1986 | | |
1987 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
1988 | | simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *buf, |
1989 | | size_t len, |
1990 | 0 | char *utf8_buffer) noexcept { |
1991 | 0 | return get_default_implementation()->convert_utf32_to_utf8(buf, len, |
1992 | 0 | utf8_buffer); |
1993 | 0 | } |
1994 | | simdutf_warn_unused result convert_utf32_to_utf8_with_errors( |
1995 | 0 | const char32_t *buf, size_t len, char *utf8_buffer) noexcept { |
1996 | 0 | return get_default_implementation()->convert_utf32_to_utf8_with_errors( |
1997 | 0 | buf, len, utf8_buffer); |
1998 | 0 | } |
1999 | | simdutf_warn_unused size_t convert_valid_utf32_to_utf8( |
2000 | 0 | const char32_t *buf, size_t len, char *utf8_buffer) noexcept { |
2001 | 0 | return get_default_implementation()->convert_valid_utf32_to_utf8(buf, len, |
2002 | 0 | utf8_buffer); |
2003 | 0 | } |
2004 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
2005 | | |
2006 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
2007 | | simdutf_warn_unused size_t convert_utf32_to_utf16( |
2008 | 0 | const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { |
2009 | | #if SIMDUTF_IS_BIG_ENDIAN |
2010 | | return convert_utf32_to_utf16be(buf, len, utf16_buffer); |
2011 | | #else |
2012 | 0 | return convert_utf32_to_utf16le(buf, len, utf16_buffer); |
2013 | 0 | #endif |
2014 | 0 | } |
2015 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
2016 | | |
2017 | | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
2018 | | simdutf_warn_unused size_t convert_utf32_to_latin1( |
2019 | 0 | const char32_t *input, size_t length, char *latin1_output) noexcept { |
2020 | 0 | return get_default_implementation()->convert_utf32_to_latin1(input, length, |
2021 | 0 | latin1_output); |
2022 | 0 | } |
2023 | | simdutf_warn_unused result convert_utf32_to_latin1_with_errors( |
2024 | 0 | const char32_t *input, size_t length, char *latin1_buffer) noexcept { |
2025 | 0 | return get_default_implementation()->convert_utf32_to_latin1_with_errors( |
2026 | 0 | input, length, latin1_buffer); |
2027 | 0 | } |
2028 | | simdutf_warn_unused size_t convert_valid_utf32_to_latin1( |
2029 | 0 | const char32_t *input, size_t length, char *latin1_buffer) noexcept { |
2030 | 0 | return get_default_implementation()->convert_valid_utf32_to_latin1( |
2031 | 0 | input, length, latin1_buffer); |
2032 | 0 | } |
2033 | | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 |
2034 | | |
2035 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
2036 | | simdutf_warn_unused size_t convert_utf32_to_utf16le( |
2037 | 0 | const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { |
2038 | 0 | return get_default_implementation()->convert_utf32_to_utf16le(buf, len, |
2039 | 0 | utf16_buffer); |
2040 | 0 | } |
2041 | | simdutf_warn_unused size_t convert_utf32_to_utf16be( |
2042 | 0 | const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { |
2043 | 0 | return get_default_implementation()->convert_utf32_to_utf16be(buf, len, |
2044 | 0 | utf16_buffer); |
2045 | 0 | } |
2046 | | simdutf_warn_unused result convert_utf32_to_utf16_with_errors( |
2047 | 0 | const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { |
2048 | | #if SIMDUTF_IS_BIG_ENDIAN |
2049 | | return convert_utf32_to_utf16be_with_errors(buf, len, utf16_buffer); |
2050 | | #else |
2051 | 0 | return convert_utf32_to_utf16le_with_errors(buf, len, utf16_buffer); |
2052 | 0 | #endif |
2053 | 0 | } |
2054 | | simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( |
2055 | 0 | const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { |
2056 | 0 | return get_default_implementation()->convert_utf32_to_utf16le_with_errors( |
2057 | 0 | buf, len, utf16_buffer); |
2058 | 0 | } |
2059 | | simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( |
2060 | 0 | const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { |
2061 | 0 | return get_default_implementation()->convert_utf32_to_utf16be_with_errors( |
2062 | 0 | buf, len, utf16_buffer); |
2063 | 0 | } |
2064 | | simdutf_warn_unused size_t convert_valid_utf32_to_utf16( |
2065 | 0 | const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { |
2066 | | #if SIMDUTF_IS_BIG_ENDIAN |
2067 | | return convert_valid_utf32_to_utf16be(buf, len, utf16_buffer); |
2068 | | #else |
2069 | 0 | return convert_valid_utf32_to_utf16le(buf, len, utf16_buffer); |
2070 | 0 | #endif |
2071 | 0 | } |
2072 | | simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( |
2073 | 0 | const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { |
2074 | 0 | return get_default_implementation()->convert_valid_utf32_to_utf16le( |
2075 | 0 | buf, len, utf16_buffer); |
2076 | 0 | } |
2077 | | simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( |
2078 | 0 | const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { |
2079 | 0 | return get_default_implementation()->convert_valid_utf32_to_utf16be( |
2080 | 0 | buf, len, utf16_buffer); |
2081 | 0 | } |
2082 | | simdutf_warn_unused size_t convert_utf16_to_utf32( |
2083 | 0 | const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { |
2084 | | #if SIMDUTF_IS_BIG_ENDIAN |
2085 | | return convert_utf16be_to_utf32(buf, len, utf32_buffer); |
2086 | | #else |
2087 | 0 | return convert_utf16le_to_utf32(buf, len, utf32_buffer); |
2088 | 0 | #endif |
2089 | 0 | } |
2090 | | simdutf_warn_unused size_t convert_utf16le_to_utf32( |
2091 | 0 | const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { |
2092 | 0 | return get_default_implementation()->convert_utf16le_to_utf32(buf, len, |
2093 | 0 | utf32_buffer); |
2094 | 0 | } |
2095 | | simdutf_warn_unused size_t convert_utf16be_to_utf32( |
2096 | 0 | const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { |
2097 | 0 | return get_default_implementation()->convert_utf16be_to_utf32(buf, len, |
2098 | 0 | utf32_buffer); |
2099 | 0 | } |
2100 | | simdutf_warn_unused result convert_utf16_to_utf32_with_errors( |
2101 | 0 | const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { |
2102 | | #if SIMDUTF_IS_BIG_ENDIAN |
2103 | | return convert_utf16be_to_utf32_with_errors(buf, len, utf32_buffer); |
2104 | | #else |
2105 | 0 | return convert_utf16le_to_utf32_with_errors(buf, len, utf32_buffer); |
2106 | 0 | #endif |
2107 | 0 | } |
2108 | | simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( |
2109 | 0 | const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { |
2110 | 0 | return get_default_implementation()->convert_utf16le_to_utf32_with_errors( |
2111 | 0 | buf, len, utf32_buffer); |
2112 | 0 | } |
2113 | | simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( |
2114 | 0 | const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { |
2115 | 0 | return get_default_implementation()->convert_utf16be_to_utf32_with_errors( |
2116 | 0 | buf, len, utf32_buffer); |
2117 | 0 | } |
2118 | | simdutf_warn_unused size_t convert_valid_utf16_to_utf32( |
2119 | 0 | const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { |
2120 | | #if SIMDUTF_IS_BIG_ENDIAN |
2121 | | return convert_valid_utf16be_to_utf32(buf, len, utf32_buffer); |
2122 | | #else |
2123 | 0 | return convert_valid_utf16le_to_utf32(buf, len, utf32_buffer); |
2124 | 0 | #endif |
2125 | 0 | } |
2126 | | simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( |
2127 | 0 | const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { |
2128 | 0 | return get_default_implementation()->convert_valid_utf16le_to_utf32( |
2129 | 0 | buf, len, utf32_buffer); |
2130 | 0 | } |
2131 | | simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( |
2132 | 0 | const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { |
2133 | 0 | return get_default_implementation()->convert_valid_utf16be_to_utf32( |
2134 | 0 | buf, len, utf32_buffer); |
2135 | 0 | } |
2136 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
2137 | | |
2138 | | #if SIMDUTF_FEATURE_UTF16 |
2139 | | void change_endianness_utf16(const char16_t *input, size_t length, |
2140 | 0 | char16_t *output) noexcept { |
2141 | 0 | get_default_implementation()->change_endianness_utf16(input, length, output); |
2142 | 0 | } |
2143 | | simdutf_warn_unused size_t count_utf16(const char16_t *input, |
2144 | 0 | size_t length) noexcept { |
2145 | | #if SIMDUTF_IS_BIG_ENDIAN |
2146 | | return count_utf16be(input, length); |
2147 | | #else |
2148 | 0 | return count_utf16le(input, length); |
2149 | 0 | #endif |
2150 | 0 | } |
2151 | | simdutf_warn_unused size_t count_utf16le(const char16_t *input, |
2152 | 0 | size_t length) noexcept { |
2153 | 0 | return get_default_implementation()->count_utf16le(input, length); |
2154 | 0 | } |
2155 | | simdutf_warn_unused size_t count_utf16be(const char16_t *input, |
2156 | 0 | size_t length) noexcept { |
2157 | 0 | return get_default_implementation()->count_utf16be(input, length); |
2158 | 0 | } |
2159 | | #endif // SIMDUTF_FEATURE_UTF16 |
2160 | | |
2161 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
2162 | | simdutf_warn_unused size_t count_utf8(const char *input, |
2163 | 0 | size_t length) noexcept { |
2164 | 0 | return get_default_implementation()->count_utf8(input, length); |
2165 | 0 | } |
2166 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
2167 | | |
2168 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
2169 | | simdutf_warn_unused size_t latin1_length_from_utf8(const char *buf, |
2170 | 0 | size_t len) noexcept { |
2171 | 0 | return get_default_implementation()->latin1_length_from_utf8(buf, len); |
2172 | 0 | } |
2173 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
2174 | | |
2175 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
2176 | | simdutf_warn_unused size_t utf8_length_from_latin1(const char *buf, |
2177 | 0 | size_t len) noexcept { |
2178 | 0 | return get_default_implementation()->utf8_length_from_latin1(buf, len); |
2179 | 0 | } |
2180 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
2181 | | |
2182 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
2183 | | simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, |
2184 | 0 | size_t length) noexcept { |
2185 | | #if SIMDUTF_IS_BIG_ENDIAN |
2186 | | return utf8_length_from_utf16be(input, length); |
2187 | | #else |
2188 | 0 | return utf8_length_from_utf16le(input, length); |
2189 | 0 | #endif |
2190 | 0 | } |
2191 | | simdutf_warn_unused result utf8_length_from_utf16_with_replacement( |
2192 | 0 | const char16_t *input, size_t length) noexcept { |
2193 | | #if SIMDUTF_IS_BIG_ENDIAN |
2194 | | return utf8_length_from_utf16be_with_replacement(input, length); |
2195 | | #else |
2196 | 0 | return utf8_length_from_utf16le_with_replacement(input, length); |
2197 | 0 | #endif |
2198 | 0 | } |
2199 | | simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, |
2200 | 0 | size_t length) noexcept { |
2201 | 0 | return get_default_implementation()->utf8_length_from_utf16le(input, length); |
2202 | 0 | } |
2203 | | simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, |
2204 | 0 | size_t length) noexcept { |
2205 | 0 | return get_default_implementation()->utf8_length_from_utf16be(input, length); |
2206 | 0 | } |
2207 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
2208 | | |
2209 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
2210 | | simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, |
2211 | 0 | size_t length) noexcept { |
2212 | | #if SIMDUTF_IS_BIG_ENDIAN |
2213 | | return utf32_length_from_utf16be(input, length); |
2214 | | #else |
2215 | 0 | return utf32_length_from_utf16le(input, length); |
2216 | 0 | #endif |
2217 | 0 | } |
2218 | | simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, |
2219 | 0 | size_t length) noexcept { |
2220 | 0 | return get_default_implementation()->utf32_length_from_utf16le(input, length); |
2221 | 0 | } |
2222 | | simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, |
2223 | 0 | size_t length) noexcept { |
2224 | 0 | return get_default_implementation()->utf32_length_from_utf16be(input, length); |
2225 | 0 | } |
2226 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
2227 | | |
2228 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
2229 | | simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, |
2230 | 0 | size_t length) noexcept { |
2231 | 0 | return get_default_implementation()->utf16_length_from_utf8(input, length); |
2232 | 0 | } |
2233 | | simdutf_warn_unused result utf8_length_from_utf16le_with_replacement( |
2234 | 0 | const char16_t *input, size_t length) noexcept { |
2235 | 0 | return get_default_implementation() |
2236 | 0 | ->utf8_length_from_utf16le_with_replacement(input, length); |
2237 | 0 | } |
2238 | | |
2239 | | simdutf_warn_unused result utf8_length_from_utf16be_with_replacement( |
2240 | 0 | const char16_t *input, size_t length) noexcept { |
2241 | 0 | return get_default_implementation() |
2242 | 0 | ->utf8_length_from_utf16be_with_replacement(input, length); |
2243 | 0 | } |
2244 | | |
2245 | | simdutf_warn_unused size_t convert_utf16_to_utf8_with_replacement( |
2246 | 0 | const char16_t *input, size_t length, char *utf8_buffer) noexcept { |
2247 | | #if SIMDUTF_IS_BIG_ENDIAN |
2248 | | return convert_utf16be_to_utf8_with_replacement(input, length, utf8_buffer); |
2249 | | #else |
2250 | 0 | return convert_utf16le_to_utf8_with_replacement(input, length, utf8_buffer); |
2251 | 0 | #endif |
2252 | 0 | } |
2253 | | |
2254 | | simdutf_warn_unused size_t convert_utf16le_to_utf8_with_replacement( |
2255 | 0 | const char16_t *input, size_t length, char *utf8_buffer) noexcept { |
2256 | 0 | return get_default_implementation()->convert_utf16le_to_utf8_with_replacement( |
2257 | 0 | input, length, utf8_buffer); |
2258 | 0 | } |
2259 | | |
2260 | | simdutf_warn_unused size_t convert_utf16be_to_utf8_with_replacement( |
2261 | 0 | const char16_t *input, size_t length, char *utf8_buffer) noexcept { |
2262 | 0 | return get_default_implementation()->convert_utf16be_to_utf8_with_replacement( |
2263 | 0 | input, length, utf8_buffer); |
2264 | 0 | } |
2265 | | |
2266 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 |
2267 | | |
2268 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
2269 | | simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, |
2270 | 0 | size_t length) noexcept { |
2271 | 0 | return get_default_implementation()->utf8_length_from_utf32(input, length); |
2272 | 0 | } |
2273 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
2274 | | |
2275 | | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
2276 | | simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, |
2277 | 0 | size_t length) noexcept { |
2278 | 0 | return get_default_implementation()->utf16_length_from_utf32(input, length); |
2279 | 0 | } |
2280 | | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 |
2281 | | |
2282 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
2283 | | simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, |
2284 | 0 | size_t length) noexcept { |
2285 | 0 | return get_default_implementation()->utf32_length_from_utf8(input, length); |
2286 | 0 | } |
2287 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 |
2288 | | |
2289 | | #if SIMDUTF_FEATURE_BASE64 |
2290 | | |
// The following declarations have been moved to implementation.h:
//
//   simdutf_warn_unused size_t
//   base64_length_from_binary(size_t length, base64_options option) noexcept;
//
//   simdutf_warn_unused size_t base64_length_from_binary_with_lines(
//       size_t length, base64_options options, size_t line_length) noexcept;
2299 | | |
2300 | | simdutf_warn_unused const char *detail::find(const char *start, const char *end, |
2301 | 0 | char character) noexcept { |
2302 | 0 | return get_default_implementation()->find(start, end, character); |
2303 | 0 | } |
2304 | | simdutf_warn_unused const char16_t *detail::find(const char16_t *start, |
2305 | | const char16_t *end, |
2306 | 0 | char16_t character) noexcept { |
2307 | 0 | return get_default_implementation()->find(start, end, character); |
2308 | 0 | } |
2309 | | |
2310 | | simdutf_warn_unused size_t |
2311 | 0 | maximal_binary_length_from_base64(const char *input, size_t length) noexcept { |
2312 | 0 | return get_default_implementation()->maximal_binary_length_from_base64( |
2313 | 0 | input, length); |
2314 | 0 | } |
2315 | | |
2316 | | simdutf_warn_unused result base64_to_binary( |
2317 | | const char *input, size_t length, char *output, base64_options options, |
2318 | 0 | last_chunk_handling_options last_chunk_handling_options) noexcept { |
2319 | 0 | return get_default_implementation()->base64_to_binary( |
2320 | 0 | input, length, output, options, last_chunk_handling_options); |
2321 | 0 | } |
2322 | | |
2323 | | simdutf_warn_unused size_t maximal_binary_length_from_base64( |
2324 | 0 | const char16_t *input, size_t length) noexcept { |
2325 | 0 | return get_default_implementation()->maximal_binary_length_from_base64( |
2326 | 0 | input, length); |
2327 | 0 | } |
2328 | | |
2329 | | simdutf_warn_unused result base64_to_binary( |
2330 | | const char16_t *input, size_t length, char *output, base64_options options, |
2331 | 0 | last_chunk_handling_options last_chunk_handling_options) noexcept { |
2332 | 0 | return get_default_implementation()->base64_to_binary( |
2333 | 0 | input, length, output, options, last_chunk_handling_options); |
2334 | 0 | } |
2335 | | |
2336 | | // moved to implementation.h |
2337 | | // simdutf_warn_unused bool base64_ignorable(char input, |
2338 | | // base64_options options) noexcept |
2339 | | // simdutf_warn_unused bool base64_ignorable(char16_t input, |
2340 | | // base64_options options) noexcept |
2341 | | // simdutf_warn_unused bool base64_valid(char input, |
2342 | | // base64_options options) noexcept |
2343 | | // simdutf_warn_unused bool base64_valid(char16_t input, |
2344 | | // base64_options options) noexcept |
2345 | | // simdutf_warn_unused bool |
2346 | | // base64_valid_or_padding(char input, base64_options options) noexcept |
2347 | | // simdutf_warn_unused bool |
2348 | | // base64_valid_or_padding(char16_t input, base64_options options) noexcept |
2349 | | |
2350 | | // base64_to_binary_safe_impl is moved to |
2351 | | // include/simdutf/base64_implementation.h |
2352 | | |
2353 | | #if SIMDUTF_ATOMIC_REF |
2354 | | size_t atomic_binary_to_base64(const char *input, size_t length, char *output, |
2355 | 0 | base64_options options) noexcept { |
2356 | 0 | size_t retval = 0; |
2357 | 0 | #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) |
2358 | | // We use a smaller buffer during fuzzing to more easily detect bugs. |
2359 | 0 | constexpr size_t input_block_size = 128 * 3; |
2360 | | #else |
2361 | | // Arbitrary block sizes: 3KB for input which produces 4KB in output. |
2362 | | constexpr size_t input_block_size = 1024 * 3; |
2363 | | #endif |
2364 | 0 | std::array<char, input_block_size> inbuf; |
2365 | 0 | for (size_t i = 0; i < length; i += input_block_size) { |
2366 | 0 | const size_t current_block_size = std::min(input_block_size, length - i); |
2367 | 0 | simdutf::scalar::memcpy_atomic_read(inbuf.data(), input + i, |
2368 | 0 | current_block_size); |
2369 | 0 | const size_t written = binary_to_base64(inbuf.data(), current_block_size, |
2370 | 0 | output + retval, options); |
2371 | 0 | retval += written; |
2372 | 0 | } |
2373 | 0 | return retval; |
2374 | 0 | } |
2375 | | #endif // SIMDUTF_ATOMIC_REF |
2376 | | |
2377 | | #endif // SIMDUTF_FEATURE_BASE64 |
2378 | | |
2379 | | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
2380 | | simdutf_warn_unused size_t convert_latin1_to_utf8_safe( |
2381 | 0 | const char *buf, size_t len, char *utf8_output, size_t utf8_len) noexcept { |
2382 | 0 | const auto start{utf8_output}; |
2383 | |
|
2384 | 0 | while (true) { |
2385 | | // convert_latin1_to_utf8 will never write more than input length * 2 |
2386 | 0 | auto read_len = std::min(len, utf8_len >> 1); |
2387 | 0 | if (read_len <= 16) { |
2388 | 0 | break; |
2389 | 0 | } |
2390 | | |
2391 | 0 | const auto write_len = |
2392 | 0 | simdutf::convert_latin1_to_utf8(buf, read_len, utf8_output); |
2393 | |
|
2394 | 0 | utf8_output += write_len; |
2395 | 0 | utf8_len -= write_len; |
2396 | 0 | buf += read_len; |
2397 | 0 | len -= read_len; |
2398 | 0 | } |
2399 | |
|
2400 | 0 | utf8_output += |
2401 | 0 | scalar::latin1_to_utf8::convert_safe(buf, len, utf8_output, utf8_len); |
2402 | |
|
2403 | 0 | return utf8_output - start; |
2404 | 0 | } |
2405 | | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 |
2406 | | |
2407 | | #if SIMDUTF_FEATURE_BASE64 |
2408 | | simdutf_warn_unused result |
2409 | | base64_to_binary_safe(const char *input, size_t length, char *output, |
2410 | | size_t &outlen, base64_options options, |
2411 | | last_chunk_handling_options last_chunk_handling_options, |
2412 | 0 | bool decode_up_to_bad_char) noexcept { |
2413 | 0 | return base64_to_binary_safe_impl<char>(input, length, output, outlen, |
2414 | 0 | options, last_chunk_handling_options, |
2415 | 0 | decode_up_to_bad_char); |
2416 | 0 | } |
2417 | | simdutf_warn_unused result |
2418 | | base64_to_binary_safe(const char16_t *input, size_t length, char *output, |
2419 | | size_t &outlen, base64_options options, |
2420 | | last_chunk_handling_options last_chunk_handling_options, |
2421 | 0 | bool decode_up_to_bad_char) noexcept { |
2422 | 0 | return base64_to_binary_safe_impl<char16_t>( |
2423 | 0 | input, length, output, outlen, options, last_chunk_handling_options, |
2424 | 0 | decode_up_to_bad_char); |
2425 | 0 | } |
2426 | | |
2427 | | size_t binary_to_base64(const char *input, size_t length, char *output, |
2428 | 0 | base64_options options) noexcept { |
2429 | 0 | return get_default_implementation()->binary_to_base64(input, length, output, |
2430 | 0 | options); |
2431 | 0 | } |
2432 | | |
2433 | | size_t binary_to_base64_with_lines(const char *input, size_t length, |
2434 | | char *output, size_t line_length, |
2435 | 0 | base64_options options) noexcept { |
2436 | 0 | return get_default_implementation()->binary_to_base64_with_lines( |
2437 | 0 | input, length, output, line_length, options); |
2438 | 0 | } |
2439 | | #endif // SIMDUTF_FEATURE_BASE64 |
2440 | | |
2441 | | #if SIMDUTF_FEATURE_DETECT_ENCODING |
2442 | | simdutf_warn_unused simdutf::encoding_type |
2443 | 0 | autodetect_encoding(const char *buf, size_t length) noexcept { |
2444 | 0 | return get_default_implementation()->autodetect_encoding(buf, length); |
2445 | 0 | } |
2446 | | |
2447 | | simdutf_warn_unused int detect_encodings(const char *buf, |
2448 | 0 | size_t length) noexcept { |
2449 | 0 | return get_default_implementation()->detect_encodings(buf, length); |
2450 | 0 | } |
2451 | | #endif // SIMDUTF_FEATURE_DETECT_ENCODING |
2452 | | |
2453 | 0 | const implementation *builtin_implementation() { |
2454 | 0 | static const implementation *builtin_impl = |
2455 | 0 | get_available_implementations()[SIMDUTF_STRINGIFY( |
2456 | 0 | SIMDUTF_BUILTIN_IMPLEMENTATION)]; |
2457 | 0 | return builtin_impl; |
2458 | 0 | } |
2459 | | |
2460 | | #if SIMDUTF_FEATURE_UTF8 |
2461 | 0 | simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length) { |
2462 | 0 | return scalar::utf8::trim_partial_utf8(input, length); |
2463 | 0 | } |
2464 | | #endif // SIMDUTF_FEATURE_UTF8 |
2465 | | |
2466 | | #if SIMDUTF_FEATURE_UTF16 |
2467 | | simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, |
2468 | 0 | size_t length) { |
2469 | 0 | return scalar::utf16::trim_partial_utf16<BIG>(input, length); |
2470 | 0 | } |
2471 | | |
2472 | | simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, |
2473 | 0 | size_t length) { |
2474 | 0 | return scalar::utf16::trim_partial_utf16<LITTLE>(input, length); |
2475 | 0 | } |
2476 | | |
2477 | | simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, |
2478 | 0 | size_t length) { |
2479 | | #if SIMDUTF_IS_BIG_ENDIAN |
2480 | | return trim_partial_utf16be(input, length); |
2481 | | #else |
2482 | 0 | return trim_partial_utf16le(input, length); |
2483 | 0 | #endif |
2484 | 0 | } |
2485 | | #endif // SIMDUTF_FEATURE_UTF16 |
2486 | | |
2487 | | } // namespace simdutf |