/src/node/deps/v8/third_party/simdutf/simdutf.h
Line  | Count  | Source  | 
1  |  | /* auto-generated on 2025-07-13 10:46:57 -0400. Do not edit! */  | 
2  |  | /* begin file include/simdutf.h */  | 
3  |  | #ifndef SIMDUTF_H  | 
4  |  | #define SIMDUTF_H  | 
5  |  | #include <cstring>  | 
6  |  |  | 
7  |  | /* begin file include/simdutf/compiler_check.h */  | 
#ifndef SIMDUTF_COMPILER_CHECK_H
#define SIMDUTF_COMPILER_CHECK_H

// simdutf is a C++ library: stop right away when a C compiler is used.
#if !defined(__cplusplus)
  #error simdutf requires a C++ compiler
#endif

// SIMDUTF_CPLUSPLUS is the language-standard value tested by the checks
// below. A definition provided ahead of this header is left untouched.
#if !defined(SIMDUTF_CPLUSPLUS)
  #if !defined(_MSVC_LANG) || defined(__clang__)
    #define SIMDUTF_CPLUSPLUS __cplusplus
  #else
    // _MSVC_LANG is used in place of __cplusplus here, except that
    // _MSC_VER == 1900 is pinned to the C++11 value.
    #define SIMDUTF_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG)
  #endif
#endif

// One flag per language level, set to 1 when SIMDUTF_CPLUSPLUS is at least
// that level. A flag that is already defined is never redefined.

// C++ 11
#ifndef SIMDUTF_CPLUSPLUS11
  #if SIMDUTF_CPLUSPLUS >= 201103L
    #define SIMDUTF_CPLUSPLUS11 1
  #endif
#endif

// C++ 14
#ifndef SIMDUTF_CPLUSPLUS14
  #if SIMDUTF_CPLUSPLUS >= 201402L
    #define SIMDUTF_CPLUSPLUS14 1
  #endif
#endif

// C++ 17
#ifndef SIMDUTF_CPLUSPLUS17
  #if SIMDUTF_CPLUSPLUS >= 201703L
    #define SIMDUTF_CPLUSPLUS17 1
  #endif
#endif

// C++ 20
#ifndef SIMDUTF_CPLUSPLUS20
  #if SIMDUTF_CPLUSPLUS >= 202002L
    #define SIMDUTF_CPLUSPLUS20 1
  #endif
#endif

// C++ 23
#ifndef SIMDUTF_CPLUSPLUS23
  #if SIMDUTF_CPLUSPLUS >= 202302L
    #define SIMDUTF_CPLUSPLUS23 1
  #endif
#endif

// C++11 is the minimum supported language level.
#if !defined(SIMDUTF_CPLUSPLUS11)
  #error simdutf requires a compiler compliant with the C++11 standard
#endif

#endif // SIMDUTF_COMPILER_CHECK_H
53  |  | /* end file include/simdutf/compiler_check.h */  | 
54  |  | /* begin file include/simdutf/common_defs.h */  | 
55  |  | #ifndef SIMDUTF_COMMON_DEFS_H  | 
56  |  | #define SIMDUTF_COMMON_DEFS_H  | 
57  |  |  | 
58  |  | /* begin file include/simdutf/portability.h */  | 
59  |  | #ifndef SIMDUTF_PORTABILITY_H  | 
60  |  | #define SIMDUTF_PORTABILITY_H  | 
61  |  |  | 
62  |  |  | 
63  |  | #include <cfloat>  | 
64  |  | #include <cstddef>  | 
65  |  | #include <cstdint>  | 
66  |  | #include <cstdlib>  | 
67  |  | #ifndef _WIN32  | 
68  |  |   // strcasecmp, strncasecmp  | 
69  |  |   #include <strings.h>  | 
70  |  | #endif  | 
71  |  |  | 
// apple-clang/13 doesn't support std::convertible_to, so span support is
// switched off for Apple builds older than 14000000.
#if defined(__apple_build_version__) && __apple_build_version__ < 14000000
  #define SIMDUTF_SPAN_DISABLED 1
#endif

// Optional library/language features, only probed when compiling as C++20
// or later.
#if SIMDUTF_CPLUSPLUS20
  #include <version>
  // [[maybe_unused]] attribute
  #if __has_cpp_attribute(maybe_unused) >= 201603L
    #define SIMDUTF_MAYBE_UNUSED_AVAILABLE 1
  #endif // __has_cpp_attribute(maybe_unused) >= 201603L
  // std::atomic_ref
  #if __cpp_lib_atomic_ref >= 201806L
    #define SIMDUTF_ATOMIC_REF 1
  #endif // __cpp_lib_atomic_ref
  // std::span together with concepts, unless disabled above
  #if !defined(SIMDUTF_SPAN_DISABLED) && __cpp_concepts >= 201907L &&          \
      __cpp_lib_span >= 202002L
    #define SIMDUTF_SPAN 1
  #endif // __cpp_concepts >= 201907L && __cpp_lib_span >= 202002L
#endif
92  |  |  | 
93  |  | /**  | 
94  |  |  * We want to check that it is actually a little endian system at  | 
95  |  |  * compile-time.  | 
96  |  |  */  | 
97  |  |  | 
98  |  | #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)  | 
99  |  |   #define SIMDUTF_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)  | 
100  |  | #elif defined(_WIN32)  | 
101  |  |   #define SIMDUTF_IS_BIG_ENDIAN 0  | 
102  |  | #else  | 
103  |  |   #if defined(__APPLE__) ||                                                    \  | 
104  |  |       defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined  | 
105  |  |                            // __ORDER_BIG_ENDIAN__  | 
106  |  |     #include <machine/endian.h>  | 
107  |  |   #elif defined(sun) ||                                                        \  | 
108  |  |       defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__)  | 
109  |  |     #include <sys/byteorder.h>  | 
110  |  |   #else // defined(__APPLE__) || defined(__FreeBSD__)  | 
111  |  |  | 
112  |  |     #ifdef __has_include  | 
113  |  |       #if __has_include(<endian.h>)  | 
114  |  |         #include <endian.h>  | 
115  |  |       #endif //__has_include(<endian.h>)  | 
116  |  |     #endif   //__has_include  | 
117  |  |  | 
118  |  |   #endif // defined(__APPLE__) || defined(__FreeBSD__)  | 
119  |  |  | 
120  |  |   #ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)  | 
121  |  |     #define SIMDUTF_IS_BIG_ENDIAN 0  | 
122  |  |   #endif  | 
123  |  |  | 
124  |  |   #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  | 
125  |  |     #define SIMDUTF_IS_BIG_ENDIAN 0  | 
126  |  |   #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  | 
127  |  |     #define SIMDUTF_IS_BIG_ENDIAN 1  | 
128  |  |   #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  | 
129  |  |  | 
130  |  | #endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__  | 
131  |  |  | 
132  |  | /**  | 
133  |  |  * At this point in time, SIMDUTF_IS_BIG_ENDIAN is defined.  | 
134  |  |  */  | 
135  |  |  | 
/**
 * Visual Studio detection.
 *
 * SIMDUTF_VISUAL_STUDIO is set whenever _MSC_VER is defined. On top of it,
 * exactly one of the following is set so that clang under Visual Studio can
 * be told apart from regular Visual Studio:
 *  * SIMDUTF_CLANG_VISUAL_STUDIO   (__clang__ is defined)
 *  * SIMDUTF_REGULAR_VISUAL_STUDIO (best guess otherwise)
 *
 * Under clang for Windows, target pragmas are enabled so that part and only
 * part of the code gets compiled for advanced instructions.
 */
#if defined(_MSC_VER)
  #define SIMDUTF_VISUAL_STUDIO 1
  #if !defined(__clang__)
    #define SIMDUTF_REGULAR_VISUAL_STUDIO 1
  #else
    #define SIMDUTF_CLANG_VISUAL_STUDIO 1
  #endif // !__clang__
#endif   // _MSC_VER

#if defined(SIMDUTF_REGULAR_VISUAL_STUDIO)
  // Alternative operator tokens, see
  // https://en.wikipedia.org/wiki/C_alternative_tokens
  // The header should have no effect, except maybe under Visual Studio.
  #include <iso646.h>
#endif
163  |  |  | 
// Target architecture detection: the first matching branch wins.
#if !defined(_M_ARM64EC) && (defined(_M_AMD64) || defined(__x86_64__))
  // x86-64 (ARM64EC builds are handled as ARM64 below)
  #define SIMDUTF_IS_X86_64 1
#elif defined(_M_ARM64EC) || defined(_M_ARM64) || defined(__aarch64__)
  // 64-bit ARM
  #define SIMDUTF_IS_ARM64 1
#elif defined(_M_PPC64) || defined(__PPC64__)
  // 64-bit PowerPC, flagged only when both vector macros are present
  #if defined(__ALTIVEC__) && defined(__VEC__)
    #define SIMDUTF_IS_PPC64 1
  #endif
#elif defined(__s390__)
// s390 IBM system. Big endian. No macro is defined for it.
#elif (defined(__riscv__) || defined(__riscv)) && __riscv_xlen == 64
  // RISC-V 64-bit
  #define SIMDUTF_IS_RISCV64 1

  // Check for special compiler versions that implement pre v1.0 intrinsics.
  // (A check of the form `__riscv_v_intrinsic >= 1000000` is not enabled.)
  #if __riscv_v_intrinsic >= 11000
    #define SIMDUTF_HAS_RVV_INTRINSICS 1
  #endif

  // there is currently no way to detect this
  #define SIMDUTF_HAS_ZVBB_INTRINSICS 0

  #if SIMDUTF_HAS_RVV_INTRINSICS && __riscv_vector &&                          \
      __riscv_v_min_vlen >= 128 && __riscv_v_elen >= 64
    // RISC-V V extension
    #define SIMDUTF_IS_RVV 1
    #if SIMDUTF_HAS_ZVBB_INTRINSICS && __riscv_zvbb >= 1000000
      // RISC-V Vector Basic Bit-manipulation
      #define SIMDUTF_IS_ZVBB 1
    #endif
  #endif

#elif defined(__loongarch_lp64)
  // LoongArch 64-bit: LSX when __loongarch_sx is defined, LASX on top of it
  // when __loongarch_asx is defined as well.
  #if defined(__loongarch_sx)
    #define SIMDUTF_IS_LSX 1
    #if defined(__loongarch_asx)
      #define SIMDUTF_IS_LASX 1
    #endif
  #endif
#else
  // None of the known 64-bit platforms matched. The simdutf library is
  // designed for 64-bit processors; please use a 64-bit target such as x64
  // or 64-bit ARM for best performance.
  #define SIMDUTF_IS_32BITS 1

  // 32-bit platforms are not supported, but it can be handy to identify
  // them.
  #if defined(__i386__) || defined(_M_IX86)
    #define SIMDUTF_IS_X86_32BITS 1
  #elif defined(_M_ARM) || defined(__arm__)
    #define SIMDUTF_IS_ARM_32BITS 1
  #elif defined(_M_PPC) || defined(__PPC__)
    #define SIMDUTF_IS_PPC_32BITS 1
  #endif

#endif // architecture detection
225  |  |  | 
226  |  | #ifdef SIMDUTF_IS_32BITS  | 
227  |  |   #ifndef SIMDUTF_NO_PORTABILITY_WARNING  | 
228  |  |   // In the future, we may want to warn users of 32-bit systems that  | 
229  |  |   // the simdutf does not support accelerated kernels for such systems.  | 
230  |  |   #endif // SIMDUTF_NO_PORTABILITY_WARNING  | 
231  |  | #endif   // SIMDUTF_IS_32BITS  | 
232  |  |  | 
233  |  | // this is almost standard?  | 
// Two-level stringification: SIMDUTF_STRINGIFY lets its argument be
// macro-expanded first, then the helper turns the result into a string
// literal.
#define SIMDUTF_STRINGIFY_IMPLEMENTATION_(token) #token
#define SIMDUTF_STRINGIFY(token) SIMDUTF_STRINGIFY_IMPLEMENTATION_(token)
236  |  |  | 
237  |  | // Our fast kernels require 64-bit systems.  | 
238  |  | //  | 
239  |  | // On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.  | 
240  |  | // Furthermore, the number of SIMD registers is reduced.  | 
241  |  | //  | 
242  |  | // On 32-bit ARM, we would have smaller registers.  | 
243  |  | //  | 
244  |  | // The simdutf users should still have the fallback kernel. It is  | 
245  |  | // slower, but it should run everywhere.  | 
246  |  |  | 
247  |  | //  | 
248  |  | // Enable valid runtime implementations, and select  | 
249  |  | // SIMDUTF_BUILTIN_IMPLEMENTATION  | 
250  |  | //  | 
251  |  |  | 
252  |  | // We are going to use runtime dispatch.  | 
// SIMDUTF_TARGET_REGION(T) / SIMDUTF_UNTARGET_REGION bracket code that is
// compiled for the target T. They are only given a body on x86-64.
#if defined(SIMDUTF_IS_X86_64) && defined(__clang__)
  // clang does not have GCC push pop
  // warning: clang attribute push can't be used within a namespace in clang
  // up til 8.0 so SIMDUTF_TARGET_REGION and SIMDUTF_UNTARGET_REGION must be
  // *outside* of a namespace.
  #define SIMDUTF_TARGET_REGION(T)                                             \
    _Pragma(SIMDUTF_STRINGIFY(clang attribute push(                            \
        __attribute__((target(T))), apply_to = function)))
  #define SIMDUTF_UNTARGET_REGION _Pragma("clang attribute pop")
#elif defined(SIMDUTF_IS_X86_64) && defined(__GNUC__)
  // GCC is easier
  #define SIMDUTF_TARGET_REGION(T)                                             \
    _Pragma("GCC push_options") _Pragma(SIMDUTF_STRINGIFY(GCC target(T)))
  #define SIMDUTF_UNTARGET_REGION _Pragma("GCC pop_options")
#endif // x86: clang then gcc

// Default target region macros don't do anything.
#if !defined(SIMDUTF_TARGET_REGION)
  #define SIMDUTF_TARGET_REGION(T)
  #define SIMDUTF_UNTARGET_REGION
#endif
277  |  |  | 
278  |  | // Is threading enabled?  | 
// Threading is considered enabled when _REENTRANT or _MT is defined. A
// SIMDUTF_THREADS_ENABLED definition that already exists is kept.
#if (defined(_MT) || defined(_REENTRANT)) && !defined(SIMDUTF_THREADS_ENABLED)
  #define SIMDUTF_THREADS_ENABLED
#endif

// workaround for large stack sizes under -O0.
// https://github.com/simdutf/simdutf/issues/691
//
// Apple systems have small stack sizes in secondary threads, and a lack of
// compiler optimization may generate high stack usage. Threads are therefore
// disabled on Apple in debug mode, which is detected by the __OPTIMIZE__
// macro not being defined.
#if defined(__APPLE__) && !defined(__OPTIMIZE__)
  #undef SIMDUTF_THREADS_ENABLED
#endif
297  |  |  | 
// Case-insensitive comparison helpers.
#if !defined(SIMDUTF_VISUAL_STUDIO)
  // The strcasecmp, strncasecmp, and strcasestr functions do not work with
  // multibyte strings (e.g. UTF-8). So they are only useful for ASCII in our
  // context.
  // https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings
  #define simdutf_strcasecmp strcasecmp
  #define simdutf_strncasecmp strncasecmp
#else
  // This is one case where regular visual studio and clang under visual
  // studio are not distinguished: clang under Windows has _stricmp (like
  // visual studio) but not strcasecmp (as clang normally has).
  #define simdutf_strcasecmp _stricmp
  #define simdutf_strncasecmp _strnicmp
#endif
313  |  |  | 
// Set for GCC (not clang) with a major version of 11 or more.
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 11
  #define SIMDUTF_GCC11ORMORE 1
#endif // defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 11
319  |  |  | 
320  |  | #endif // SIMDUTF_PORTABILITY_H  | 
321  |  | /* end file include/simdutf/portability.h */  | 
322  |  | /* begin file include/simdutf/avx512.h */  | 
#ifndef SIMDUTF_AVX512_H_
#define SIMDUTF_AVX512_H_

/*
    It's possible to override AVX512 settings with cmake DCMAKE_CXX_FLAGS.

    All preprocessor directives have the form `SIMDUTF_HAS_AVX512{feature}`,
    where a feature is a code name for an extension.

    Each macro below is set to 1 when it is not already defined and the
    matching compiler macro is defined and equal to 1. Please see the listing
    below to find which features are supported.
*/

#if !defined(SIMDUTF_HAS_AVX512F) && defined(__AVX512F__) && __AVX512F__ == 1
  #define SIMDUTF_HAS_AVX512F 1
#endif

#if !defined(SIMDUTF_HAS_AVX512DQ) && defined(__AVX512DQ__) &&                 \
    __AVX512DQ__ == 1
  #define SIMDUTF_HAS_AVX512DQ 1
#endif

#if !defined(SIMDUTF_HAS_AVX512IFMA) && defined(__AVX512IFMA__) &&             \
    __AVX512IFMA__ == 1
  #define SIMDUTF_HAS_AVX512IFMA 1
#endif

#if !defined(SIMDUTF_HAS_AVX512CD) && defined(__AVX512CD__) &&                 \
    __AVX512CD__ == 1
  #define SIMDUTF_HAS_AVX512CD 1
#endif

#if !defined(SIMDUTF_HAS_AVX512BW) && defined(__AVX512BW__) &&                 \
    __AVX512BW__ == 1
  #define SIMDUTF_HAS_AVX512BW 1
#endif

#if !defined(SIMDUTF_HAS_AVX512VL) && defined(__AVX512VL__) &&                 \
    __AVX512VL__ == 1
  #define SIMDUTF_HAS_AVX512VL 1
#endif

#if !defined(SIMDUTF_HAS_AVX512VBMI) && defined(__AVX512VBMI__) &&             \
    __AVX512VBMI__ == 1
  #define SIMDUTF_HAS_AVX512VBMI 1
#endif

#if !defined(SIMDUTF_HAS_AVX512VBMI2) && defined(__AVX512VBMI2__) &&           \
    __AVX512VBMI2__ == 1
  #define SIMDUTF_HAS_AVX512VBMI2 1
#endif

#if !defined(SIMDUTF_HAS_AVX512VNNI) && defined(__AVX512VNNI__) &&             \
    __AVX512VNNI__ == 1
  #define SIMDUTF_HAS_AVX512VNNI 1
#endif

#if !defined(SIMDUTF_HAS_AVX512BITALG) && defined(__AVX512BITALG__) &&         \
    __AVX512BITALG__ == 1
  #define SIMDUTF_HAS_AVX512BITALG 1
#endif

#if !defined(SIMDUTF_HAS_AVX512VPOPCNTDQ) && defined(__AVX512VPOPCNTDQ__) &&   \
    __AVX512VPOPCNTDQ__ == 1
  #define SIMDUTF_HAS_AVX512VPOPCNTDQ 1
#endif

#endif // SIMDUTF_AVX512_H_
402  |  | /* end file include/simdutf/avx512.h */  | 
403  |  |  | 
404  |  | // Sometimes logging is useful, but we want it disabled by default  | 
405  |  | // and free of any logging code in release builds.  | 
#if !defined(SIMDUTF_LOGGING)
  // Logging disabled: both macros expand to nothing, so their arguments are
  // never evaluated.
  #define simdutf_log(message)
  #define simdutf_log_assert(condition, message)
#else
  #include <iostream>
  // Prints the message to std::cout, prefixed with the enclosing function
  // name and followed by the file and line of the call site.
  #define simdutf_log(message)                                                 \
    std::cout << "[" << __FUNCTION__ << "]: " << message << std::endl          \
              << "\t" << __FILE__ << ":" << __LINE__ << std::endl;
  // When the condition is false, prints the message to std::cerr in the same
  // format and then calls std::abort().
  #define simdutf_log_assert(condition, message)                               \
    do {                                                                       \
      if (!(condition)) {                                                      \
        std::cerr << "[" << __FUNCTION__ << "]: " << message << std::endl      \
                  << "\t" << __FILE__ << ":" << __LINE__ << std::endl;         \
        std::abort();                                                          \
      }                                                                        \
    } while (0)
#endif
423  |  |  | 
// Compiler-specific attributes, branch hints and warning-control macros.
#if !defined(SIMDUTF_REGULAR_VISUAL_STUDIO)
  // Everything but regular Visual Studio: GCC-style attributes and pragmas.

  // really inline in optimized builds (__OPTIMIZE__ or NDEBUG), plain inline
  // otherwise
  #if defined(__OPTIMIZE__) || defined(NDEBUG)
    #define simdutf_really_inline inline __attribute__((always_inline))
  #else
    #define simdutf_really_inline inline
  #endif
  // always inline, no matter what
  #define simdutf_always_inline inline __attribute__((always_inline))
  #define simdutf_never_inline inline __attribute__((noinline))
  #define SIMDUTF_DEPRECATED __attribute__((deprecated))

  #define simdutf_unused __attribute__((unused))
  #define simdutf_warn_unused __attribute__((warn_unused_result))

  // Branch hints; an existing definition is kept.
  #ifndef simdutf_likely
    #define simdutf_likely(x) __builtin_expect(!!(x), 1)
  #endif
  #ifndef simdutf_unlikely
    #define simdutf_unlikely(x) __builtin_expect(!!(x), 0)
  #endif
  // clang-format off
  #define SIMDUTF_PRAGMA(P) _Pragma(#P)
  #define SIMDUTF_DISABLE_GCC_WARNING(WARNING)                                 \
    SIMDUTF_PRAGMA(GCC diagnostic ignored #WARNING)
  #define SIMDUTF_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push")
  #define SIMDUTF_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop")
  // gcc doesn't seem to disable all warnings with all and extra, add warnings
  // here as necessary
  #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS                                    \
    SIMDUTF_PUSH_DISABLE_WARNINGS                                              \
    SIMDUTF_DISABLE_GCC_WARNING(-Weffc++)                                      \
    SIMDUTF_DISABLE_GCC_WARNING(-Wall)                                         \
    SIMDUTF_DISABLE_GCC_WARNING(-Wconversion)                                  \
    SIMDUTF_DISABLE_GCC_WARNING(-Wextra)                                       \
    SIMDUTF_DISABLE_GCC_WARNING(-Wattributes)                                  \
    SIMDUTF_DISABLE_GCC_WARNING(-Wimplicit-fallthrough)                        \
    SIMDUTF_DISABLE_GCC_WARNING(-Wnon-virtual-dtor)                            \
    SIMDUTF_DISABLE_GCC_WARNING(-Wreturn-type)                                 \
    SIMDUTF_DISABLE_GCC_WARNING(-Wshadow)                                      \
    SIMDUTF_DISABLE_GCC_WARNING(-Wunused-parameter)                            \
    SIMDUTF_DISABLE_GCC_WARNING(-Wunused-variable)
  #if defined(SIMDUTF_CLANG_VISUAL_STUDIO)
    #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS                                 \
      SIMDUTF_DISABLE_GCC_WARNING(-Wmicrosoft-include)
  #else
    #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS
  #endif
  #define SIMDUTF_DISABLE_DEPRECATED_WARNING                                   \
    SIMDUTF_DISABLE_GCC_WARNING(-Wdeprecated-declarations)
  #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING                              \
    SIMDUTF_DISABLE_GCC_WARNING(-Wstrict-overflow)
  #define SIMDUTF_DISABLE_UNUSED_WARNING                                       \
    SIMDUTF_PUSH_DISABLE_WARNINGS                                              \
    SIMDUTF_DISABLE_GCC_WARNING(-Wunused-function)                             \
    SIMDUTF_DISABLE_GCC_WARNING(-Wunused-const-variable)
  // clang-format on

#else // SIMDUTF_REGULAR_VISUAL_STUDIO
  #define SIMDUTF_DEPRECATED __declspec(deprecated)

  #define simdutf_really_inline __forceinline // really inline in release mode
  #define simdutf_always_inline __forceinline // always inline, no matter what
  #define simdutf_never_inline __declspec(noinline)

  // No counterpart is provided for these two: they expand to nothing.
  #define simdutf_unused
  #define simdutf_warn_unused

  // Branch hints are plain pass-throughs; an existing definition is kept.
  #ifndef simdutf_likely
    #define simdutf_likely(x) x
  #endif
  #ifndef simdutf_unlikely
    #define simdutf_unlikely(x) x
  #endif

  #define SIMDUTF_PUSH_DISABLE_WARNINGS __pragma(warning(push))
  #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS __pragma(warning(push, 0))
  #define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning(pop))
  #define SIMDUTF_DISABLE_VS_WARNING(WARNING_NUMBER)                           \
    __pragma(warning(disable : WARNING_NUMBER))
  // Get rid of Intellisense-only warnings (Code Analysis)
  // Though __has_include is C++17, it is supported in Visual Studio 2017 or
  // better (_MSC_VER>=1910).
  #ifdef __has_include
    #if __has_include(<CppCoreCheck\Warnings.h>)
      #include <CppCoreCheck\Warnings.h>
      #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS                               \
        SIMDUTF_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
    #endif
  #endif

  #ifndef SIMDUTF_DISABLE_UNDESIRED_WARNINGS
    #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS
  #endif

  #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_VS_WARNING(4996)
  #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING
  #define SIMDUTF_DISABLE_UNUSED_WARNING
#endif // SIMDUTF_REGULAR_VISUAL_STUDIO
522  |  |  | 
// Import/export decoration. A definition provided beforehand is left
// untouched.
#if !defined(SIMDUTF_DLLIMPORTEXPORT)
  #if !defined(SIMDUTF_VISUAL_STUDIO)
    // Nothing to decorate outside of Visual Studio.
    #define SIMDUTF_DLLIMPORTEXPORT
  /**
   * From here on it does not matter whether you are using the regular visual
   * studio or clang under visual studio.
   */
  #elif SIMDUTF_USING_LIBRARY
    #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport)
  #else
    #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport)
  #endif
#endif
539  |  |  | 
// simdutf_maybe_unused expands to [[maybe_unused]] when
// SIMDUTF_MAYBE_UNUSED_AVAILABLE is set, and to nothing otherwise.
#if !SIMDUTF_MAYBE_UNUSED_AVAILABLE
  #define simdutf_maybe_unused
#else
  #define simdutf_maybe_unused [[maybe_unused]]
#endif
545  |  |  | 
546  |  | #endif // SIMDUTF_COMMON_DEFS_H  | 
547  |  | /* end file include/simdutf/common_defs.h */  | 
548  |  | /* begin file include/simdutf/encoding_types.h */  | 
549  |  | #ifndef SIMDUTF_ENCODING_TYPES_H  | 
550  |  | #define SIMDUTF_ENCODING_TYPES_H  | 
551  |  | #include <string>  | 
552  |  |  | 
namespace simdutf {

/**
 * Encodings known to the library. Every non-zero value is a distinct power
 * of two; `unspecified` is zero.
 */
enum encoding_type {
  unspecified = 0,

  UTF8 = 1 << 0,     // BOM 0xef 0xbb 0xbf
  UTF16_LE = 1 << 1, // BOM 0xff 0xfe
  UTF16_BE = 1 << 2, // BOM 0xfe 0xff
  UTF32_LE = 1 << 3, // BOM 0xff 0xfe 0x00 0x00
  UTF32_BE = 1 << 4, // BOM 0x00 0x00 0xfe 0xff
  Latin1 = 1 << 5
};

/** Byte order. */
enum endianness {
  LITTLE = 0,
  BIG = 1
};

// Declared here, defined elsewhere.
bool match_system(endianness e);

// Declared here, defined elsewhere.
std::string to_string(encoding_type bom);

// Note that BOM for UTF8 is discouraged.
namespace BOM {

/**
 * Checks for a BOM. If there is none, returns unspecified.
 * @param byte          the string to process
 * @param length        the length of the string in code units
 * @return the corresponding encoding
 */
encoding_type check_bom(const uint8_t *byte, size_t length);
encoding_type check_bom(const char *byte, size_t length);

/**
 * Returns the size, in bytes, of the BOM for a given encoding type.
 * Note that UTF8 BOM are discouraged.
 * @param bom         the encoding type
 * @return the size in bytes of the corresponding BOM
 */
size_t bom_byte_size(encoding_type bom);

} // namespace BOM
} // namespace simdutf
594  |  | #endif  | 
595  |  | /* end file include/simdutf/encoding_types.h */  | 
596  |  | /* begin file include/simdutf/error.h */  | 
597  |  | #ifndef SIMDUTF_ERROR_H  | 
598  |  | #define SIMDUTF_ERROR_H  | 
599  |  | namespace simdutf { | 
600  |  |  | 
/**
 * Error codes reported by the library. The numeric values are spelled out
 * explicitly; they are the same values the implicit numbering produces.
 */
enum error_code {
  SUCCESS = 0,
  // Any byte must have fewer than 5 header bits.
  HEADER_BITS = 1,
  // The leading byte must be followed by N-1 continuation bytes, where N is
  // the UTF-8 character length. This is also the error when the input is
  // truncated.
  TOO_SHORT = 2,
  // We either have too many consecutive continuation bytes or the string
  // starts with a continuation byte.
  TOO_LONG = 3,
  // The decoded character must be above U+7F for two-byte characters, U+7FF
  // for three-byte characters, and U+FFFF for four-byte characters.
  OVERLONG = 4,
  // The decoded character must be less than or equal to U+10FFFF, less than
  // or equal to U+7F for ASCII OR less than or equal to U+FF for Latin1.
  TOO_LARGE = 5,
  // The decoded character must not be in U+D800...DFFF (UTF-8 or UTF-32) OR
  // a high surrogate must be followed by a low surrogate and a low surrogate
  // must be preceded by a high surrogate (UTF-16) OR there must be no
  // surrogate at all (Latin1).
  SURROGATE = 6,
  // Found a character that cannot be part of a valid base64 string. This may
  // include a misplaced padding character ('=').
  INVALID_BASE64_CHARACTER = 7,
  // The base64 input terminates with a single character, excluding padding
  // (=). It is also used in strict mode when padding is not adequate.
  BASE64_INPUT_REMAINDER = 8,
  // The base64 input terminates with non-zero padding bits.
  BASE64_EXTRA_BITS = 9,
  // The provided buffer is too small.
  OUTPUT_BUFFER_TOO_SMALL = 10,
  // Not related to validation/transcoding.
  OTHER = 11
};
#if SIMDUTF_CPLUSPLUS17
/**
 * Returns the name of an error code as text. Only available when
 * SIMDUTF_CPLUSPLUS17 is set.
 * @param code        the error code to name
 * @return the enumerator name; "OTHER" for OTHER and for any value that is
 *         not listed in the table below
 */
inline std::string_view error_to_string(error_code code) noexcept {
  struct named_code {
    error_code value;
    std::string_view text;
  };
  static constexpr named_code known_codes[] = {
      {SUCCESS, "SUCCESS"},
      {HEADER_BITS, "HEADER_BITS"},
      {TOO_SHORT, "TOO_SHORT"},
      {TOO_LONG, "TOO_LONG"},
      {OVERLONG, "OVERLONG"},
      {TOO_LARGE, "TOO_LARGE"},
      {SURROGATE, "SURROGATE"},
      {INVALID_BASE64_CHARACTER, "INVALID_BASE64_CHARACTER"},
      {BASE64_INPUT_REMAINDER, "BASE64_INPUT_REMAINDER"},
      {BASE64_EXTRA_BITS, "BASE64_EXTRA_BITS"},
      {OUTPUT_BUFFER_TOO_SMALL, "OUTPUT_BUFFER_TOO_SMALL"},
  };
  for (const named_code &entry : known_codes) {
    if (entry.value == code) {
      return entry.text;
    }
  }
  // Everything that is not listed above is reported as OTHER.
  return "OTHER";
}
#endif
660  |  |  | 
661  |  | struct result { | 
662  |  |   error_code error;  | 
663  |  |   size_t count; // In case of error, indicates the position of the error. In  | 
664  |  |                 // case of success, indicates the number of code units  | 
665  |  |                 // validated/written.  | 
666  |  |  | 
667  |  |   simdutf_really_inline result() noexcept  | 
668  | 0  |       : error{error_code::SUCCESS}, count{0} {} | 
669  |  |  | 
670  |  |   simdutf_really_inline result(error_code err, size_t pos) noexcept  | 
671  | 0  |       : error{err}, count{pos} {} | 
672  |  |  | 
673  | 0  |   simdutf_really_inline bool is_ok() const noexcept { | 
674  | 0  |     return error == error_code::SUCCESS;  | 
675  | 0  |   }  | 
676  |  |  | 
677  | 0  |   simdutf_really_inline bool is_err() const noexcept { | 
678  | 0  |     return error != error_code::SUCCESS;  | 
679  | 0  |   }  | 
680  |  | };  | 
681  |  |  | 
682  |  | struct full_result { | 
683  |  |   error_code error;  | 
684  |  |   size_t input_count;  | 
685  |  |   size_t output_count;  | 
686  |  |   bool padding_error = false; // true if the error is due to padding, only  | 
687  |  |                               // meaningful when error is not SUCCESS  | 
688  |  |  | 
689  |  |   simdutf_really_inline full_result() noexcept  | 
690  | 0  |       : error{error_code::SUCCESS}, input_count{0}, output_count{0} {} | 
691  |  |  | 
692  |  |   simdutf_really_inline full_result(error_code err, size_t pos_in,  | 
693  |  |                                     size_t pos_out) noexcept  | 
694  | 0  |       : error{err}, input_count{pos_in}, output_count{pos_out} {} | 
695  |  |   simdutf_really_inline full_result(error_code err, size_t pos_in,  | 
696  |  |                                     size_t pos_out, bool padding_err) noexcept  | 
697  |  |       : error{err}, input_count{pos_in}, output_count{pos_out}, | 
698  | 0  |         padding_error{padding_err} {} | 
699  |  |  | 
700  | 0  |   simdutf_really_inline operator result() const noexcept { | 
701  | 0  |     if (error == error_code::SUCCESS) { | 
702  | 0  |       return result{error, output_count}; | 
703  | 0  |     } else { | 
704  | 0  |       return result{error, input_count}; | 
705  | 0  |     }  | 
706  | 0  |   }  | 
707  |  | };  | 
708  |  |  | 
709  |  | } // namespace simdutf  | 
710  |  | #endif  | 
711  |  | /* end file include/simdutf/error.h */  | 
712  |  |  | 
713  |  | SIMDUTF_PUSH_DISABLE_WARNINGS  | 
714  |  | SIMDUTF_DISABLE_UNDESIRED_WARNINGS  | 
715  |  |  | 
716  |  | // Public API  | 
717  |  | /* begin file include/simdutf/simdutf_version.h */  | 
718  |  | // /include/simdutf/simdutf_version.h automatically generated by release.py,  | 
719  |  | // do not change by hand  | 
720  |  | #ifndef SIMDUTF_SIMDUTF_VERSION_H  | 
721  |  | #define SIMDUTF_SIMDUTF_VERSION_H  | 
722  |  |  | 
723  |  | /** The version of simdutf being used (major.minor.revision) */  | 
724  | 72  | #define SIMDUTF_VERSION "7.3.3"  | 
725  |  |  | 
namespace simdutf {
/// Numeric components of the simdutf version (major.minor.revision).
enum {
  /// MAJOR component of MAJOR.minor.revision.
  SIMDUTF_VERSION_MAJOR = 7,
  /// MINOR component of major.MINOR.revision.
  SIMDUTF_VERSION_MINOR = 3,
  /// REVISION component of major.minor.REVISION.
  SIMDUTF_VERSION_REVISION = 3
};
} // namespace simdutf
742  |  |  | 
743  |  | #endif // SIMDUTF_SIMDUTF_VERSION_H  | 
744  |  | /* end file include/simdutf/simdutf_version.h */  | 
745  |  | /* begin file include/simdutf/implementation.h */  | 
746  |  | #ifndef SIMDUTF_IMPLEMENTATION_H  | 
747  |  | #define SIMDUTF_IMPLEMENTATION_H  | 
748  |  | #if !defined(SIMDUTF_NO_THREADS)  | 
749  |  |   #include <atomic>  | 
750  |  | #endif  | 
751  |  | #include <string>  | 
752  |  | #ifdef SIMDUTF_INTERNAL_TESTS  | 
753  |  |   #include <vector>  | 
754  |  | #endif  | 
755  |  | /* begin file include/simdutf/internal/isadetection.h */  | 
756  |  | /* From  | 
757  |  | https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h  | 
758  |  | Highly modified.  | 
759  |  |  | 
760  |  | Copyright (c) 2016-     Facebook, Inc            (Adam Paszke)  | 
761  |  | Copyright (c) 2014-     Facebook, Inc            (Soumith Chintala)  | 
762  |  | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)  | 
763  |  | Copyright (c) 2012-2014 Deepmind Technologies    (Koray Kavukcuoglu)  | 
764  |  | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)  | 
765  |  | Copyright (c) 2011-2013 NYU                      (Clement Farabet)  | 
766  |  | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,  | 
767  |  | Iain Melvin, Jason Weston) Copyright (c) 2006      Idiap Research Institute  | 
768  |  | (Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,  | 
769  |  | Samy Bengio, Johnny Mariethoz)  | 
770  |  |  | 
771  |  | All rights reserved.  | 
772  |  |  | 
773  |  | Redistribution and use in source and binary forms, with or without  | 
774  |  | modification, are permitted provided that the following conditions are met:  | 
775  |  |  | 
776  |  | 1. Redistributions of source code must retain the above copyright  | 
777  |  |    notice, this list of conditions and the following disclaimer.  | 
778  |  |  | 
779  |  | 2. Redistributions in binary form must reproduce the above copyright  | 
780  |  |    notice, this list of conditions and the following disclaimer in the  | 
781  |  |    documentation and/or other materials provided with the distribution.  | 
782  |  |  | 
783  |  | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories  | 
784  |  | America and IDIAP Research Institute nor the names of its contributors may be  | 
785  |  |    used to endorse or promote products derived from this software without  | 
786  |  |    specific prior written permission.  | 
787  |  |  | 
788  |  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  | 
789  |  | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE  | 
790  |  | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE  | 
791  |  | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE  | 
792  |  | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR  | 
793  |  | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF  | 
794  |  | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS  | 
795  |  | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN  | 
796  |  | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)  | 
797  |  | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE  | 
798  |  | POSSIBILITY OF SUCH DAMAGE.  | 
799  |  | */  | 
800  |  |  | 
801  |  | #ifndef SIMDutf_INTERNAL_ISADETECTION_H  | 
802  |  | #define SIMDutf_INTERNAL_ISADETECTION_H  | 
803  |  |  | 
804  |  | #include <cstdint>  | 
805  |  | #include <cstdlib>  | 
806  |  | #if defined(_MSC_VER)  | 
807  |  |   #include <intrin.h>  | 
808  |  | #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)  | 
809  |  |   #include <cpuid.h>  | 
810  |  | #endif  | 
811  |  |  | 
812  |  |  | 
813  |  | // RISC-V ISA detection utilities  | 
814  |  | #if SIMDUTF_IS_RISCV64 && defined(__linux__)  | 
815  |  |   #include <unistd.h> // for syscall  | 
816  |  | // We define these ourselves, for backwards compatibility  | 
/// Key/value pair handed to the hwprobe syscall. A function-like macro with
/// the same name is defined right after this struct; the bare name (not
/// followed by a parenthesis) still refers to this type.
struct simdutf_riscv_hwprobe {
  int64_t key;    // which property is being queried
  uint64_t value; // read back by the caller after the probe
};
821  |  |   #define simdutf_riscv_hwprobe(...) syscall(258, __VA_ARGS__)  | 
822  |  |   #define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4  | 
823  |  |   #define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2)  | 
824  |  |   #define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17)  | 
825  |  | #endif // SIMDUTF_IS_RISCV64 && defined(__linux__)  | 
826  |  |  | 
827  |  | #if defined(__loongarch__) && defined(__linux__)  | 
828  |  |   #include <sys/auxv.h>  | 
829  |  | // bits/hwcap.h  | 
830  |  | // #define HWCAP_LOONGARCH_LSX             (1 << 4)  | 
831  |  | // #define HWCAP_LOONGARCH_LASX            (1 << 5)  | 
832  |  | #endif  | 
833  |  |  | 
834  |  | namespace simdutf { | 
835  |  | namespace internal { | 
836  |  |  | 
/**
 * Bit flags describing the SIMD/ISA extensions detected on the host.
 *
 * The flags are OR-ed together into the uint32_t returned by
 * detect_supported_architectures(), so flags that can be reported together
 * must not share a bit.
 */
enum instruction_set {
  DEFAULT = 0x0,
  NEON = 0x1,
  AVX2 = 0x4,
  SSE42 = 0x8,
  PCLMULQDQ = 0x10,
  BMI1 = 0x20,
  BMI2 = 0x40,
  ALTIVEC = 0x80,
  AVX512F = 0x100,
  AVX512DQ = 0x200,
  AVX512IFMA = 0x400,
  AVX512PF = 0x800,
  AVX512ER = 0x1000,
  AVX512CD = 0x2000,
  AVX512BW = 0x4000,
  AVX512VL = 0x8000,
  AVX512VBMI2 = 0x10000,
  // Previously 0x2000, which is the same bit as AVX512CD: detecting either
  // feature on x86-64 silently reported the other one as well. 0x20000 is
  // not used by any other flag.
  AVX512VPOPCNTDQ = 0x20000,
  // RVV and ZVBB share their numeric values with AVX512BW and AVX512VL. In
  // this file they are only set by the RISC-V detection code, while the
  // AVX-512 flags are only set by the x86-64 detection code.
  RVV = 0x4000,
  ZVBB = 0x8000,
  LSX = 0x40000,
  LASX = 0x80000,
};
861  |  |  | 
862  |  | #if defined(__PPC64__)  | 
863  |  |  | 
864  |  | static inline uint32_t detect_supported_architectures() { | 
865  |  |   return instruction_set::ALTIVEC;  | 
866  |  | }  | 
867  |  |  | 
868  |  | #elif SIMDUTF_IS_RISCV64  | 
869  |  |  | 
870  |  | static inline uint32_t detect_supported_architectures() { | 
871  |  |   uint32_t host_isa = instruction_set::DEFAULT;  | 
872  |  |   #if SIMDUTF_IS_RVV  | 
873  |  |   host_isa |= instruction_set::RVV;  | 
874  |  |   #endif  | 
875  |  |   #if SIMDUTF_IS_ZVBB  | 
876  |  |   host_isa |= instruction_set::ZVBB;  | 
877  |  |   #endif  | 
878  |  |   #if defined(__linux__)  | 
879  |  |   simdutf_riscv_hwprobe probes[] = {{SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0}}; | 
880  |  |   long ret = simdutf_riscv_hwprobe(&probes, sizeof probes / sizeof *probes, 0,  | 
881  |  |                                    nullptr, 0);  | 
882  |  |   if (ret == 0) { | 
883  |  |     uint64_t extensions = probes[0].value;  | 
884  |  |     if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V)  | 
885  |  |       host_isa |= instruction_set::RVV;  | 
886  |  |     if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)  | 
887  |  |       host_isa |= instruction_set::ZVBB;  | 
888  |  |   }  | 
889  |  |   #endif  | 
890  |  |   #if defined(RUN_IN_SPIKE_SIMULATOR)  | 
891  |  |   // Proxy Kernel does not implement yet hwprobe syscall  | 
892  |  |   host_isa |= instruction_set::RVV;  | 
893  |  |   #endif  | 
894  |  |   return host_isa;  | 
895  |  | }  | 
896  |  |  | 
897  |  | #elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)  | 
898  |  |  | 
899  |  | static inline uint32_t detect_supported_architectures() { | 
900  |  |   return instruction_set::NEON;  | 
901  |  | }  | 
902  |  |  | 
903  |  | #elif defined(__x86_64__) || defined(_M_AMD64) // x64  | 
904  |  |  | 
namespace {
/// Bit masks for the registers returned by CPUID and for XCR0.
namespace cpuid_bit {
// Can be found on Intel ISA Reference for CPUID

// Leaf EAX = 0x01, register ECX.
constexpr uint32_t pclmulqdq = UINT32_C(1) << 1; ///< @private ECX bit 1
constexpr uint32_t sse42 = UINT32_C(1) << 20;    ///< @private ECX bit 20
constexpr uint32_t osxsave =
    (UINT32_C(1) << 26) | (UINT32_C(1) << 27); ///< @private ECX bits 26+27

// Leaf EAX = 0x07 (Structured Extended Feature Flags), sub-leaf ECX = 0x00.
// See: "Table 3-8. Information Returned by CPUID Instruction"
namespace ebx {
constexpr uint32_t bmi1 = UINT32_C(1) << 3;
constexpr uint32_t avx2 = UINT32_C(1) << 5;
constexpr uint32_t bmi2 = UINT32_C(1) << 8;
constexpr uint32_t avx512f = UINT32_C(1) << 16;
constexpr uint32_t avx512dq = UINT32_C(1) << 17;
constexpr uint32_t avx512ifma = UINT32_C(1) << 21;
constexpr uint32_t avx512cd = UINT32_C(1) << 28;
constexpr uint32_t avx512bw = UINT32_C(1) << 30;
constexpr uint32_t avx512vl = UINT32_C(1) << 31;
} // namespace ebx

namespace ecx {
constexpr uint32_t avx512vbmi = UINT32_C(1) << 1;
constexpr uint32_t avx512vbmi2 = UINT32_C(1) << 6;
constexpr uint32_t avx512vnni = UINT32_C(1) << 11;
constexpr uint32_t avx512bitalg = UINT32_C(1) << 12;
constexpr uint32_t avx512vpopcnt = UINT32_C(1) << 14;
} // namespace ecx

namespace edx {
constexpr uint32_t avx512vp2intersect = UINT32_C(1) << 8;
} // namespace edx

// Masks applied to the value returned by xgetbv().
namespace xcr0_bit {
constexpr uint64_t avx256_saved = UINT64_C(1) << 2; ///< @private bit 2 = AVX
constexpr uint64_t avx512_saved =
    UINT64_C(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM
} // namespace xcr0_bit
} // namespace cpuid_bit
} // namespace
949  |  |  | 
/**
 * Executes CPUID for the leaf in `*eax` and the sub-leaf in `*ecx`, and
 * writes the four result registers back through the pointers.
 *
 * @param eax in: leaf number; out: EAX result.
 * @param ebx out: EBX result.
 * @param ecx in: sub-leaf number; out: ECX result.
 * @param edx out: EDX result.
 */
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                         uint32_t *edx) {
  #if defined(_MSC_VER)
  int cpu_info[4];
  __cpuidex(cpu_info, *eax, *ecx);
  *eax = cpu_info[0];
  *ebx = cpu_info[1];
  *ecx = cpu_info[2];
  *edx = cpu_info[3];
  #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  // __get_cpuid() takes no sub-leaf argument, so the caller's *ecx would be
  // ignored for sub-leaf based leaves such as 0x7. __get_cpuid_count() takes
  // the sub-leaf explicitly, matching the other two branches.
  const uint32_t level = *eax;
  const uint32_t subleaf = *ecx;
  if (__get_cpuid_count(level, subleaf, eax, ebx, ecx, edx) == 0) {
    // Unsupported leaf: the helper leaves the outputs untouched. Report
    // "no feature bits" rather than whatever a previous query stored there.
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;
  }
  #else
  uint32_t a = *eax, b, c = *ecx, d;
  asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
  #endif
}
971  |  |  | 
/**
 * Reads extended control register 0 (XCR0) with the XGETBV instruction.
 *
 * @return the 64-bit register value (EDX:EAX on the inline-asm path).
 */
static inline uint64_t xgetbv() {
  #if defined(_MSC_VER)
  return _xgetbv(0);
  #else
  uint32_t low_half, high_half;
  // ECX = 0 selects XCR0.
  asm volatile("xgetbv\n\t" : "=a"(low_half), "=d"(high_half) : "c"(0));
  return (static_cast<uint64_t>(high_half) << 32) | low_half;
  #endif
}
981  |  |  | 
982  | 0  | static inline uint32_t detect_supported_architectures() { | 
983  | 0  |   uint32_t eax;  | 
984  | 0  |   uint32_t ebx = 0;  | 
985  | 0  |   uint32_t ecx = 0;  | 
986  | 0  |   uint32_t edx = 0;  | 
987  | 0  |   uint32_t host_isa = 0x0;  | 
988  | 0  | 
  | 
989  | 0  |   // EBX for EAX=0x1  | 
990  | 0  |   eax = 0x1;  | 
991  | 0  |   cpuid(&eax, &ebx, &ecx, &edx);  | 
992  | 0  | 
  | 
993  | 0  |   if (ecx & cpuid_bit::sse42) { | 
994  | 0  |     host_isa |= instruction_set::SSE42;  | 
995  | 0  |   }  | 
996  | 0  | 
  | 
997  | 0  |   if (ecx & cpuid_bit::pclmulqdq) { | 
998  | 0  |     host_isa |= instruction_set::PCLMULQDQ;  | 
999  | 0  |   }  | 
1000  | 0  | 
  | 
1001  | 0  |   if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) { | 
1002  | 0  |     return host_isa;  | 
1003  | 0  |   }  | 
1004  | 0  | 
  | 
1005  | 0  |   // xgetbv for checking if the OS saves registers  | 
1006  | 0  |   uint64_t xcr0 = xgetbv();  | 
1007  | 0  | 
  | 
1008  | 0  |   if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) { | 
1009  | 0  |     return host_isa;  | 
1010  | 0  |   }  | 
1011  | 0  |   // ECX for EAX=0x7  | 
1012  | 0  |   eax = 0x7;  | 
1013  | 0  |   ecx = 0x0; // Sub-leaf = 0  | 
1014  | 0  |   cpuid(&eax, &ebx, &ecx, &edx);  | 
1015  | 0  |   if (ebx & cpuid_bit::ebx::avx2) { | 
1016  | 0  |     host_isa |= instruction_set::AVX2;  | 
1017  | 0  |   }  | 
1018  | 0  |   if (ebx & cpuid_bit::ebx::bmi1) { | 
1019  | 0  |     host_isa |= instruction_set::BMI1;  | 
1020  | 0  |   }  | 
1021  | 0  |   if (ebx & cpuid_bit::ebx::bmi2) { | 
1022  | 0  |     host_isa |= instruction_set::BMI2;  | 
1023  | 0  |   }  | 
1024  | 0  |   if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) ==  | 
1025  | 0  |         cpuid_bit::xcr0_bit::avx512_saved)) { | 
1026  | 0  |     return host_isa;  | 
1027  | 0  |   }  | 
1028  | 0  |   if (ebx & cpuid_bit::ebx::avx512f) { | 
1029  | 0  |     host_isa |= instruction_set::AVX512F;  | 
1030  | 0  |   }  | 
1031  | 0  |   if (ebx & cpuid_bit::ebx::avx512bw) { | 
1032  | 0  |     host_isa |= instruction_set::AVX512BW;  | 
1033  | 0  |   }  | 
1034  | 0  |   if (ebx & cpuid_bit::ebx::avx512cd) { | 
1035  | 0  |     host_isa |= instruction_set::AVX512CD;  | 
1036  | 0  |   }  | 
1037  | 0  |   if (ebx & cpuid_bit::ebx::avx512dq) { | 
1038  | 0  |     host_isa |= instruction_set::AVX512DQ;  | 
1039  | 0  |   }  | 
1040  | 0  |   if (ebx & cpuid_bit::ebx::avx512vl) { | 
1041  | 0  |     host_isa |= instruction_set::AVX512VL;  | 
1042  | 0  |   }  | 
1043  | 0  |   if (ecx & cpuid_bit::ecx::avx512vbmi2) { | 
1044  | 0  |     host_isa |= instruction_set::AVX512VBMI2;  | 
1045  | 0  |   }  | 
1046  | 0  |   if (ecx & cpuid_bit::ecx::avx512vpopcnt) { | 
1047  | 0  |     host_isa |= instruction_set::AVX512VPOPCNTDQ;  | 
1048  | 0  |   }  | 
1049  | 0  |   return host_isa;  | 
1050  | 0  | } Unexecuted instantiation: node_buffer.cc:simdutf::internal::detect_supported_architectures() Unexecuted instantiation: node_builtins.cc:simdutf::internal::detect_supported_architectures() Unexecuted instantiation: node_i18n.cc:simdutf::internal::detect_supported_architectures() Unexecuted instantiation: node_metadata.cc:simdutf::internal::detect_supported_architectures() Unexecuted instantiation: string_bytes.cc:simdutf::internal::detect_supported_architectures() Unexecuted instantiation: util.cc:simdutf::internal::detect_supported_architectures() Unexecuted instantiation: inspector_profiler.cc:simdutf::internal::detect_supported_architectures() Unexecuted instantiation: main_thread_interface.cc:simdutf::internal::detect_supported_architectures() Unexecuted instantiation: node_string.cc:simdutf::internal::detect_supported_architectures() Unexecuted instantiation: encoding_binding.cc:simdutf::internal::detect_supported_architectures() Unexecuted instantiation: inspector_socket.cc:simdutf::internal::detect_supported_architectures()  | 
1051  |  | #elif defined(__loongarch__)  | 
1052  |  |  | 
1053  |  | static inline uint32_t detect_supported_architectures() { | 
1054  |  |   uint32_t host_isa = instruction_set::DEFAULT;  | 
1055  |  |   #if defined(__linux__)  | 
1056  |  |   uint64_t hwcap = 0;  | 
1057  |  |   hwcap = getauxval(AT_HWCAP);  | 
1058  |  |   if (hwcap & HWCAP_LOONGARCH_LSX) { | 
1059  |  |     host_isa |= instruction_set::LSX;  | 
1060  |  |   }  | 
1061  |  |   if (hwcap & HWCAP_LOONGARCH_LASX) { | 
1062  |  |     host_isa |= instruction_set::LASX;  | 
1063  |  |   }  | 
1064  |  |   #endif  | 
1065  |  |   return host_isa;  | 
1066  |  | }  | 
1067  |  | #else // fallback  | 
1068  |  |  | 
1069  |  | // includes 32-bit ARM.  | 
1070  |  | static inline uint32_t detect_supported_architectures() { | 
1071  |  |   return instruction_set::DEFAULT;  | 
1072  |  | }  | 
1073  |  |  | 
1074  |  | #endif // end SIMD extension detection code  | 
1075  |  |  | 
1076  |  | } // namespace internal  | 
1077  |  | } // namespace simdutf  | 
1078  |  |  | 
1079  |  | #endif // SIMDutf_INTERNAL_ISADETECTION_H  | 
1080  |  | /* end file include/simdutf/internal/isadetection.h */  | 
1081  |  |  | 
1082  |  | #if SIMDUTF_SPAN  | 
1083  |  |   #include <concepts>  | 
1084  |  |   #include <type_traits>  | 
1085  |  |   #include <span>  | 
1086  |  |   #include <tuple>  | 
1087  |  | #endif  | 
1088  |  | #if SIMDUTF_CPLUSPLUS17  | 
1089  |  |   #include <string_view>  | 
1090  |  | #endif  | 
1091  |  | // The following defines are conditionally enabled/disabled during amalgamation.  | 
1092  |  | // By default all features are enabled, regular code shouldn't check them. Only  | 
1093  |  | // when user code really relies on a selected subset, it's good to verify these  | 
1094  |  | // flags, like:  | 
1095  |  | //  | 
1096  |  | //      #if !SIMDUTF_FEATURE_UTF16  | 
1097  |  | //      #   error("Please amalgamate simdutf with UTF-16 support") | 
1098  |  | //      #endif  | 
1099  |  | //  | 
1100  |  | #define SIMDUTF_FEATURE_DETECT_ENCODING 1  | 
1101  |  | #define SIMDUTF_FEATURE_ASCII 1  | 
1102  |  | #define SIMDUTF_FEATURE_LATIN1 1  | 
1103  |  | #define SIMDUTF_FEATURE_UTF8 1  | 
1104  |  | #define SIMDUTF_FEATURE_UTF16 1  | 
1105  |  | #define SIMDUTF_FEATURE_UTF32 1  | 
1106  |  | #define SIMDUTF_FEATURE_BASE64 1  | 
1107  |  |  | 
1108  |  | namespace simdutf { | 
1109  |  |  | 
1110  |  | #if SIMDUTF_SPAN  | 
1111  |  | /// helpers placed in namespace detail are not a part of the public API  | 
1112  |  | namespace detail { | 
1113  |  | /**  | 
1114  |  |  * matches a byte, in the many ways C++ allows. note that these  | 
1115  |  |  * are all distinct types.  | 
1116  |  |  */  | 
1117  |  | template <typename T>  | 
1118  |  | concept byte_like = std::is_same_v<T, std::byte> ||   //  | 
1119  |  |                     std::is_same_v<T, char> ||        //  | 
1120  |  |                     std::is_same_v<T, signed char> || //  | 
1121  |  |                     std::is_same_v<T, unsigned char>;  | 
1122  |  |  | 
1123  |  | template <typename T>  | 
1124  |  | concept is_byte_like = byte_like<std::remove_cvref_t<T>>;  | 
1125  |  |  | 
1126  |  | template <typename T>  | 
1127  |  | concept is_pointer = std::is_pointer_v<T>;  | 
1128  |  |  | 
1129  |  | /**  | 
1130  |  |  * matches anything that behaves like std::span and points to character-like  | 
1131  |  |  * data such as: std::byte, char, unsigned char, signed char, std::int8_t,  | 
1132  |  |  * std::uint8_t  | 
1133  |  |  */  | 
1134  |  | template <typename T>  | 
1135  |  | concept input_span_of_byte_like = requires(const T &t) { | 
1136  |  |   { t.size() } noexcept -> std::convertible_to<std::size_t>; | 
1137  |  |   { t.data() } noexcept -> is_pointer; | 
1138  |  |   { *t.data() } noexcept -> is_byte_like; | 
1139  |  | };  | 
1140  |  |  | 
1141  |  | template <typename T>  | 
1142  |  | concept is_mutable = !std::is_const_v<std::remove_reference_t<T>>;  | 
1143  |  |  | 
1144  |  | /**  | 
1145  |  |  * like span_of_byte_like, but for an output span (intended to be written to)  | 
1146  |  |  */  | 
1147  |  | template <typename T>  | 
1148  |  | concept output_span_of_byte_like = requires(T &t) { | 
1149  |  |   { t.size() } noexcept -> std::convertible_to<std::size_t>; | 
1150  |  |   { t.data() } noexcept -> is_pointer; | 
1151  |  |   { *t.data() } noexcept -> is_byte_like; | 
1152  |  |   { *t.data() } noexcept -> is_mutable; | 
1153  |  | };  | 
1154  |  | } // namespace detail  | 
1155  |  | #endif  | 
1156  |  |  | 
1157  |  | #if SIMDUTF_FEATURE_DETECT_ENCODING  | 
1158  |  | /**  | 
1159  |  |  * Autodetect the encoding of the input, a single encoding is recommended.  | 
1160  |  |  * E.g., the function might return simdutf::encoding_type::UTF8,  | 
1161  |  |  * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or  | 
1162  |  |  * simdutf::encoding_type::UTF32_LE.  | 
1163  |  |  *  | 
1164  |  |  * @param input the string to analyze.  | 
1165  |  |  * @param length the length of the string in bytes.  | 
1166  |  |  * @return the detected encoding type  | 
1167  |  |  */  | 
1168  |  | simdutf_warn_unused simdutf::encoding_type  | 
1169  |  | autodetect_encoding(const char *input, size_t length) noexcept;  | 
1170  |  | simdutf_really_inline simdutf_warn_unused simdutf::encoding_type  | 
1171  | 0  | autodetect_encoding(const uint8_t *input, size_t length) noexcept { | 
1172  | 0  |   return autodetect_encoding(reinterpret_cast<const char *>(input), length);  | 
1173  | 0  | }  | 
1174  |  |   #if SIMDUTF_SPAN  | 
1175  |  | /**  | 
1176  |  |  * Autodetect the encoding of the input, a single encoding is recommended.  | 
1177  |  |  * E.g., the function might return simdutf::encoding_type::UTF8,  | 
1178  |  |  * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or  | 
1179  |  |  * simdutf::encoding_type::UTF32_LE.  | 
1180  |  |  *  | 
1181  |  |  * @param input the string to analyze. can be a anything span-like that has a  | 
1182  |  |  * data() and size() that points to character data: std::string,  | 
1183  |  |  * std::string_view, std::vector<char>, std::span<const std::byte> etc.  | 
1184  |  |  * @return the detected encoding type  | 
1185  |  |  */  | 
1186  |  | simdutf_really_inline simdutf_warn_unused simdutf::encoding_type  | 
1187  |  | autodetect_encoding(  | 
1188  |  |     const detail::input_span_of_byte_like auto &input) noexcept { | 
1189  |  |   return autodetect_encoding(reinterpret_cast<const char *>(input.data()),  | 
1190  |  |                              input.size());  | 
1191  |  | }  | 
1192  |  |   #endif // SIMDUTF_SPAN  | 
1193  |  |  | 
1194  |  | /**  | 
1195  |  |  * Autodetect the possible encodings of the input in one pass.  | 
1196  |  |  * E.g., if the input might be UTF-16LE or UTF-8, this function returns  | 
1197  |  |  * the value (simdutf::encoding_type::UTF8 | simdutf::encoding_type::UTF16_LE).  | 
1198  |  |  *  | 
1199  |  |  * Overridden by each implementation.  | 
1200  |  |  *  | 
1201  |  |  * @param input the string to analyze.  | 
1202  |  |  * @param length the length of the string in bytes.  | 
1203  |  |  * @return the detected encoding type  | 
1204  |  |  */  | 
1205  |  | simdutf_warn_unused int detect_encodings(const char *input,  | 
1206  |  |                                          size_t length) noexcept;  | 
1207  |  | simdutf_really_inline simdutf_warn_unused int  | 
1208  | 0  | detect_encodings(const uint8_t *input, size_t length) noexcept { | 
1209  | 0  |   return detect_encodings(reinterpret_cast<const char *>(input), length);  | 
1210  | 0  | }  | 
1211  |  |   #if SIMDUTF_SPAN  | 
1212  |  | simdutf_really_inline simdutf_warn_unused int  | 
1213  |  | detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept { | 
1214  |  |   return detect_encodings(reinterpret_cast<const char *>(input.data()),  | 
1215  |  |                           input.size());  | 
1216  |  | }  | 
1217  |  |   #endif // SIMDUTF_SPAN  | 
1218  |  | #endif   // SIMDUTF_FEATURE_DETECT_ENCODING  | 
1219  |  |  | 
1220  |  | #if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
1221  |  | /**  | 
1222  |  |  * Validate the UTF-8 string. This function may be best when you expect  | 
1223  |  |  * the input to be almost always valid. Otherwise, consider using  | 
1224  |  |  * validate_utf8_with_errors.  | 
1225  |  |  *  | 
1226  |  |  * Overridden by each implementation.  | 
1227  |  |  *  | 
1228  |  |  * @param buf the UTF-8 string to validate.  | 
1229  |  |  * @param len the length of the string in bytes.  | 
1230  |  |  * @return true if and only if the string is valid UTF-8.  | 
1231  |  |  */  | 
1232  |  | simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept;  | 
1233  |  |   #if SIMDUTF_SPAN  | 
1234  |  | simdutf_really_inline simdutf_warn_unused bool  | 
1235  |  | validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept { | 
1236  |  |   return validate_utf8(reinterpret_cast<const char *>(input.data()),  | 
1237  |  |                        input.size());  | 
1238  |  | }  | 
1239  |  |   #endif // SIMDUTF_SPAN  | 
1240  |  | #endif   // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
1241  |  |  | 
1242  |  | #if SIMDUTF_FEATURE_UTF8  | 
1243  |  | /**  | 
1244  |  |  * Validate the UTF-8 string and stop on error.  | 
1245  |  |  *  | 
1246  |  |  * Overridden by each implementation.  | 
1247  |  |  *  | 
1248  |  |  * @param buf the UTF-8 string to validate.  | 
1249  |  |  * @param len the length of the string in bytes.  | 
1250  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
1251  |  |  * fields error and count) with an error code and either position of the error  | 
1252  |  |  * (in the input in code units) if any, or the number of code units validated if  | 
1253  |  |  * successful.  | 
1254  |  |  */  | 
1255  |  | simdutf_warn_unused result validate_utf8_with_errors(const char *buf,  | 
1256  |  |                                                      size_t len) noexcept;  | 
1257  |  |   #if SIMDUTF_SPAN  | 
1258  |  | simdutf_really_inline simdutf_warn_unused result validate_utf8_with_errors(  | 
1259  |  |     const detail::input_span_of_byte_like auto &input) noexcept { | 
1260  |  |   return validate_utf8_with_errors(reinterpret_cast<const char *>(input.data()),  | 
1261  |  |                                    input.size());  | 
1262  |  | }  | 
1263  |  |   #endif // SIMDUTF_SPAN  | 
1264  |  | #endif   // SIMDUTF_FEATURE_UTF8  | 
1265  |  |  | 
1266  |  | #if SIMDUTF_FEATURE_ASCII  | 
1267  |  | /**  | 
1268  |  |  * Validate the ASCII string.  | 
1269  |  |  *  | 
1270  |  |  * Overridden by each implementation.  | 
1271  |  |  *  | 
1272  |  |  * @param buf the ASCII string to validate.  | 
1273  |  |  * @param len the length of the string in bytes.  | 
1274  |  |  * @return true if and only if the string is valid ASCII.  | 
1275  |  |  */  | 
1276  |  | simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;  | 
1277  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data() (viewed as const char *) and input.size() to the pointer overload */  | 
1278  |  | simdutf_really_inline simdutf_warn_unused bool  | 
1279  |  | validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept { | 
1280  |  |   return validate_ascii(reinterpret_cast<const char *>(input.data()),  | 
1281  |  |                         input.size());  | 
1282  |  | }  | 
1283  |  |   #endif // SIMDUTF_SPAN  | 
1284  |  |  | 
1285  |  | /**  | 
1286  |  |  * Validate the ASCII string and stop on error. It might be faster than  | 
1287  |  |  * validate_ascii when an error is expected to occur early.  | 
1288  |  |  *  | 
1289  |  |  * Overridden by each implementation.  | 
1290  |  |  *  | 
1291  |  |  * @param buf the ASCII string to validate.  | 
1292  |  |  * @param len the length of the string in bytes.  | 
1293  |  |  * @return a result pair struct (of type simdutf::result, containing the two  | 
1294  |  |  * fields error and count) with an error code and either the position of the  | 
1295  |  |  * error (in the input, in code units) if any, or the number of code units  | 
1296  |  |  * validated if successful.  | 
1297  |  |  */  | 
1298  |  | simdutf_warn_unused result validate_ascii_with_errors(const char *buf,  | 
1299  |  |                                                       size_t len) noexcept;  | 
1300  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data() (viewed as const char *) and input.size() to the pointer overload */  | 
1301  |  | simdutf_really_inline simdutf_warn_unused result validate_ascii_with_errors(  | 
1302  |  |     const detail::input_span_of_byte_like auto &input) noexcept { | 
1303  |  |   return validate_ascii_with_errors(  | 
1304  |  |       reinterpret_cast<const char *>(input.data()), input.size());  | 
1305  |  | }  | 
1306  |  |   #endif // SIMDUTF_SPAN  | 
1307  |  | #endif   // SIMDUTF_FEATURE_ASCII  | 
1308  |  |  | 
1309  |  | #if SIMDUTF_FEATURE_UTF16  | 
1310  |  | /**  | 
1311  |  |  * Using native endianness, validate the UTF-16 string.  | 
1312  |  |  * This function may be best when you expect the input to be almost always  | 
1313  |  |  * valid. Otherwise, consider using validate_utf16_with_errors.  | 
1314  |  |  *  | 
1315  |  |  * Overridden by each implementation.  | 
1316  |  |  *  | 
1317  |  |  * This function is not BOM-aware.  | 
1318  |  |  *  | 
1319  |  |  * @param buf the UTF-16 string to validate.  | 
1320  |  |  * @param len the length of the string in number of 2-byte code units  | 
1321  |  |  * (char16_t).  | 
1322  |  |  * @return true if and only if the string is valid UTF-16.  | 
1323  |  |  */  | 
1324  |  | simdutf_warn_unused bool validate_utf16(const char16_t *buf,  | 
1325  |  |                                         size_t len) noexcept;  | 
1326  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data() and input.size() to the pointer overload */  | 
1327  |  | simdutf_really_inline simdutf_warn_unused bool  | 
1328  | 0  | validate_utf16(std::span<const char16_t> input) noexcept { | 
1329  | 0  |   return validate_utf16(input.data(), input.size());  | 
1330  | 0  | }  | 
1331  |  |   #endif // SIMDUTF_SPAN  | 
1332  |  | #endif   // SIMDUTF_FEATURE_UTF16  | 
1333  |  |  | 
1334  |  | #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
1335  |  | /**  | 
1336  |  |  * Validate the UTF-16LE string. This function may be best when you expect  | 
1337  |  |  * the input to be almost always valid. Otherwise, consider using  | 
1338  |  |  * validate_utf16le_with_errors.  | 
1339  |  |  *  | 
1340  |  |  * Overridden by each implementation.  | 
1341  |  |  *  | 
1342  |  |  * This function is not BOM-aware.  | 
1343  |  |  *  | 
1344  |  |  * @param buf the UTF-16LE string to validate.  | 
1345  |  |  * @param len the length of the string in number of 2-byte code units  | 
1346  |  |  * (char16_t).  | 
1347  |  |  * @return true if and only if the string is valid UTF-16LE.  | 
1348  |  |  */  | 
1349  |  | simdutf_warn_unused bool validate_utf16le(const char16_t *buf,  | 
1350  |  |                                           size_t len) noexcept;  | 
1351  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data() and input.size() to the pointer overload */  | 
1352  |  | simdutf_really_inline simdutf_warn_unused bool  | 
1353  | 0  | validate_utf16le(std::span<const char16_t> input) noexcept { | 
1354  | 0  |   return validate_utf16le(input.data(), input.size());  | 
1355  | 0  | }  | 
1356  |  |   #endif // SIMDUTF_SPAN  | 
1357  |  | #endif   // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
1358  |  |  | 
1359  |  | #if SIMDUTF_FEATURE_UTF16  | 
1360  |  | /**  | 
1361  |  |  * Validate the UTF-16BE string. This function may be best when you expect  | 
1362  |  |  * the input to be almost always valid. Otherwise, consider using  | 
1363  |  |  * validate_utf16be_with_errors.  | 
1364  |  |  *  | 
1365  |  |  * Overridden by each implementation.  | 
1366  |  |  *  | 
1367  |  |  * This function is not BOM-aware.  | 
1368  |  |  *  | 
1369  |  |  * @param buf the UTF-16BE string to validate.  | 
1370  |  |  * @param len the length of the string in number of 2-byte code units  | 
1371  |  |  * (char16_t).  | 
1372  |  |  * @return true if and only if the string is valid UTF-16BE.  | 
1373  |  |  */  | 
1374  |  | simdutf_warn_unused bool validate_utf16be(const char16_t *buf,  | 
1375  |  |                                           size_t len) noexcept;  | 
1376  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data() and input.size() to the pointer overload */  | 
1377  |  | simdutf_really_inline simdutf_warn_unused bool  | 
1378  | 0  | validate_utf16be(std::span<const char16_t> input) noexcept { | 
1379  | 0  |   return validate_utf16be(input.data(), input.size());  | 
1380  | 0  | }  | 
1381  |  |   #endif // SIMDUTF_SPAN  | 
1382  |  |  | 
1383  |  | /**  | 
1384  |  |  * Using native endianness, validate the UTF-16 string and stop on error.  | 
1385  |  |  * It might be faster than validate_utf16 when an error is expected to occur  | 
1386  |  |  * early.  | 
1387  |  |  *  | 
1388  |  |  * Overridden by each implementation.  | 
1389  |  |  *  | 
1390  |  |  * This function is not BOM-aware.  | 
1391  |  |  *  | 
1392  |  |  * @param buf the UTF-16 string to validate.  | 
1393  |  |  * @param len the length of the string in number of 2-byte code units  | 
1394  |  |  * (char16_t).  | 
1395  |  |  * @return a result pair struct (of type simdutf::result, containing the two  | 
1396  |  |  * fields error and count) with an error code and either the position of the  | 
1397  |  |  * error (in the input, in code units) if any, or the number of code units  | 
1398  |  |  * validated if successful.  | 
1399  |  |  */  | 
1400  |  | simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf,  | 
1401  |  |                                                       size_t len) noexcept;  | 
1402  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data() and input.size() to the pointer overload */  | 
1403  |  | simdutf_really_inline simdutf_warn_unused result  | 
1404  | 0  | validate_utf16_with_errors(std::span<const char16_t> input) noexcept { | 
1405  | 0  |   return validate_utf16_with_errors(input.data(), input.size());  | 
1406  | 0  | }  | 
1407  |  |   #endif // SIMDUTF_SPAN  | 
1408  |  |  | 
1409  |  | /**  | 
1410  |  |  * Validate the UTF-16LE string and stop on error. It might be faster than  | 
1411  |  |  * validate_utf16le when an error is expected to occur early.  | 
1412  |  |  *  | 
1413  |  |  * Overridden by each implementation.  | 
1414  |  |  *  | 
1415  |  |  * This function is not BOM-aware.  | 
1416  |  |  *  | 
1417  |  |  * @param buf the UTF-16LE string to validate.  | 
1418  |  |  * @param len the length of the string in number of 2-byte code units  | 
1419  |  |  * (char16_t).  | 
1420  |  |  * @return a result pair struct (of type simdutf::result, containing the two  | 
1421  |  |  * fields error and count) with an error code and either the position of the  | 
1422  |  |  * error (in the input, in code units) if any, or the number of code units  | 
1423  |  |  * validated if successful.  | 
1424  |  |  */  | 
1425  |  | simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf,  | 
1426  |  |                                                         size_t len) noexcept;  | 
1427  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data() and input.size() to the pointer overload */  | 
1428  |  | simdutf_really_inline simdutf_warn_unused result  | 
1429  | 0  | validate_utf16le_with_errors(std::span<const char16_t> input) noexcept { | 
1430  | 0  |   return validate_utf16le_with_errors(input.data(), input.size());  | 
1431  | 0  | }  | 
1432  |  |   #endif // SIMDUTF_SPAN  | 
1433  |  |  | 
1434  |  | /**  | 
1435  |  |  * Validate the UTF-16BE string and stop on error. It might be faster than  | 
1436  |  |  * validate_utf16be when an error is expected to occur early.  | 
1437  |  |  *  | 
1438  |  |  * Overridden by each implementation.  | 
1439  |  |  *  | 
1440  |  |  * This function is not BOM-aware.  | 
1441  |  |  *  | 
1442  |  |  * @param buf the UTF-16BE string to validate.  | 
1443  |  |  * @param len the length of the string in number of 2-byte code units  | 
1444  |  |  * (char16_t).  | 
1445  |  |  * @return a result pair struct (of type simdutf::result, containing the two  | 
1446  |  |  * fields error and count) with an error code and either the position of the  | 
1447  |  |  * error (in the input, in code units) if any, or the number of code units  | 
1448  |  |  * validated if successful.  | 
1449  |  |  */  | 
1450  |  | simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf,  | 
1451  |  |                                                         size_t len) noexcept;  | 
1452  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data() and input.size() to the pointer overload */  | 
1453  |  | simdutf_really_inline simdutf_warn_unused result  | 
1454  | 0  | validate_utf16be_with_errors(std::span<const char16_t> input) noexcept { | 
1455  | 0  |   return validate_utf16be_with_errors(input.data(), input.size());  | 
1456  | 0  | }  | 
1457  |  |   #endif // SIMDUTF_SPAN  | 
1458  |  |  | 
1459  |  | /**  | 
1460  |  |  * Fixes an ill-formed UTF-16LE string by replacing mismatched surrogates with  | 
1461  |  |  * the Unicode replacement character U+FFFD. If input and output point to  | 
1462  |  |  * different memory areas, the procedure copies the string, and the output  | 
1463  |  |  * memory is expected to be at least as big as the input. It is also possible  | 
1464  |  |  * to set input equal to output, which makes this an in-place operation.  | 
1465  |  |  *  | 
1466  |  |  * @param input the UTF-16LE string to correct.  | 
1467  |  |  * @param len the length of the string in number of 2-byte code units  | 
1468  |  |  * (char16_t).  | 
1469  |  |  * @param output the output buffer.  | 
1470  |  |  */  | 
1471  |  | void to_well_formed_utf16le(const char16_t *input, size_t len,  | 
1472  |  |                             char16_t *output) noexcept;  | 
1473  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data(), input.size() and output.data(); output.size() is not consulted */  | 
1474  |  | simdutf_really_inline void  | 
1475  |  | to_well_formed_utf16le(std::span<const char16_t> input,  | 
1476  | 0  |                        std::span<char16_t> output) noexcept { | 
1477  | 0  |   to_well_formed_utf16le(input.data(), input.size(), output.data());  | 
1478  | 0  | }  | 
1479  |  |   #endif // SIMDUTF_SPAN  | 
1480  |  |  | 
1481  |  | /**  | 
1482  |  |  * Fixes an ill-formed UTF-16BE string by replacing mismatched surrogates with  | 
1483  |  |  * the Unicode replacement character U+FFFD. If input and output point to  | 
1484  |  |  * different memory areas, the procedure copies the string, and the output  | 
1485  |  |  * memory is expected to be at least as big as the input. It is also possible  | 
1486  |  |  * to set input equal to output, which makes this an in-place operation.  | 
1487  |  |  *  | 
1488  |  |  * @param input the UTF-16BE string to correct.  | 
1489  |  |  * @param len the length of the string in number of 2-byte code units  | 
1490  |  |  * (char16_t).  | 
1491  |  |  * @param output the output buffer.  | 
1492  |  |  */  | 
1493  |  | void to_well_formed_utf16be(const char16_t *input, size_t len,  | 
1494  |  |                             char16_t *output) noexcept;  | 
1495  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data(), input.size() and output.data(); output.size() is not consulted */  | 
1496  |  | simdutf_really_inline void  | 
1497  |  | to_well_formed_utf16be(std::span<const char16_t> input,  | 
1498  | 0  |                        std::span<char16_t> output) noexcept { | 
1499  | 0  |   to_well_formed_utf16be(input.data(), input.size(), output.data());  | 
1500  | 0  | }  | 
1501  |  |   #endif // SIMDUTF_SPAN  | 
1502  |  |  | 
1503  |  | /**  | 
1504  |  |  * Fixes an ill-formed UTF-16 string by replacing mismatched surrogates with the  | 
1505  |  |  * Unicode replacement character U+FFFD. If input and output point to different  | 
1506  |  |  * memory areas, the procedure copies the string, and the output memory is  | 
1507  |  |  * expected to be at least as big as the input. It is also possible to set input  | 
1508  |  |  * equal to output, which makes this an in-place operation.  | 
1509  |  |  *  | 
1510  |  |  * @param input the UTF-16 string to correct.  | 
1511  |  |  * @param len the length of the string in number of 2-byte code units  | 
1512  |  |  * (char16_t).  | 
1513  |  |  * @param output the output buffer.  | 
1514  |  |  */  | 
1515  |  | void to_well_formed_utf16(const char16_t *input, size_t len,  | 
1516  |  |                           char16_t *output) noexcept;  | 
1517  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data(), input.size() and output.data(); output.size() is not consulted */  | 
1518  |  | simdutf_really_inline void  | 
1519  |  | to_well_formed_utf16(std::span<const char16_t> input,  | 
1520  | 0  |                      std::span<char16_t> output) noexcept { | 
1521  | 0  |   to_well_formed_utf16(input.data(), input.size(), output.data());  | 
1522  | 0  | }  | 
1523  |  |   #endif // SIMDUTF_SPAN  | 
1524  |  |  | 
1525  |  | #endif // SIMDUTF_FEATURE_UTF16  | 
1526  |  |  | 
1527  |  | #if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
1528  |  | /**  | 
1529  |  |  * Validate the UTF-32 string. This function may be best when you expect  | 
1530  |  |  * the input to be almost always valid. Otherwise, consider using  | 
1531  |  |  * validate_utf32_with_errors.  | 
1532  |  |  *  | 
1533  |  |  * Overridden by each implementation.  | 
1534  |  |  *  | 
1535  |  |  * This function is not BOM-aware.  | 
1536  |  |  *  | 
1537  |  |  * @param buf the UTF-32 string to validate.  | 
1538  |  |  * @param len the length of the string in number of 4-byte code units  | 
1539  |  |  * (char32_t).  | 
1540  |  |  * @return true if and only if the string is valid UTF-32.  | 
1541  |  |  */  | 
1542  |  | simdutf_warn_unused bool validate_utf32(const char32_t *buf,  | 
1543  |  |                                         size_t len) noexcept;  | 
1544  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data() and input.size() to the pointer overload */  | 
1545  |  | simdutf_really_inline simdutf_warn_unused bool  | 
1546  | 0  | validate_utf32(std::span<const char32_t> input) noexcept { | 
1547  | 0  |   return validate_utf32(input.data(), input.size());  | 
1548  | 0  | }  | 
1549  |  |   #endif // SIMDUTF_SPAN  | 
1550  |  | #endif   // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
1551  |  |  | 
1552  |  | #if SIMDUTF_FEATURE_UTF32  | 
1553  |  | /**  | 
1554  |  |  * Validate the UTF-32 string and stop on error. It might be faster than  | 
1555  |  |  * validate_utf32 when an error is expected to occur early.  | 
1556  |  |  *  | 
1557  |  |  * Overridden by each implementation.  | 
1558  |  |  *  | 
1559  |  |  * This function is not BOM-aware.  | 
1560  |  |  *  | 
1561  |  |  * @param buf the UTF-32 string to validate.  | 
1562  |  |  * @param len the length of the string in number of 4-byte code units  | 
1563  |  |  * (char32_t).  | 
1564  |  |  * @return a result pair struct (of type simdutf::result, containing the two  | 
1565  |  |  * fields error and count) with an error code and either the position of the  | 
1566  |  |  * error (in the input, in code units) if any, or the number of code units  | 
1567  |  |  * validated if successful.  | 
1568  |  |  */  | 
1569  |  | simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf,  | 
1570  |  |                                                       size_t len) noexcept;  | 
1571  |  |   #if SIMDUTF_SPAN /* span overload: passes input.data() and input.size() to the pointer overload */  | 
1572  |  | simdutf_really_inline simdutf_warn_unused result  | 
1573  | 0  | validate_utf32_with_errors(std::span<const char32_t> input) noexcept { | 
1574  | 0  |   return validate_utf32_with_errors(input.data(), input.size());  | 
1575  | 0  | }  | 
1576  |  |   #endif // SIMDUTF_SPAN  | 
1577  |  | #endif   // SIMDUTF_FEATURE_UTF32  | 
1578  |  |  | 
1579  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
1580  |  | /**  | 
1581  |  |  * Convert Latin1 string into UTF-8 string.  | 
1582  |  |  *  | 
1583  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
1584  |  |  *  | 
1585  |  |  * @param input         the Latin1 string to convert  | 
1586  |  |  * @param length        the length of the string in bytes  | 
1587  |  |  * @param utf8_output   the pointer to buffer that can hold conversion result  | 
1588  |  |  * @return the number of written char; 0 if conversion is not possible  | 
1589  |  |  */  | 
1590  |  | simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input,  | 
1591  |  |                                                   size_t length,  | 
1592  |  |                                                   char *utf8_output) noexcept;  | 
1593  |  |   #if SIMDUTF_SPAN /* span overload: input and output data() are both viewed as char pointers so any byte-like element type reaches the pointer overload; the output's size() is not consulted */  | 
1594  |  | simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8(  | 
1595  |  |     const detail::input_span_of_byte_like auto &latin1_input,  | 
1596  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
1597  |  |   return convert_latin1_to_utf8(  | 
1598  |  |       reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size(),  | 
1599  |  |       reinterpret_cast<char *>(utf8_output.data()));  | 
1600  |  | }  | 
1601  |  |   #endif // SIMDUTF_SPAN  | 
1602  |  |  | 
1603  |  | /**  | 
1604  |  |  * Convert Latin1 string into UTF-8 string with output limit.  | 
1605  |  |  *  | 
1606  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
1607  |  |  *  | 
1608  |  |  * We write as many characters as possible.  | 
1609  |  |  *  | 
1610  |  |  * @param input         the Latin1 string to convert  | 
1611  |  |  * @param length        the length of the string in bytes  | 
1612  |  |  * @param utf8_output   the pointer to buffer that can hold conversion result  | 
1613  |  |  * @param utf8_len      the maximum output length  | 
1614  |  |  * @return the number of written char; 0 if conversion is not possible  | 
1615  |  |  */  | 
1616  |  | simdutf_warn_unused size_t  | 
1617  |  | convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output,  | 
1618  |  |                             size_t utf8_len) noexcept;  | 
1619  |  |   #if SIMDUTF_SPAN /* span overload: input and output data() are both viewed as char pointers; utf8_output.size() is passed as utf8_len */  | 
1620  |  | simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8_safe(  | 
1621  |  |     const detail::input_span_of_byte_like auto &input,  | 
1622  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
1623  |  |   // implementation note: utf8_output is a forwarding ref to avoid copying and  | 
1624  |  |   // allow both lvalues and rvalues. std::span can be copied without problems,  | 
1625  |  |   // but std::vector should not, and this function should accept both. it will  | 
1626  |  |   // allow using an owning rvalue ref (example: passing a temporary std::string)  | 
1627  |  |   // as output, but the user will quickly find out that they have no way of  | 
1628  |  |   // getting the data out of the object in that case.  | 
1629  |  |   return convert_latin1_to_utf8_safe(  | 
1630  |  |       reinterpret_cast<const char *>(input.data()), input.size(),  | 
1631  |  |       reinterpret_cast<char *>(utf8_output.data()), utf8_output.size());  | 
1632  |  | }  | 
1633  |  |   #endif // SIMDUTF_SPAN  | 
1634  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
1635  |  |  | 
1636  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
1637  |  | /**  | 
1638  |  |  * Convert Latin1 string into UTF-16LE string.  | 
1639  |  |  *  | 
1640  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
1641  |  |  *  | 
1642  |  |  * @param input         the Latin1 string to convert  | 
1643  |  |  * @param length        the length of the string in bytes  | 
1644  |  |  * @param utf16_output  the pointer to buffer that can hold conversion result  | 
1645  |  |  * @return the number of written char16_t; 0 if conversion is not possible  | 
1646  |  |  */  | 
1647  |  | simdutf_warn_unused size_t convert_latin1_to_utf16le(  | 
1648  |  |     const char *input, size_t length, char16_t *utf16_output) noexcept;  | 
1649  |  |   #if SIMDUTF_SPAN /* span overload: passes the input data() as const char * with its size(), plus the output's data(); the output's size() is not consulted */  | 
1650  |  | simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16le(  | 
1651  |  |     const detail::input_span_of_byte_like auto &latin1_input,  | 
1652  |  |     std::span<char16_t> utf16_output) noexcept { | 
1653  |  |   return convert_latin1_to_utf16le(  | 
1654  |  |       reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size(),  | 
1655  |  |       utf16_output.data());  | 
1656  |  | }  | 
1657  |  |   #endif // SIMDUTF_SPAN  | 
1658  |  |  | 
1659  |  | /**  | 
1660  |  |  * Convert Latin1 string into UTF-16BE string.  | 
1661  |  |  *  | 
1662  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
1663  |  |  *  | 
1664  |  |  * @param input         the Latin1 string to convert  | 
1665  |  |  * @param length        the length of the string in bytes  | 
1666  |  |  * @param utf16_output  the pointer to buffer that can hold conversion result  | 
1667  |  |  * @return the number of written char16_t; 0 if conversion is not possible  | 
1668  |  |  */  | 
1669  |  | simdutf_warn_unused size_t convert_latin1_to_utf16be(  | 
1670  |  |     const char *input, size_t length, char16_t *utf16_output) noexcept;  | 
1671  |  |   #if SIMDUTF_SPAN /* span overload: passes the input data() as const char * with its size(), plus the output's data(); the output's size() is not consulted */  | 
1672  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
1673  |  | convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input,  | 
1674  |  |                           std::span<char16_t> output) noexcept { | 
1675  |  |   return convert_latin1_to_utf16be(reinterpret_cast<const char *>(input.data()),  | 
1676  |  |                                    input.size(), output.data());  | 
1677  |  | }  | 
1678  |  |   #endif // SIMDUTF_SPAN  | 
1679  |  | /**  | 
1680  |  |  * Compute the number of bytes that this UTF-16 string would require in Latin1  | 
1681  |  |  * format. Only a length is taken; no string data is passed to this function.  | 
1682  |  |  *  | 
1683  |  |  * @param length        the length of the UTF-16 string in 2-byte code units (char16_t)  | 
1684  |  |  * @return the length of the string in Latin1 code units (char) required to  | 
1685  |  |  * encode the UTF-16 string as Latin1  | 
1686  |  |  */  | 
1687  |  | simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;  | 
1688  |  |  | 
1689  |  | /**  | 
1690  |  |  * Compute the number of code units that this Latin1 string would require in  | 
1691  |  |  * UTF-16 format. Only a length is taken; no string data is passed to this function.  | 
1692  |  |  *  | 
1693  |  |  * @param length        the length of the string in Latin1 code units (char)  | 
1694  |  |  * @return the length of the string in 2-byte code units (char16_t) required to  | 
1695  |  |  * encode the Latin1 string as UTF-16  | 
1696  |  |  */  | 
1697  |  | simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept;  | 
1698  |  | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
1699  |  |  | 
1700  |  | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
1701  |  | /**  | 
1702  |  |  * Convert Latin1 string into UTF-32 string.  | 
1703  |  |  *  | 
1704  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
1705  |  |  *  | 
1706  |  |  * @param input         the Latin1 string to convert  | 
1707  |  |  * @param length        the length of the string in bytes  | 
1708  |  |  * @param utf32_buffer  the pointer to buffer that can hold conversion result  | 
1709  |  |  * @return the number of written char32_t; 0 if conversion is not possible  | 
1710  |  |  */  | 
1711  |  | simdutf_warn_unused size_t convert_latin1_to_utf32(  | 
1712  |  |     const char *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
1713  |  |   #if SIMDUTF_SPAN /* span overload: passes the input data() as const char * with its size(), plus the output's data(); the output's size() is not consulted */  | 
1714  |  | simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf32(  | 
1715  |  |     const detail::input_span_of_byte_like auto &latin1_input,  | 
1716  |  |     std::span<char32_t> utf32_output) noexcept { | 
1717  |  |   return convert_latin1_to_utf32(  | 
1718  |  |       reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size(),  | 
1719  |  |       utf32_output.data());  | 
1720  |  | }  | 
1721  |  |   #endif // SIMDUTF_SPAN  | 
1722  |  | #endif   // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
1723  |  |  | 
1724  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
1725  |  | /**  | 
1726  |  |  * Convert possibly broken UTF-8 string into Latin1 string.  | 
1727  |  |  *  | 
1728  |  |  * During the conversion, the input string is also validated.  | 
1729  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
1730  |  |  *  | 
1731  |  |  * @param input         the UTF-8 string to convert  | 
1732  |  |  * @param length        the length of the string in bytes  | 
1733  |  |  * @param latin1_output  the pointer to buffer that can hold conversion result  | 
1734  |  |  * @return the number of written char; 0 if the input was not valid UTF-8 string  | 
1735  |  |  * or if it cannot be represented as Latin1  | 
1736  |  |  */  | 
1737  |  | simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input,  | 
1738  |  |                                                   size_t length,  | 
1739  |  |                                                   char *latin1_output) noexcept;  | 
1740  |  |   #if SIMDUTF_SPAN /* span overload: input and output data() are both viewed as char pointers; the output's size() is not consulted */  | 
1741  |  | simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_latin1(  | 
1742  |  |     const detail::input_span_of_byte_like auto &input,  | 
1743  |  |     detail::output_span_of_byte_like auto &&output) noexcept { | 
1744  |  |   return convert_utf8_to_latin1(reinterpret_cast<const char *>(input.data()),  | 
1745  |  |                                 input.size(),  | 
1746  |  |                                 reinterpret_cast<char *>(output.data()));  | 
1747  |  | }  | 
1748  |  |   #endif // SIMDUTF_SPAN  | 
1749  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
1750  |  |  | 
1751  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
1752  |  | /**  | 
1753  |  |  * Using native endianness, convert possibly broken UTF-8 string into a UTF-16  | 
1754  |  |  * string.  | 
1755  |  |  *  | 
1756  |  |  * During the conversion, the input string is also validated.  | 
1757  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
1758  |  |  *  | 
1759  |  |  * @param input         the UTF-8 string to convert  | 
1760  |  |  * @param length        the length of the string in bytes  | 
1761  |  |  * @param utf16_output  the pointer to buffer that can hold conversion result  | 
1762  |  |  * @return the number of written char16_t; 0 if the input was not valid UTF-8  | 
1763  |  |  * string  | 
1764  |  |  */  | 
1765  |  | simdutf_warn_unused size_t convert_utf8_to_utf16(  | 
1766  |  |     const char *input, size_t length, char16_t *utf16_output) noexcept;  | 
1767  |  |   #if SIMDUTF_SPAN /* span overload: passes the input data() as const char * with its size(), plus the output's data(); the output's size() is not consulted */  | 
1768  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
1769  |  | convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input,  | 
1770  |  |                       std::span<char16_t> output) noexcept { | 
1771  |  |   return convert_utf8_to_utf16(reinterpret_cast<const char *>(input.data()),  | 
1772  |  |                                input.size(), output.data());  | 
1773  |  | }  | 
1774  |  |   #endif // SIMDUTF_SPAN  | 
1775  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
1776  |  |  | 
1777  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
1778  |  | /**  | 
1779  |  |  * Using native endianness, convert a Latin1 string into a UTF-16 string.  | 
1780  |  |  *  | 
1781  |  |  * @param input         the Latin1 string to convert  | 
1782  |  |  * @param length        the length of the string in bytes  | 
1783  |  |  * @param utf16_output  the pointer to buffer that can hold conversion result  | 
1784  |  |  * @return the number of written char16_t.  | 
1785  |  |  */  | 
1786  |  | simdutf_warn_unused size_t convert_latin1_to_utf16(  | 
1787  |  |     const char *input, size_t length, char16_t *utf16_output) noexcept;  | 
1788  |  |   #if SIMDUTF_SPAN /* span overload: passes the input data() as const char * with its size(), plus the output's data(); the output's size() is not consulted */  | 
1789  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
1790  |  | convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input,  | 
1791  |  |                         std::span<char16_t> output) noexcept { | 
1792  |  |   return convert_latin1_to_utf16(reinterpret_cast<const char *>(input.data()),  | 
1793  |  |                                  input.size(), output.data());  | 
1794  |  | }  | 
1795  |  |   #endif // SIMDUTF_SPAN  | 
1796  |  | #endif   // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
1797  |  |  | 
1798  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
1799  |  | /**  | 
1800  |  |  * Convert possibly broken UTF-8 string into UTF-16LE string.  | 
1801  |  |  *  | 
1802  |  |  * During the conversion, the input string is also validated.  | 
1803  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
1804  |  |  *  | 
1805  |  |  * @param input         the UTF-8 string to convert  | 
1806  |  |  * @param length        the length of the string in bytes  | 
1807  |  |  * @param utf16_output  the pointer to buffer that can hold conversion result  | 
1808  |  |  * @return the number of written char16_t; 0 if the input was not valid UTF-8  | 
1809  |  |  * string  | 
1810  |  |  */  | 
1811  |  | simdutf_warn_unused size_t convert_utf8_to_utf16le(  | 
1812  |  |     const char *input, size_t length, char16_t *utf16_output) noexcept;  | 
1813  |  |   #if SIMDUTF_SPAN /* span overload: passes the input data() as const char * with its size(), plus the output's data(); the output's size() is not consulted */  | 
1814  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
1815  |  | convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input,  | 
1816  |  |                         std::span<char16_t> utf16_output) noexcept { | 
1817  |  |   return convert_utf8_to_utf16le(  | 
1818  |  |       reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),  | 
1819  |  |       utf16_output.data());  | 
1820  |  | }  | 
1821  |  |   #endif // SIMDUTF_SPAN  | 
1822  |  |  | 
1823  |  | /**  | 
1824  |  |  * Convert possibly broken UTF-8 string into UTF-16BE string.  | 
1825  |  |  *  | 
1826  |  |  * During the conversion, the input string is also validated.  | 
1827  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
1828  |  |  *  | 
1829  |  |  * @param input         the UTF-8 string to convert  | 
1830  |  |  * @param length        the length of the string in bytes  | 
1831  |  |  * @param utf16_output  the pointer to buffer that can hold conversion result  | 
1832  |  |  * @return the number of written char16_t; 0 if the input was not valid UTF-8  | 
1833  |  |  * string  | 
1834  |  |  */  | 
1835  |  | simdutf_warn_unused size_t convert_utf8_to_utf16be(  | 
1836  |  |     const char *input, size_t length, char16_t *utf16_output) noexcept;  | 
1837  |  |   #if SIMDUTF_SPAN /* span overload: passes the input data() as const char * with its size(), plus the output's data(); the output's size() is not consulted */  | 
1838  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
1839  |  | convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input,  | 
1840  |  |                         std::span<char16_t> utf16_output) noexcept { | 
1841  |  |   return convert_utf8_to_utf16be(  | 
1842  |  |       reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),  | 
1843  |  |       utf16_output.data());  | 
1844  |  | }  | 
1845  |  |   #endif // SIMDUTF_SPAN  | 
1846  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
1847  |  |  | 
1848  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
1849  |  | /**  | 
1850  |  |  * Convert possibly broken UTF-8 string into Latin1 string with errors.  | 
1851  |  |  * If the string cannot be represented as Latin1, an error  | 
1852  |  |  * code is returned.  | 
1853  |  |  *  | 
1854  |  |  * During the conversion, the input string is also validated.  | 
1855  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
1856  |  |  *  | 
1857  |  |  * @param input         the UTF-8 string to convert  | 
1858  |  |  * @param length        the length of the string in bytes  | 
1859  |  |  * @param latin1_output  the pointer to buffer that can hold conversion result  | 
1860  |  |  * @return a result pair struct (of type simdutf::result, containing the two  | 
1861  |  |  * fields error and count) with an error code and either the position of the  | 
1862  |  |  * error (in the input, in code units) if any, or the number of code units  | 
1863  |  |  * validated if successful.  | 
1864  |  |  */  | 
1865  |  | simdutf_warn_unused result convert_utf8_to_latin1_with_errors(  | 
1866  |  |     const char *input, size_t length, char *latin1_output) noexcept;  | 
1867  |  |   #if SIMDUTF_SPAN /* span overload: input and output data() are both viewed as char pointers; the output's size() is not consulted */  | 
1868  |  | simdutf_really_inline simdutf_warn_unused result  | 
1869  |  | convert_utf8_to_latin1_with_errors(  | 
1870  |  |     const detail::input_span_of_byte_like auto &utf8_input,  | 
1871  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
1872  |  |   return convert_utf8_to_latin1_with_errors(  | 
1873  |  |       reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),  | 
1874  |  |       reinterpret_cast<char *>(latin1_output.data()));  | 
1875  |  | }  | 
1876  |  |   #endif // SIMDUTF_SPAN  | 
1877  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
1878  |  |  | 
1879  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
1880  |  | /**  | 
1881  |  |  * Transcode a UTF-8 string that may be malformed into UTF-16 in native  | 
1882  |  |  * endianness, stopping at the first error.  | 
1883  |  |  *  | 
1884  |  |  * Validation is performed during the conversion, so this function is safe  | 
1885  |  |  * to call on input that comes from untrusted sources.  | 
1886  |  |  *  | 
1887  |  |  * @param input         pointer to the UTF-8 data to transcode  | 
1888  |  |  * @param length        number of bytes readable at input  | 
1889  |  |  * @param utf16_output  pointer to a buffer able to hold the converted result  | 
1890  |  |  * @return a simdutf::result (fields error and count) holding an error code  | 
1891  |  |  * and either the position of the error in the input (in code units) if one  | 
1892  |  |  * occurred, or the number of char16_t written if the conversion succeeded.  | 
1893  |  |  * The span overload forwards utf16_output.data() only; its size is unused.  | 
1894  |  |  */  | 
1895  |  | simdutf_warn_unused result convert_utf8_to_utf16_with_errors(  | 
1896  |  |     const char *input, size_t length, char16_t *utf16_output) noexcept;  | 
1897  |  |   #if SIMDUTF_SPAN  | 
1898  |  | simdutf_really_inline simdutf_warn_unused result  | 
1899  |  | convert_utf8_to_utf16_with_errors(  | 
1900  |  |     const detail::input_span_of_byte_like auto &utf8_input,  | 
1901  |  |     std::span<char16_t> utf16_output) noexcept { | 
1902  |  |   const char *const src = reinterpret_cast<const char *>(utf8_input.data());  | 
1903  |  |   return convert_utf8_to_utf16_with_errors(src, utf8_input.size(),  | 
1904  |  |                                            utf16_output.data());  | 
1905  |  | }  | 
1906  |  |   #endif // SIMDUTF_SPAN  | 
1907  |  |  | 
1908  |  | /**  | 
1909  |  |  * Transcode possibly malformed UTF-8 into UTF-16LE, stopping on error.  | 
1910  |  |  *  | 
1911  |  |  * Validation is performed during the conversion, so this function is safe  | 
1912  |  |  * to call on input that comes from untrusted sources.  | 
1913  |  |  *  | 
1914  |  |  * @param input         pointer to the UTF-8 data to transcode  | 
1915  |  |  * @param length        number of bytes readable at input  | 
1916  |  |  * @param utf16_output  pointer to a buffer able to hold the converted result  | 
1917  |  |  * @return a simdutf::result (fields error and count) holding an error code  | 
1918  |  |  * and either the position of the error in the input (in code units) if one  | 
1919  |  |  * occurred, or the number of char16_t written if the conversion succeeded.  | 
1920  |  |  * The span overload forwards utf16_output.data() only; its size is unused.  | 
1921  |  |  */  | 
1922  |  | simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(  | 
1923  |  |     const char *input, size_t length, char16_t *utf16_output) noexcept;  | 
1924  |  |   #if SIMDUTF_SPAN  | 
1925  |  | simdutf_really_inline simdutf_warn_unused result  | 
1926  |  | convert_utf8_to_utf16le_with_errors(  | 
1927  |  |     const detail::input_span_of_byte_like auto &utf8_input,  | 
1928  |  |     std::span<char16_t> utf16_output) noexcept { | 
1929  |  |   const char *const src = reinterpret_cast<const char *>(utf8_input.data());  | 
1930  |  |   return convert_utf8_to_utf16le_with_errors(src, utf8_input.size(),  | 
1931  |  |                                              utf16_output.data());  | 
1932  |  | }  | 
1933  |  |   #endif // SIMDUTF_SPAN  | 
1934  |  |  | 
1935  |  | /**  | 
1936  |  |  * Transcode possibly malformed UTF-8 into UTF-16BE, stopping on error.  | 
1937  |  |  *  | 
1938  |  |  * Validation is performed during the conversion, so this function is safe  | 
1939  |  |  * to call on input that comes from untrusted sources.  | 
1940  |  |  *  | 
1941  |  |  * @param input         pointer to the UTF-8 data to transcode  | 
1942  |  |  * @param length        number of bytes readable at input  | 
1943  |  |  * @param utf16_output  pointer to a buffer able to hold the converted result  | 
1944  |  |  * @return a simdutf::result (fields error and count) holding an error code  | 
1945  |  |  * and either the position of the error in the input (in code units) if one  | 
1946  |  |  * occurred, or the number of char16_t written if the conversion succeeded.  | 
1947  |  |  * The span overload forwards utf16_output.data() only; its size is unused.  | 
1948  |  |  */  | 
1949  |  | simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(  | 
1950  |  |     const char *input, size_t length, char16_t *utf16_output) noexcept;  | 
1951  |  |   #if SIMDUTF_SPAN  | 
1952  |  | simdutf_really_inline simdutf_warn_unused result  | 
1953  |  | convert_utf8_to_utf16be_with_errors(  | 
1954  |  |     const detail::input_span_of_byte_like auto &utf8_input,  | 
1955  |  |     std::span<char16_t> utf16_output) noexcept { | 
1956  |  |   const char *const src = reinterpret_cast<const char *>(utf8_input.data());  | 
1957  |  |   return convert_utf8_to_utf16be_with_errors(src, utf8_input.size(),  | 
1958  |  |                                              utf16_output.data());  | 
1959  |  | }  | 
1960  |  |   #endif // SIMDUTF_SPAN  | 
1961  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
1962  |  |  | 
1963  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
1964  |  | /**  | 
1965  |  |  * Transcode a UTF-8 string that may be malformed into UTF-32.  | 
1966  |  |  *  | 
1967  |  |  * Validation is performed during the conversion, so this function is safe  | 
1968  |  |  * to call on input that comes from untrusted sources.  | 
1969  |  |  *  | 
1970  |  |  * @param input         pointer to the UTF-8 data to transcode  | 
1971  |  |  * @param length        number of bytes readable at input  | 
1972  |  |  * @param utf32_output  pointer to a buffer able to hold the converted result  | 
1973  |  |  * @return count of char32_t written, or 0 when the input is not valid UTF-8.  | 
1974  |  |  * The span overload forwards utf32_output.data() only; its size is unused.  | 
1975  |  |  */  | 
1976  |  | simdutf_warn_unused size_t convert_utf8_to_utf32(  | 
1977  |  |     const char *input, size_t length, char32_t *utf32_output) noexcept;  | 
1978  |  |   #if SIMDUTF_SPAN  | 
1979  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
1980  |  | convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input,  | 
1981  |  |                       std::span<char32_t> utf32_output) noexcept { | 
1982  |  |   const char *const src = reinterpret_cast<const char *>(utf8_input.data());  | 
1983  |  |   const size_t src_len = utf8_input.size();  | 
1984  |  |   return convert_utf8_to_utf32(src, src_len, utf32_output.data());  | 
1985  |  | }  | 
1986  |  |   #endif // SIMDUTF_SPAN  | 
1987  |  |  | 
1988  |  | /**  | 
1989  |  |  * Transcode possibly malformed UTF-8 into UTF-32, stopping on error.  | 
1990  |  |  *  | 
1991  |  |  * Validation is performed during the conversion, so this function is safe  | 
1992  |  |  * to call on input that comes from untrusted sources.  | 
1993  |  |  *  | 
1994  |  |  * @param input         pointer to the UTF-8 data to transcode  | 
1995  |  |  * @param length        number of bytes readable at input  | 
1996  |  |  * @param utf32_output  pointer to a buffer able to hold the converted result  | 
1997  |  |  * @return a simdutf::result (fields error and count) holding an error code  | 
1998  |  |  * and either the position of the error in the input (in code units) if one  | 
1999  |  |  * occurred, or the number of char32_t written if the conversion succeeded.  | 
2000  |  |  * The span overload forwards utf32_output.data() only; its size is unused.  | 
2001  |  |  */  | 
2002  |  | simdutf_warn_unused result convert_utf8_to_utf32_with_errors(  | 
2003  |  |     const char *input, size_t length, char32_t *utf32_output) noexcept;  | 
2004  |  |   #if SIMDUTF_SPAN  | 
2005  |  | simdutf_really_inline simdutf_warn_unused result  | 
2006  |  | convert_utf8_to_utf32_with_errors(  | 
2007  |  |     const detail::input_span_of_byte_like auto &utf8_input,  | 
2008  |  |     std::span<char32_t> utf32_output) noexcept { | 
2009  |  |   const char *const src = reinterpret_cast<const char *>(utf8_input.data());  | 
2010  |  |   return convert_utf8_to_utf32_with_errors(src, utf8_input.size(),  | 
2011  |  |                                            utf32_output.data());  | 
2012  |  | }  | 
2013  |  |   #endif // SIMDUTF_SPAN  | 
2014  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
2015  |  |  | 
2016  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
2017  |  | /**  | 
2018  |  |  * Convert valid UTF-8 string into latin1 string.  | 
2019  |  |  *  | 
2020  |  |  * The input is assumed to be valid UTF-8 that is representable as Latin1. If  | 
2021  |  |  * that assumption is violated, the result is implementation defined and may  | 
2022  |  |  * include system-dependent behavior such as crashes.  | 
2023  |  |  *  | 
2024  |  |  * This function is for expert users only and not part of our public API. Use  | 
2025  |  |  * convert_utf8_to_latin1 instead. The function may be removed from the library  | 
2026  |  |  * in the future.  | 
2027  |  |  *  | 
2028  |  |  * This function is not BOM-aware.  | 
2029  |  |  *  | 
2030  |  |  * @param input          the UTF-8 string to convert  | 
2031  |  |  * @param length         the length of the string in bytes  | 
2032  |  |  * @param latin1_output  the pointer to buffer that can hold conversion result  | 
2033  |  |  * @return the number of written char  | 
2034  |  |  */  | 
2035  |  | simdutf_warn_unused size_t convert_valid_utf8_to_latin1(  | 
2036  |  |     const char *input, size_t length, char *latin1_output) noexcept;  | 
2037  |  |   #if SIMDUTF_SPAN  | 
2038  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_latin1(  | 
2039  |  |     const detail::input_span_of_byte_like auto &valid_utf8_input,  | 
2040  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
2041  |  |   // Cast the output pointer like the input so non-char byte-like spans work.  | 
2042  |  |   return convert_valid_utf8_to_latin1(  | 
2043  |  |       reinterpret_cast<const char *>(valid_utf8_input.data()),  | 
2044  |  |       valid_utf8_input.size(), reinterpret_cast<char *>(latin1_output.data()));  | 
2045  |  | }  | 
2046  |  |   #endif // SIMDUTF_SPAN  | 
2047  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
2048  |  |  | 
2049  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2050  |  | /**  | 
2051  |  |  * Transcode a valid UTF-8 string into UTF-16 using native endianness.  | 
2052  |  |  *  | 
2053  |  |  * The input is assumed to be valid UTF-8; it is the caller's precondition.  | 
2054  |  |  *  | 
2055  |  |  * @param input         pointer to the UTF-8 data to transcode  | 
2056  |  |  * @param length        number of bytes readable at input  | 
2057  |  |  * @param utf16_buffer  pointer to a buffer able to hold the converted result  | 
2058  |  |  * @return the number of char16_t written  | 
2059  |  |  */  | 
2060  |  | simdutf_warn_unused size_t convert_valid_utf8_to_utf16(  | 
2061  |  |     const char *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
2062  |  |   #if SIMDUTF_SPAN  | 
2063  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16(  | 
2064  |  |     const detail::input_span_of_byte_like auto &valid_utf8_input,  | 
2065  |  |     std::span<char16_t> utf16_output) noexcept { | 
2066  |  |   const auto *src = reinterpret_cast<const char *>(valid_utf8_input.data());  | 
2067  |  |   const size_t src_len = valid_utf8_input.size();  | 
2068  |  |   return convert_valid_utf8_to_utf16(src, src_len, utf16_output.data());  | 
2069  |  | }  | 
2070  |  |   #endif // SIMDUTF_SPAN  | 
2071  |  |  | 
2072  |  | /**  | 
2073  |  |  * Transcode a valid UTF-8 string into UTF-16LE.  | 
2074  |  |  *  | 
2075  |  |  * The input is assumed to be valid UTF-8; it is the caller's precondition.  | 
2076  |  |  *  | 
2077  |  |  * @param input         pointer to the UTF-8 data to transcode  | 
2078  |  |  * @param length        number of bytes readable at input  | 
2079  |  |  * @param utf16_buffer  pointer to a buffer able to hold the converted result  | 
2080  |  |  * @return the number of char16_t written  | 
2081  |  |  */  | 
2082  |  | simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(  | 
2083  |  |     const char *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
2084  |  |   #if SIMDUTF_SPAN  | 
2085  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(  | 
2086  |  |     const detail::input_span_of_byte_like auto &valid_utf8_input,  | 
2087  |  |     std::span<char16_t> utf16_output) noexcept { | 
2088  |  |   const auto *src = reinterpret_cast<const char *>(valid_utf8_input.data());  | 
2089  |  |   const size_t src_len = valid_utf8_input.size();  | 
2090  |  |   return convert_valid_utf8_to_utf16le(src, src_len, utf16_output.data());  | 
2091  |  | }  | 
2092  |  |   #endif // SIMDUTF_SPAN  | 
2093  |  |  | 
2094  |  | /**  | 
2095  |  |  * Transcode a valid UTF-8 string into UTF-16BE.  | 
2096  |  |  *  | 
2097  |  |  * The input is assumed to be valid UTF-8; it is the caller's precondition.  | 
2098  |  |  *  | 
2099  |  |  * @param input         pointer to the UTF-8 data to transcode  | 
2100  |  |  * @param length        number of bytes readable at input  | 
2101  |  |  * @param utf16_buffer  pointer to a buffer able to hold the converted result  | 
2102  |  |  * @return the number of char16_t written  | 
2103  |  |  */  | 
2104  |  | simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(  | 
2105  |  |     const char *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
2106  |  |   #if SIMDUTF_SPAN  | 
2107  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(  | 
2108  |  |     const detail::input_span_of_byte_like auto &valid_utf8_input,  | 
2109  |  |     std::span<char16_t> utf16_output) noexcept { | 
2110  |  |   const auto *src = reinterpret_cast<const char *>(valid_utf8_input.data());  | 
2111  |  |   const size_t src_len = valid_utf8_input.size();  | 
2112  |  |   return convert_valid_utf8_to_utf16be(src, src_len, utf16_output.data());  | 
2113  |  | }  | 
2114  |  |   #endif // SIMDUTF_SPAN  | 
2115  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2116  |  |  | 
2117  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
2118  |  | /**  | 
2119  |  |  * Transcode a valid UTF-8 string into UTF-32.  | 
2120  |  |  *  | 
2121  |  |  * The input is assumed to be valid UTF-8; it is the caller's precondition.  | 
2122  |  |  *  | 
2123  |  |  * @param input         pointer to the UTF-8 data to transcode  | 
2124  |  |  * @param length        number of bytes readable at input  | 
2125  |  |  * @param utf32_buffer  pointer to a buffer able to hold the converted result  | 
2126  |  |  * @return the number of char32_t written  | 
2127  |  |  */  | 
2128  |  | simdutf_warn_unused size_t convert_valid_utf8_to_utf32(  | 
2129  |  |     const char *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
2130  |  |   #if SIMDUTF_SPAN  | 
2131  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf32(  | 
2132  |  |     const detail::input_span_of_byte_like auto &valid_utf8_input,  | 
2133  |  |     std::span<char32_t> utf32_output) noexcept { | 
2134  |  |   const auto *src = reinterpret_cast<const char *>(valid_utf8_input.data());  | 
2135  |  |   const size_t src_len = valid_utf8_input.size();  | 
2136  |  |   return convert_valid_utf8_to_utf32(src, src_len, utf32_output.data());  | 
2137  |  | }  | 
2138  |  |   #endif // SIMDUTF_SPAN  | 
2139  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
2140  |  |  | 
2141  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
2142  |  | /**  | 
2143  |  |  * Report how many bytes this Latin1 string would occupy once encoded as  | 
2144  |  |  * UTF-8.  | 
2145  |  |  *  | 
2146  |  |  * @param input         pointer to the Latin1 data to measure  | 
2147  |  |  * @param length        the length of the string in bytes  | 
2148  |  |  * @return the number of bytes needed to encode the Latin1 string as UTF-8  | 
2149  |  |  */  | 
2150  |  | simdutf_warn_unused size_t utf8_length_from_latin1(const char *input,  | 
2151  |  |                                                    size_t length) noexcept;  | 
2152  |  |   #if SIMDUTF_SPAN  | 
2153  |  | simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_latin1(  | 
2154  |  |     const detail::input_span_of_byte_like auto &latin1_input) noexcept { | 
2155  |  |   const auto *bytes = reinterpret_cast<const char *>(latin1_input.data());  | 
2156  |  |   return utf8_length_from_latin1(bytes, latin1_input.size());  | 
2157  |  | }  | 
2158  |  |   #endif // SIMDUTF_SPAN  | 
2159  |  |  | 
2160  |  | /**  | 
2161  |  |  * Report how many bytes this UTF-8 string would occupy once encoded as  | 
2162  |  |  * Latin1.  | 
2163  |  |  *  | 
2164  |  |  * No validation is performed. Invalid UTF-8 may be passed, but the result  | 
2165  |  |  * is then implementation defined.  | 
2166  |  |  *  | 
2167  |  |  * This function is not BOM-aware.  | 
2168  |  |  *  | 
2169  |  |  * @param input         pointer to the UTF-8 data to measure  | 
2170  |  |  * @param length        the length of the string in bytes  | 
2171  |  |  * @return the number of bytes needed to encode the UTF-8 string as Latin1  | 
2172  |  |  */  | 
2173  |  | simdutf_warn_unused size_t latin1_length_from_utf8(const char *input,  | 
2174  |  |                                                    size_t length) noexcept;  | 
2175  |  |   #if SIMDUTF_SPAN  | 
2176  |  | simdutf_really_inline simdutf_warn_unused size_t latin1_length_from_utf8(  | 
2177  |  |     const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { | 
2178  |  |   const auto *bytes = reinterpret_cast<const char *>(valid_utf8_input.data());  | 
2179  |  |   const size_t byte_count = valid_utf8_input.size();  | 
2180  |  |   return latin1_length_from_utf8(bytes, byte_count);  | 
2181  |  | }  | 
2182  |  |   #endif // SIMDUTF_SPAN  | 
2183  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
2184  |  |  | 
2185  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2186  |  | /**  | 
2187  |  |  * Report how many 2-byte code units this UTF-8 string would occupy once  | 
2188  |  |  * encoded in UTF-16LE format.  | 
2189  |  |  *  | 
2190  |  |  * No validation is performed. Invalid UTF-8 may be passed, but the result  | 
2191  |  |  * is then implementation defined.  | 
2192  |  |  *  | 
2193  |  |  * This function is not BOM-aware.  | 
2194  |  |  *  | 
2195  |  |  * @param input         pointer to the UTF-8 data to measure  | 
2196  |  |  * @param length        the length of the string in bytes  | 
2197  |  |  * @return the number of char16_t code units needed to encode the UTF-8  | 
2198  |  |  * string as UTF-16LE  | 
2199  |  |  */  | 
2200  |  | simdutf_warn_unused size_t utf16_length_from_utf8(const char *input,  | 
2201  |  |                                                   size_t length) noexcept;  | 
2202  |  |   #if SIMDUTF_SPAN  | 
2203  |  | simdutf_really_inline simdutf_warn_unused size_t utf16_length_from_utf8(  | 
2204  |  |     const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { | 
2205  |  |   const auto *bytes = reinterpret_cast<const char *>(valid_utf8_input.data());  | 
2206  |  |   const size_t byte_count = valid_utf8_input.size();  | 
2207  |  |   return utf16_length_from_utf8(bytes, byte_count);  | 
2208  |  | }  | 
2209  |  |   #endif // SIMDUTF_SPAN  | 
2210  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2211  |  |  | 
2212  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
2213  |  | /**  | 
2214  |  |  * Report how many 4-byte code units this UTF-8 string would occupy once  | 
2215  |  |  * encoded in UTF-32 format.  | 
2216  |  |  *  | 
2217  |  |  * This function is equivalent to count_utf8.  | 
2218  |  |  *  | 
2219  |  |  * No validation is performed. Invalid UTF-8 may be passed, but the result  | 
2220  |  |  * is then implementation defined.  | 
2221  |  |  *  | 
2222  |  |  * This function is not BOM-aware.  | 
2223  |  |  *  | 
2224  |  |  * @param input         pointer to the UTF-8 data to measure  | 
2225  |  |  * @param length        the length of the string in bytes  | 
2226  |  |  * @return the number of char32_t code units needed to encode the UTF-8  | 
2227  |  |  * string as UTF-32  | 
2228  |  |  */  | 
2229  |  | simdutf_warn_unused size_t utf32_length_from_utf8(const char *input,  | 
2230  |  |                                                   size_t length) noexcept;  | 
2231  |  |   #if SIMDUTF_SPAN  | 
2232  |  | simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf8(  | 
2233  |  |     const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { | 
2234  |  |   const auto *bytes = reinterpret_cast<const char *>(valid_utf8_input.data());  | 
2235  |  |   const size_t byte_count = valid_utf8_input.size();  | 
2236  |  |   return utf32_length_from_utf8(bytes, byte_count);  | 
2237  |  | }  | 
2238  |  |   #endif // SIMDUTF_SPAN  | 
2239  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
2240  |  |  | 
2241  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2242  |  | /**  | 
2243  |  |  * Transcode a UTF-16 string (native endianness) that may be malformed into  | 
2244  |  |  * UTF-8.  | 
2245  |  |  *  | 
2246  |  |  * Validation is performed during the conversion, so this function is safe  | 
2247  |  |  * to call on input that comes from untrusted sources.  | 
2248  |  |  *  | 
2249  |  |  * This function is not BOM-aware.  | 
2250  |  |  *  | 
2251  |  |  * @param input         pointer to the UTF-16 data to transcode  | 
2252  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2253  |  |  * @param utf8_buffer   pointer to a buffer able to hold the converted result  | 
2254  |  |  * @return number of written code units; 0 if the input is not a valid UTF-16  | 
2255  |  |  * string  | 
2256  |  |  */  | 
2257  |  | simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input,  | 
2258  |  |                                                  size_t length,  | 
2259  |  |                                                  char *utf8_buffer) noexcept;  | 
2260  |  |   #if SIMDUTF_SPAN  | 
2261  |  | simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8(  | 
2262  |  |     std::span<const char16_t> utf16_input,  | 
2263  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
2264  |  |   char *const dst = reinterpret_cast<char *>(utf8_output.data());  | 
2265  |  |   return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(), dst);  | 
2266  |  | }  | 
2267  |  |   #endif // SIMDUTF_SPAN  | 
2268  |  |  | 
2269  |  | /**  | 
2270  |  |  * Transcode a UTF-16 string (native endianness) that may be malformed into  | 
2271  |  |  * UTF-8, honoring a limit on the output size.  | 
2272  |  |  *  | 
2273  |  |  * As many characters as possible are written into the output buffer.  | 
2274  |  |  *  | 
2275  |  |  * Validation is performed during the conversion, so this function is safe  | 
2276  |  |  * to call on input that comes from untrusted sources.  | 
2277  |  |  *  | 
2278  |  |  * This function is not BOM-aware.  | 
2279  |  |  *  | 
2280  |  |  * The span overload passes utf8_output.size() as the maximum output length.  | 
2281  |  |  * @param input         pointer to the UTF-16 data to transcode  | 
2282  |  |  * @param length        the length of the string in 16-bit code units (char16_t)  | 
2283  |  |  * @param utf8_output   pointer to a buffer able to hold the converted result  | 
2284  |  |  * @param utf8_len      the maximum output length  | 
2285  |  |  * @return the number of written char; 0 if conversion is not possible  | 
2286  |  |  */  | 
2287  |  | simdutf_warn_unused size_t convert_utf16_to_utf8_safe(const char16_t *input,  | 
2288  |  |                                                       size_t length,  | 
2289  |  |                                                       char *utf8_output,  | 
2290  |  |                                                       size_t utf8_len) noexcept;  | 
2291  |  |   #if SIMDUTF_SPAN  | 
2292  |  | simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8_safe(  | 
2293  |  |     std::span<const char16_t> utf16_input,  | 
2294  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
2295  |  |   // Implementation note: the output is taken as a forwarding reference so  | 
2296  |  |   // that lvalues and rvalues are both accepted without copying. Copying a  | 
2297  |  |   // std::span is harmless, but copying a std::vector is not, and both must  | 
2298  |  |   // be usable here. A side effect is that an owning temporary (for example  | 
2299  |  |   // a temporary std::string) is accepted as output, although the caller  | 
2300  |  |   // then has no way to read the converted data back out of it.  | 
2301  |  |   char *const dst = reinterpret_cast<char *>(utf8_output.data());  | 
2302  |  |   return convert_utf16_to_utf8_safe(utf16_input.data(), utf16_input.size(),  | 
2303  |  |                                     dst, utf8_output.size());  | 
2304  |  | }  | 
2305  |  |   #endif // SIMDUTF_SPAN  | 
2306  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2307  |  |  | 
2308  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
2309  |  | /**  | 
2310  |  |  * Transcode a UTF-16 string (native endianness) that may be malformed into  | 
2311  |  |  * Latin1.  | 
2312  |  |  *  | 
2313  |  |  * Validation is performed during the conversion, so this function is safe  | 
2314  |  |  * to call on input that comes from untrusted sources.  | 
2315  |  |  *  | 
2316  |  |  * This function is not BOM-aware.  | 
2317  |  |  *  | 
2318  |  |  * @param input         pointer to the UTF-16 data to transcode  | 
2319  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2320  |  |  * @param latin1_buffer pointer to a buffer able to hold the converted result  | 
2321  |  |  * @return number of written code units; 0 if the input is not a valid UTF-16  | 
2322  |  |  * string or if it cannot be represented as Latin1  | 
2323  |  |  */  | 
2324  |  | simdutf_warn_unused size_t convert_utf16_to_latin1(  | 
2325  |  |     const char16_t *input, size_t length, char *latin1_buffer) noexcept;  | 
2326  |  |   #if SIMDUTF_SPAN  | 
2327  |  | simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_latin1(  | 
2328  |  |     std::span<const char16_t> utf16_input,  | 
2329  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
2330  |  |   char *const dst = reinterpret_cast<char *>(latin1_output.data());  | 
2331  |  |   const size_t unit_count = utf16_input.size();  | 
2332  |  |   return convert_utf16_to_latin1(utf16_input.data(), unit_count, dst);  | 
2333  |  | }  | 
2334  |  |   #endif // SIMDUTF_SPAN  | 
2335  |  |  | 
2336  |  | /**  | 
2337  |  |  * Transcode a UTF-16LE string that may be malformed into Latin1.  | 
2338  |  |  * If the string cannot be represented as Latin1, the conversion fails  | 
2339  |  |  * and 0 is returned.  | 
2340  |  |  *  | 
2341  |  |  * Validation is performed during the conversion, so this function is safe  | 
2342  |  |  * to call on input that comes from untrusted sources.  | 
2343  |  |  *  | 
2344  |  |  * This function is not BOM-aware.  | 
2345  |  |  *  | 
2346  |  |  * @param input         pointer to the UTF-16LE data to transcode  | 
2347  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2348  |  |  * @param latin1_buffer pointer to a buffer able to hold the converted result  | 
2349  |  |  * @return number of written code units; 0 if the input is not a valid  | 
2350  |  |  * UTF-16LE string or if it cannot be represented as Latin1  | 
2351  |  |  */  | 
2352  |  | simdutf_warn_unused size_t convert_utf16le_to_latin1(  | 
2353  |  |     const char16_t *input, size_t length, char *latin1_buffer) noexcept;  | 
2354  |  |   #if SIMDUTF_SPAN  | 
2355  |  | simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_latin1(  | 
2356  |  |     std::span<const char16_t> utf16_input,  | 
2357  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
2358  |  |   char *const dst = reinterpret_cast<char *>(latin1_output.data());  | 
2359  |  |   const size_t unit_count = utf16_input.size();  | 
2360  |  |   return convert_utf16le_to_latin1(utf16_input.data(), unit_count, dst);  | 
2361  |  | }  | 
2362  |  |   #endif // SIMDUTF_SPAN  | 
2363  |  |  | 
2364  |  | /**  | 
2365  |  |  * Transcode a UTF-16BE string that may be malformed into Latin1.  | 
2366  |  |  *  | 
2367  |  |  * Validation is performed during the conversion, so this function is safe  | 
2368  |  |  * to call on input that comes from untrusted sources.  | 
2369  |  |  *  | 
2370  |  |  * This function is not BOM-aware.  | 
2371  |  |  *  | 
2372  |  |  * @param input         pointer to the UTF-16BE data to transcode  | 
2373  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2374  |  |  * @param latin1_buffer pointer to a buffer able to hold the converted result  | 
2375  |  |  * @return number of written code units; 0 if the input is not a valid  | 
2376  |  |  * UTF-16BE string or if it cannot be represented as Latin1  | 
2377  |  |  */  | 
2378  |  | simdutf_warn_unused size_t convert_utf16be_to_latin1(  | 
2379  |  |     const char16_t *input, size_t length, char *latin1_buffer) noexcept;  | 
2380  |  |   #if SIMDUTF_SPAN  | 
2381  |  | simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_latin1(  | 
2382  |  |     std::span<const char16_t> utf16_input,  | 
2383  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
2384  |  |   char *const dst = reinterpret_cast<char *>(latin1_output.data());  | 
2385  |  |   const size_t unit_count = utf16_input.size();  | 
2386  |  |   return convert_utf16be_to_latin1(utf16_input.data(), unit_count, dst);  | 
2387  |  | }  | 
2388  |  |   #endif // SIMDUTF_SPAN  | 
2389  |  | #endif   // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
2390  |  |  | 
2391  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2392  |  | /**  | 
2393  |  |  * Transcode a UTF-16LE string that may be malformed into UTF-8.  | 
2394  |  |  *  | 
2395  |  |  * Validation is performed during the conversion, so this function is safe  | 
2396  |  |  * to call on input that comes from untrusted sources.  | 
2397  |  |  *  | 
2398  |  |  * This function is not BOM-aware.  | 
2399  |  |  *  | 
2400  |  |  * @param input         pointer to the UTF-16LE data to transcode  | 
2401  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2402  |  |  * @param utf8_buffer   pointer to a buffer able to hold the converted result  | 
2403  |  |  * @return number of written code units; 0 if the input is not a valid  | 
2404  |  |  * UTF-16LE string  | 
2405  |  |  */  | 
2406  |  | simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input,  | 
2407  |  |                                                    size_t length,  | 
2408  |  |                                                    char *utf8_buffer) noexcept;  | 
2409  |  |   #if SIMDUTF_SPAN  | 
2410  |  | simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_utf8(  | 
2411  |  |     std::span<const char16_t> utf16_input,  | 
2412  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
2413  |  |   char *const dst = reinterpret_cast<char *>(utf8_output.data());  | 
2414  |  |   return convert_utf16le_to_utf8(utf16_input.data(), utf16_input.size(), dst);  | 
2415  |  | }  | 
2416  |  |   #endif // SIMDUTF_SPAN  | 
2417  |  |  | 
2418  |  | /**  | 
2419  |  |  * Convert possibly broken UTF-16BE string into UTF-8 string.  | 
2420  |  |  *  | 
2421  |  |  * During the conversion also validation of the input string is done.  | 
2422  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2423  |  |  *  | 
2424  |  |  * This function is not BOM-aware.  | 
2425  |  |  * The std::span overload forwards here without checking the output size.  | 
2426  |  |  * @param input         the UTF-16BE string to convert  | 
2427  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2428  |  |  * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
2429  |  |  * @return number of written code units; 0 if input is not a valid UTF-16BE  | 
2430  |  |  * string  | 
2431  |  |  */  | 
2432  |  | simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input,  | 
2433  |  |                                                    size_t length,  | 
2434  |  |                                                    char *utf8_buffer) noexcept;  | 
2435  |  |   #if SIMDUTF_SPAN  | 
2436  |  | simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_utf8(  | 
2437  |  |     std::span<const char16_t> utf16_input,  | 
2438  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
2439  |  |   return convert_utf16be_to_utf8(utf16_input.data(), utf16_input.size(),  | 
2440  |  |                                  reinterpret_cast<char *>(utf8_output.data()));  | 
2441  |  | }  | 
2442  |  |   #endif // SIMDUTF_SPAN  | 
2443  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2444  |  |  | 
2445  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
2446  |  | /**  | 
2447  |  |  * Using native endianness, convert possibly broken UTF-16 string into Latin1  | 
2448  |  |  * string.  | 
2449  |  |  *  | 
2450  |  |  * During the conversion also validation of the input string is done.  | 
2451  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2452  |  |  * This function is not BOM-aware.  | 
2453  |  |  * The std::span overload forwards here without checking the output size.  | 
2454  |  |  * @param input         the UTF-16 string to convert  | 
2455  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2456  |  |  * @param latin1_buffer   the pointer to buffer that can hold conversion result  | 
2457  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
2458  |  |  * fields error and count) with an error code and either position of the error  | 
2459  |  |  * (in the input in code units) if any, or the number of char written if  | 
2460  |  |  * successful.  | 
2461  |  |  */  | 
2462  |  | simdutf_warn_unused result convert_utf16_to_latin1_with_errors(  | 
2463  |  |     const char16_t *input, size_t length, char *latin1_buffer) noexcept;  | 
2464  |  |   #if SIMDUTF_SPAN  | 
2465  |  | simdutf_really_inline simdutf_warn_unused result  | 
2466  |  | convert_utf16_to_latin1_with_errors(  | 
2467  |  |     std::span<const char16_t> utf16_input,  | 
2468  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
2469  |  |   return convert_utf16_to_latin1_with_errors(  | 
2470  |  |       utf16_input.data(), utf16_input.size(),  | 
2471  |  |       reinterpret_cast<char *>(latin1_output.data()));  | 
2472  |  | }  | 
2473  |  |   #endif // SIMDUTF_SPAN  | 
2474  |  |  | 
2475  |  | /**  | 
2476  |  |  * Convert possibly broken UTF-16LE string into Latin1 string.  | 
2477  |  |  *  | 
2478  |  |  * During the conversion also validation of the input string is done.  | 
2479  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2480  |  |  * This function is not BOM-aware.  | 
2481  |  |  * The std::span overload forwards here without checking the output size.  | 
2482  |  |  * @param input         the UTF-16LE string to convert  | 
2483  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2484  |  |  * @param latin1_buffer   the pointer to buffer that can hold conversion result  | 
2485  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
2486  |  |  * fields error and count) with an error code and either position of the error  | 
2487  |  |  * (in the input in code units) if any, or the number of char written if  | 
2488  |  |  * successful.  | 
2489  |  |  */  | 
2490  |  | simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(  | 
2491  |  |     const char16_t *input, size_t length, char *latin1_buffer) noexcept;  | 
2492  |  |   #if SIMDUTF_SPAN  | 
2493  |  | simdutf_really_inline simdutf_warn_unused result  | 
2494  |  | convert_utf16le_to_latin1_with_errors(  | 
2495  |  |     std::span<const char16_t> utf16_input,  | 
2496  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
2497  |  |   return convert_utf16le_to_latin1_with_errors(  | 
2498  |  |       utf16_input.data(), utf16_input.size(),  | 
2499  |  |       reinterpret_cast<char *>(latin1_output.data()));  | 
2500  |  | }  | 
2501  |  |   #endif // SIMDUTF_SPAN  | 
2502  |  |  | 
2503  |  | /**  | 
2504  |  |  * Convert possibly broken UTF-16BE string into Latin1 string.  | 
2505  |  |  * If the string cannot be represented as Latin1, an error  | 
2506  |  |  * is returned.  | 
2507  |  |  *  | 
2508  |  |  * During the conversion also validation of the input string is done.  | 
2509  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2510  |  |  * This function is not BOM-aware.  | 
2511  |  |  * The std::span overload forwards here without checking the output size.  | 
2512  |  |  * @param input         the UTF-16BE string to convert  | 
2513  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2514  |  |  * @param latin1_buffer   the pointer to buffer that can hold conversion result  | 
2515  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
2516  |  |  * fields error and count) with an error code and either position of the error  | 
2517  |  |  * (in the input in code units) if any, or the number of char written if  | 
2518  |  |  * successful.  | 
2519  |  |  */  | 
2520  |  | simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(  | 
2521  |  |     const char16_t *input, size_t length, char *latin1_buffer) noexcept;  | 
2522  |  |   #if SIMDUTF_SPAN  | 
2523  |  | simdutf_really_inline simdutf_warn_unused result  | 
2524  |  | convert_utf16be_to_latin1_with_errors(  | 
2525  |  |     std::span<const char16_t> utf16_input,  | 
2526  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
2527  |  |   return convert_utf16be_to_latin1_with_errors(  | 
2528  |  |       utf16_input.data(), utf16_input.size(),  | 
2529  |  |       reinterpret_cast<char *>(latin1_output.data()));  | 
2530  |  | }  | 
2531  |  |   #endif // SIMDUTF_SPAN  | 
2532  |  | #endif   // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
2533  |  |  | 
2534  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2535  |  | /**  | 
2536  |  |  * Using native endianness, convert possibly broken UTF-16 string into UTF-8  | 
2537  |  |  * string and stop on error.  | 
2538  |  |  *  | 
2539  |  |  * During the conversion also validation of the input string is done.  | 
2540  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2541  |  |  *  | 
2542  |  |  * This function is not BOM-aware.  | 
2543  |  |  * The std::span overload forwards here without checking the output size.  | 
2544  |  |  * @param input         the UTF-16 string to convert  | 
2545  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2546  |  |  * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
2547  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
2548  |  |  * fields error and count) with an error code and either position of the error  | 
2549  |  |  * (in the input in code units) if any, or the number of char written if  | 
2550  |  |  * successful.  | 
2551  |  |  */  | 
2552  |  | simdutf_warn_unused result convert_utf16_to_utf8_with_errors(  | 
2553  |  |     const char16_t *input, size_t length, char *utf8_buffer) noexcept;  | 
2554  |  |   #if SIMDUTF_SPAN  | 
2555  |  | simdutf_really_inline simdutf_warn_unused result  | 
2556  |  | convert_utf16_to_utf8_with_errors(  | 
2557  |  |     std::span<const char16_t> utf16_input,  | 
2558  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
2559  |  |   return convert_utf16_to_utf8_with_errors(  | 
2560  |  |       utf16_input.data(), utf16_input.size(),  | 
2561  |  |       reinterpret_cast<char *>(utf8_output.data()));  | 
2562  |  | }  | 
2563  |  |   #endif // SIMDUTF_SPAN  | 
2564  |  |  | 
2565  |  | /**  | 
2566  |  |  * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.  | 
2567  |  |  *  | 
2568  |  |  * During the conversion also validation of the input string is done.  | 
2569  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2570  |  |  *  | 
2571  |  |  * This function is not BOM-aware.  | 
2572  |  |  * The std::span overload forwards here without checking the output size.  | 
2573  |  |  * @param input         the UTF-16LE string to convert  | 
2574  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2575  |  |  * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
2576  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
2577  |  |  * fields error and count) with an error code and either position of the error  | 
2578  |  |  * (in the input in code units) if any, or the number of char written if  | 
2579  |  |  * successful.  | 
2580  |  |  */  | 
2581  |  | simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(  | 
2582  |  |     const char16_t *input, size_t length, char *utf8_buffer) noexcept;  | 
2583  |  |   #if SIMDUTF_SPAN  | 
2584  |  | simdutf_really_inline simdutf_warn_unused result  | 
2585  |  | convert_utf16le_to_utf8_with_errors(  | 
2586  |  |     std::span<const char16_t> utf16_input,  | 
2587  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
2588  |  |   return convert_utf16le_to_utf8_with_errors(  | 
2589  |  |       utf16_input.data(), utf16_input.size(),  | 
2590  |  |       reinterpret_cast<char *>(utf8_output.data()));  | 
2591  |  | }  | 
2592  |  |   #endif // SIMDUTF_SPAN  | 
2593  |  |  | 
2594  |  | /**  | 
2595  |  |  * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.  | 
2596  |  |  *  | 
2597  |  |  * During the conversion also validation of the input string is done.  | 
2598  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2599  |  |  *  | 
2600  |  |  * This function is not BOM-aware.  | 
2601  |  |  * The std::span overload forwards here without checking the output size.  | 
2602  |  |  * @param input         the UTF-16BE string to convert  | 
2603  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2604  |  |  * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
2605  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
2606  |  |  * fields error and count) with an error code and either position of the error  | 
2607  |  |  * (in the input in code units) if any, or the number of char written if  | 
2608  |  |  * successful.  | 
2609  |  |  */  | 
2610  |  | simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(  | 
2611  |  |     const char16_t *input, size_t length, char *utf8_buffer) noexcept;  | 
2612  |  |   #if SIMDUTF_SPAN  | 
2613  |  | simdutf_really_inline simdutf_warn_unused result  | 
2614  |  | convert_utf16be_to_utf8_with_errors(  | 
2615  |  |     std::span<const char16_t> utf16_input,  | 
2616  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
2617  |  |   return convert_utf16be_to_utf8_with_errors(  | 
2618  |  |       utf16_input.data(), utf16_input.size(),  | 
2619  |  |       reinterpret_cast<char *>(utf8_output.data()));  | 
2620  |  | }  | 
2621  |  |   #endif // SIMDUTF_SPAN  | 
2622  |  |  | 
2623  |  | /**  | 
2624  |  |  * Using native endianness, convert valid UTF-16 string into UTF-8 string.  | 
2625  |  |  *  | 
2626  |  |  * This function assumes that the input string is valid native-endian UTF-16.  | 
2627  |  |  *  | 
2628  |  |  * This function is not BOM-aware.  | 
2629  |  |  * The std::span overload forwards here without checking the output size.  | 
2630  |  |  * @param input         the UTF-16 string to convert  | 
2631  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2632  |  |  * @param utf8_buffer   the pointer to a buffer that can hold the conversion  | 
2633  |  |  * result  | 
2634  |  |  * @return number of written code units; 0 if conversion is not possible  | 
2635  |  |  */  | 
2636  |  | simdutf_warn_unused size_t convert_valid_utf16_to_utf8(  | 
2637  |  |     const char16_t *input, size_t length, char *utf8_buffer) noexcept;  | 
2638  |  |   #if SIMDUTF_SPAN  | 
2639  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf8(  | 
2640  |  |     std::span<const char16_t> valid_utf16_input,  | 
2641  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
2642  |  |   return convert_valid_utf16_to_utf8(  | 
2643  |  |       valid_utf16_input.data(), valid_utf16_input.size(),  | 
2644  |  |       reinterpret_cast<char *>(utf8_output.data()));  | 
2645  |  | }  | 
2646  |  |   #endif // SIMDUTF_SPAN  | 
2647  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2648  |  |  | 
2649  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
2650  |  | /**  | 
2651  |  |  * Using native endianness, convert UTF-16 string into Latin1 string.  | 
2652  |  |  *  | 
2653  |  |  * This function assumes that the input string is valid UTF-16 and that it can  | 
2654  |  |  * be represented as Latin1. If you violate this assumption, the result is  | 
2655  |  |  * implementation defined and may include system-dependent behavior such as  | 
2656  |  |  * crashes.  | 
2657  |  |  *  | 
2658  |  |  * This function is for expert users only and not part of our public API. Use  | 
2659  |  |  * convert_utf16_to_latin1 instead. The function may be removed from the library  | 
2660  |  |  * in the future.  | 
2661  |  |  *  | 
2662  |  |  * This function is not BOM-aware.  | 
2663  |  |  * The std::span overload forwards here without checking the output size.  | 
2664  |  |  * @param input         the UTF-16 string to convert  | 
2665  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2666  |  |  * @param latin1_buffer   the pointer to buffer that can hold conversion result  | 
2667  |  |  * @return number of written code units; 0 if conversion is not possible  | 
2668  |  |  */  | 
2669  |  | simdutf_warn_unused size_t convert_valid_utf16_to_latin1(  | 
2670  |  |     const char16_t *input, size_t length, char *latin1_buffer) noexcept;  | 
2671  |  |   #if SIMDUTF_SPAN  | 
2672  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_latin1(  | 
2673  |  |     std::span<const char16_t> valid_utf16_input,  | 
2674  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
2675  |  |   return convert_valid_utf16_to_latin1(  | 
2676  |  |       valid_utf16_input.data(), valid_utf16_input.size(),  | 
2677  |  |       reinterpret_cast<char *>(latin1_output.data()));  | 
2678  |  | }  | 
2679  |  |   #endif // SIMDUTF_SPAN  | 
2680  |  |  | 
2681  |  | /**  | 
2682  |  |  * Convert valid UTF-16LE string into Latin1 string.  | 
2683  |  |  *  | 
2684  |  |  * This function assumes that the input string is valid UTF-16LE and that it can  | 
2685  |  |  * be represented as Latin1. If you violate this assumption, the result is  | 
2686  |  |  * implementation defined and may include system-dependent behavior such as  | 
2687  |  |  * crashes.  | 
2688  |  |  *  | 
2689  |  |  * This function is for expert users only and not part of our public API. Use  | 
2690  |  |  * convert_utf16le_to_latin1 instead. The function may be removed from the  | 
2691  |  |  * library in the future.  | 
2692  |  |  *  | 
2693  |  |  * This function is not BOM-aware.  | 
2694  |  |  * The std::span overload forwards here without checking the output size.  | 
2695  |  |  * @param input         the UTF-16LE string to convert  | 
2696  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2697  |  |  * @param latin1_buffer   the pointer to buffer that can hold conversion result  | 
2698  |  |  * @return number of written code units; 0 if conversion is not possible  | 
2699  |  |  */  | 
2700  |  | simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(  | 
2701  |  |     const char16_t *input, size_t length, char *latin1_buffer) noexcept;  | 
2702  |  |   #if SIMDUTF_SPAN  | 
2703  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
2704  |  | convert_valid_utf16le_to_latin1(  | 
2705  |  |     std::span<const char16_t> valid_utf16_input,  | 
2706  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
2707  |  |   return convert_valid_utf16le_to_latin1(  | 
2708  |  |       valid_utf16_input.data(), valid_utf16_input.size(),  | 
2709  |  |       reinterpret_cast<char *>(latin1_output.data()));  | 
2710  |  | }  | 
2711  |  |   #endif // SIMDUTF_SPAN  | 
2712  |  |  | 
2713  |  | /**  | 
2714  |  |  * Convert valid UTF-16BE string into Latin1 string.  | 
2715  |  |  *  | 
2716  |  |  * This function assumes that the input string is valid UTF-16BE and that it can  | 
2717  |  |  * be represented as Latin1. If you violate this assumption, the result is  | 
2718  |  |  * implementation defined and may include system-dependent behavior such as  | 
2719  |  |  * crashes.  | 
2720  |  |  *  | 
2721  |  |  * This function is for expert users only and not part of our public API. Use  | 
2722  |  |  * convert_utf16be_to_latin1 instead. The function may be removed from the  | 
2723  |  |  * library in the future.  | 
2724  |  |  *  | 
2725  |  |  * This function is not BOM-aware.  | 
2726  |  |  * The std::span overload forwards here without checking the output size.  | 
2727  |  |  * @param input         the UTF-16BE string to convert  | 
2728  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2729  |  |  * @param latin1_buffer   the pointer to buffer that can hold conversion result  | 
2730  |  |  * @return number of written code units; 0 if conversion is not possible  | 
2731  |  |  */  | 
2732  |  | simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(  | 
2733  |  |     const char16_t *input, size_t length, char *latin1_buffer) noexcept;  | 
2734  |  |   #if SIMDUTF_SPAN  | 
2735  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
2736  |  | convert_valid_utf16be_to_latin1(  | 
2737  |  |     std::span<const char16_t> valid_utf16_input,  | 
2738  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
2739  |  |   return convert_valid_utf16be_to_latin1(  | 
2740  |  |       valid_utf16_input.data(), valid_utf16_input.size(),  | 
2741  |  |       reinterpret_cast<char *>(latin1_output.data()));  | 
2742  |  | }  | 
2743  |  |   #endif // SIMDUTF_SPAN  | 
2744  |  | #endif   // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
2745  |  |  | 
2746  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2747  |  | /**  | 
2748  |  |  * Convert valid UTF-16LE string into UTF-8 string.  | 
2749  |  |  *  | 
2750  |  |  * This function assumes that the input string is valid UTF-16LE. The  | 
2751  |  |  * std::span overload forwards here without checking the output size.  | 
2752  |  |  *  | 
2753  |  |  * This function is not BOM-aware.  | 
2754  |  |  *  | 
2755  |  |  * @param input         the UTF-16LE string to convert  | 
2756  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2757  |  |  * @param utf8_buffer   the pointer to a buffer that can hold the conversion  | 
2758  |  |  * result  | 
2759  |  |  * @return number of written code units; 0 if conversion is not possible  | 
2760  |  |  */  | 
2761  |  | simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(  | 
2762  |  |     const char16_t *input, size_t length, char *utf8_buffer) noexcept;  | 
2763  |  |   #if SIMDUTF_SPAN  | 
2764  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(  | 
2765  |  |     std::span<const char16_t> valid_utf16_input,  | 
2766  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
2767  |  |   return convert_valid_utf16le_to_utf8(  | 
2768  |  |       valid_utf16_input.data(), valid_utf16_input.size(),  | 
2769  |  |       reinterpret_cast<char *>(utf8_output.data()));  | 
2770  |  | }  | 
2771  |  |   #endif // SIMDUTF_SPAN  | 
2772  |  |  | 
2773  |  | /**  | 
2774  |  |  * Convert valid UTF-16BE string into UTF-8 string.  | 
2775  |  |  *  | 
2776  |  |  * This function assumes that the input string is valid UTF-16BE.  | 
2777  |  |  *  | 
2778  |  |  * This function is not BOM-aware.  | 
2779  |  |  * The std::span overload forwards here without checking the output size.  | 
2780  |  |  * @param input         the UTF-16BE string to convert  | 
2781  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2782  |  |  * @param utf8_buffer   the pointer to a buffer that can hold the conversion  | 
2783  |  |  * result  | 
2784  |  |  * @return number of written code units; 0 if conversion is not possible  | 
2785  |  |  */  | 
2786  |  | simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(  | 
2787  |  |     const char16_t *input, size_t length, char *utf8_buffer) noexcept;  | 
2788  |  |   #if SIMDUTF_SPAN  | 
2789  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(  | 
2790  |  |     std::span<const char16_t> valid_utf16_input,  | 
2791  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
2792  |  |   return convert_valid_utf16be_to_utf8(  | 
2793  |  |       valid_utf16_input.data(), valid_utf16_input.size(),  | 
2794  |  |       reinterpret_cast<char *>(utf8_output.data()));  | 
2795  |  | }  | 
2796  |  |   #endif // SIMDUTF_SPAN  | 
2797  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
2798  |  |  | 
2799  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
2800  |  | /**  | 
2801  |  |  * Using native endianness, convert possibly broken UTF-16 string into UTF-32  | 
2802  |  |  * string.  | 
2803  |  |  *  | 
2804  |  |  * During the conversion also validation of the input string is done.  | 
2805  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2806  |  |  *  | 
2807  |  |  * This function is not BOM-aware.  | 
2808  |  |  * The std::span overload forwards here without checking the output size.  | 
2809  |  |  * @param input         the UTF-16 string to convert  | 
2810  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2811  |  |  * @param utf32_buffer   the pointer to buffer that can hold conversion result  | 
2812  |  |  * @return number of written code units; 0 if input is not a valid UTF-16  | 
2813  |  |  * string  | 
2814  |  |  */  | 
2815  |  | simdutf_warn_unused size_t convert_utf16_to_utf32(  | 
2816  |  |     const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
2817  |  |   #if SIMDUTF_SPAN  | 
2818  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
2819  |  | convert_utf16_to_utf32(std::span<const char16_t> utf16_input,  | 
2820  | 0  |                        std::span<char32_t> utf32_output) noexcept { | 
2821  | 0  |   return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(),  | 
2822  | 0  |                                 utf32_output.data());  | 
2823  | 0  | }  | 
2824  |  |   #endif // SIMDUTF_SPAN  | 
2825  |  |  | 
2826  |  | /**  | 
2827  |  |  * Convert possibly broken UTF-16LE string into UTF-32 string.  | 
2828  |  |  *  | 
2829  |  |  * During the conversion also validation of the input string is done.  | 
2830  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2831  |  |  *  | 
2832  |  |  * This function is not BOM-aware.  | 
2833  |  |  * The std::span overload forwards here without checking the output size.  | 
2834  |  |  * @param input         the UTF-16LE string to convert  | 
2835  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2836  |  |  * @param utf32_buffer   the pointer to buffer that can hold conversion result  | 
2837  |  |  * @return number of written code units; 0 if input is not a valid UTF-16LE  | 
2838  |  |  * string  | 
2839  |  |  */  | 
2840  |  | simdutf_warn_unused size_t convert_utf16le_to_utf32(  | 
2841  |  |     const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
2842  |  |   #if SIMDUTF_SPAN  | 
2843  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
2844  |  | convert_utf16le_to_utf32(std::span<const char16_t> utf16_input,  | 
2845  | 0  |                          std::span<char32_t> utf32_output) noexcept { | 
2846  | 0  |   return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(),  | 
2847  | 0  |                                   utf32_output.data());  | 
2848  | 0  | }  | 
2849  |  |   #endif // SIMDUTF_SPAN  | 
2850  |  |  | 
2851  |  | /**  | 
2852  |  |  * Convert possibly broken UTF-16BE string into UTF-32 string.  | 
2853  |  |  *  | 
2854  |  |  * During the conversion also validation of the input string is done.  | 
2855  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2856  |  |  *  | 
2857  |  |  * This function is not BOM-aware.  | 
2858  |  |  * The std::span overload forwards here without checking the output size.  | 
2859  |  |  * @param input         the UTF-16BE string to convert  | 
2860  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2861  |  |  * @param utf32_buffer   the pointer to buffer that can hold conversion result  | 
2862  |  |  * @return number of written code units; 0 if input is not a valid UTF-16BE  | 
2863  |  |  * string  | 
2864  |  |  */  | 
2865  |  | simdutf_warn_unused size_t convert_utf16be_to_utf32(  | 
2866  |  |     const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
2867  |  |   #if SIMDUTF_SPAN  | 
2868  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
2869  |  | convert_utf16be_to_utf32(std::span<const char16_t> utf16_input,  | 
2870  | 0  |                          std::span<char32_t> utf32_output) noexcept { | 
2871  | 0  |   return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(),  | 
2872  | 0  |                                   utf32_output.data());  | 
2873  | 0  | }  | 
2874  |  |   #endif // SIMDUTF_SPAN  | 
2875  |  |  | 
2876  |  | /**  | 
2877  |  |  * Using native endianness, convert possibly broken UTF-16 string into  | 
2878  |  |  * UTF-32 string and stop on error.  | 
2879  |  |  *  | 
2880  |  |  * During the conversion also validation of the input string is done.  | 
2881  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2882  |  |  *  | 
2883  |  |  * This function is not BOM-aware.  | 
2884  |  |  * The std::span overload forwards here without checking the output size.  | 
2885  |  |  * @param input         the UTF-16 string to convert  | 
2886  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2887  |  |  * @param utf32_buffer   the pointer to buffer that can hold conversion result  | 
2888  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
2889  |  |  * fields error and count) with an error code and either position of the error  | 
2890  |  |  * (in the input in code units) if any, or the number of char32_t written if  | 
2891  |  |  * successful.  | 
2892  |  |  */  | 
2893  |  | simdutf_warn_unused result convert_utf16_to_utf32_with_errors(  | 
2894  |  |     const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
2895  |  |   #if SIMDUTF_SPAN  | 
2896  |  | simdutf_really_inline simdutf_warn_unused result  | 
2897  |  | convert_utf16_to_utf32_with_errors(std::span<const char16_t> utf16_input,  | 
2898  | 0  |                                    std::span<char32_t> utf32_output) noexcept { | 
2899  | 0  |   return convert_utf16_to_utf32_with_errors(  | 
2900  | 0  |       utf16_input.data(), utf16_input.size(), utf32_output.data());  | 
2901  | 0  | }  | 
2902  |  |   #endif // SIMDUTF_SPAN  | 
2903  |  |  | 
2904  |  | /**  | 
2905  |  |  * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.  | 
2906  |  |  *  | 
2907  |  |  * During the conversion also validation of the input string is done.  | 
2908  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2909  |  |  *  | 
2910  |  |  * This function is not BOM-aware.  | 
2911  |  |  * The std::span overload forwards here without checking the output size.  | 
2912  |  |  * @param input         the UTF-16LE string to convert  | 
2913  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2914  |  |  * @param utf32_buffer   the pointer to buffer that can hold conversion result  | 
2915  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
2916  |  |  * fields error and count) with an error code and either position of the error  | 
2917  |  |  * (in the input in code units) if any, or the number of char32_t written if  | 
2918  |  |  * successful.  | 
2919  |  |  */  | 
2920  |  | simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(  | 
2921  |  |     const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
2922  |  |   #if SIMDUTF_SPAN  | 
2923  |  | simdutf_really_inline simdutf_warn_unused result  | 
2924  |  | convert_utf16le_to_utf32_with_errors(  | 
2925  |  |     std::span<const char16_t> utf16_input,  | 
2926  | 0  |     std::span<char32_t> utf32_output) noexcept { | 
2927  | 0  |   return convert_utf16le_to_utf32_with_errors(  | 
2928  | 0  |       utf16_input.data(), utf16_input.size(), utf32_output.data());  | 
2929  | 0  | }  | 
2930  |  |   #endif // SIMDUTF_SPAN  | 
2931  |  |  | 
2932  |  | /**  | 
2933  |  |  * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.  | 
2934  |  |  *  | 
2935  |  |  * During the conversion also validation of the input string is done.  | 
2936  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
2937  |  |  *  | 
2938  |  |  * This function is not BOM-aware.  | 
2939  |  |  * The std::span overload forwards here without checking the output size.  | 
2940  |  |  * @param input         the UTF-16BE string to convert  | 
2941  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2942  |  |  * @param utf32_buffer   the pointer to buffer that can hold conversion result  | 
2943  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
2944  |  |  * fields error and count) with an error code and either position of the error  | 
2945  |  |  * (in the input in code units) if any, or the number of char32_t written if  | 
2946  |  |  * successful.  | 
2947  |  |  */  | 
2948  |  | simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(  | 
2949  |  |     const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
2950  |  |   #if SIMDUTF_SPAN  | 
2951  |  | simdutf_really_inline simdutf_warn_unused result  | 
2952  |  | convert_utf16be_to_utf32_with_errors(  | 
2953  |  |     std::span<const char16_t> utf16_input,  | 
2954  | 0  |     std::span<char32_t> utf32_output) noexcept { | 
2955  | 0  |   return convert_utf16be_to_utf32_with_errors(  | 
2956  | 0  |       utf16_input.data(), utf16_input.size(), utf32_output.data());  | 
2957  | 0  | }  | 
2958  |  |   #endif // SIMDUTF_SPAN  | 
2959  |  |  | 
2960  |  | /**  | 
2961  |  |  * Using native endianness, convert valid UTF-16 string into UTF-32 string.  | 
2962  |  |  *  | 
2963  |  |  * This function assumes that the input string is valid UTF-16 (native  | 
2964  |  |  * endianness).  | 
2965  |  |  *  | 
2966  |  |  * This function is not BOM-aware.  | 
2967  |  |  *  | 
2968  |  |  * @param input         the UTF-16 string to convert  | 
2969  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2970  |  |  * @param utf32_buffer   the pointer to a buffer that can hold the conversion  | 
2971  |  |  * result  | 
2972  |  |  * @return number of written code units; 0 if conversion is not possible  | 
2973  |  |  */  | 
2974  |  | simdutf_warn_unused size_t convert_valid_utf16_to_utf32(  | 
2975  |  |     const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
2976  |  |   #if SIMDUTF_SPAN  | 
2977  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
2978  |  | convert_valid_utf16_to_utf32(std::span<const char16_t> valid_utf16_input,  | 
2979  | 0  |                              std::span<char32_t> utf32_output) noexcept { | 
2980  | 0  |   return convert_valid_utf16_to_utf32(  | 
2981  | 0  |       valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());  | 
2982  | 0  | }  | 
2983  |  |   #endif // SIMDUTF_SPAN  | 
2984  |  |  | 
2985  |  | /**  | 
2986  |  |  * Convert valid UTF-16LE string into UTF-32 string.  | 
2987  |  |  *  | 
2988  |  |  * This function assumes that the input string is valid UTF-16LE.  | 
2989  |  |  *  | 
2990  |  |  * This function is not BOM-aware.  | 
2991  |  |  *  | 
2992  |  |  * @param input         the UTF-16LE string to convert  | 
2993  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
2994  |  |  * @param utf32_buffer   the pointer to a buffer that can hold the conversion  | 
2995  |  |  * result  | 
2996  |  |  * @return number of written code units; 0 if conversion is not possible  | 
2997  |  |  */  | 
2998  |  | simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(  | 
2999  |  |     const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
3000  |  |   #if SIMDUTF_SPAN  | 
3001  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3002  |  | convert_valid_utf16le_to_utf32(std::span<const char16_t> valid_utf16_input,  | 
3003  | 0  |                                std::span<char32_t> utf32_output) noexcept { | 
3004  | 0  |   return convert_valid_utf16le_to_utf32(  | 
3005  | 0  |       valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());  | 
3006  | 0  | }  | 
3007  |  |   #endif // SIMDUTF_SPAN  | 
3008  |  |  | 
3009  |  | /**  | 
3010  |  |  * Convert valid UTF-16BE string into UTF-32 string.  | 
3011  |  |  *  | 
3012  |  |  * This function assumes that the input string is valid UTF-16BE.  | 
3013  |  |  *  | 
3014  |  |  * This function is not BOM-aware.  | 
3015  |  |  *  | 
3016  |  |  * @param input         the UTF-16BE string to convert  | 
3017  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3018  |  |  * @param utf32_buffer   the pointer to a buffer that can hold the conversion  | 
3019  |  |  * result  | 
3020  |  |  * @return number of written code units; 0 if conversion is not possible  | 
3021  |  |  */  | 
3022  |  | simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(  | 
3023  |  |     const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;  | 
3024  |  |   #if SIMDUTF_SPAN  | 
3025  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3026  |  | convert_valid_utf16be_to_utf32(std::span<const char16_t> valid_utf16_input,  | 
3027  | 0  |                                std::span<char32_t> utf32_output) noexcept { | 
3028  | 0  |   return convert_valid_utf16be_to_utf32(  | 
3029  | 0  |       valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());  | 
3030  | 0  | }  | 
3031  |  |   #endif // SIMDUTF_SPAN  | 
3032  |  | #endif   // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
3033  |  |  | 
3034  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
3035  |  | /**  | 
3036  |  |  * Compute the number of bytes that this UTF-16LE/BE string would require in  | 
3037  |  |  * Latin1 format.  | 
3038  |  |  *  | 
3039  |  |  * This function does not validate the input. It is acceptable to pass invalid  | 
3040  |  |  * UTF-16 strings but in such cases the result is implementation defined.  | 
3041  |  |  *  | 
3042  |  |  * This function is not BOM-aware.  | 
3043  |  |  *  | 
3044  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3045  |  |  * @return the number of bytes required to encode the UTF-16 string as Latin1  | 
3046  |  |  */  | 
3047  |  | simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;  | 
3048  |  |  | 
3049  |  | /**  | 
3050  |  |  * Using native endianness, compute the number of bytes that this UTF-16  | 
3051  |  |  * string would require in UTF-8 format.  | 
3052  |  |  *  | 
3053  |  |  * This function does not validate the input. It is acceptable to pass invalid  | 
3054  |  |  * UTF-16 strings but in such cases the result is implementation defined.  | 
3055  |  |  *  | 
3056  |  |  * @param input         the UTF-16 string to convert  | 
3057  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3058  |  |  * @return the number of bytes required to encode the UTF-16 string as UTF-8  | 
3059  |  |  */  | 
3060  |  | simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input,  | 
3061  |  |                                                   size_t length) noexcept;  | 
3062  |  |   #if SIMDUTF_SPAN  | 
3063  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3064  | 0  | utf8_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept { | 
3065  | 0  |   return utf8_length_from_utf16(valid_utf16_input.data(),  | 
3066  | 0  |                                 valid_utf16_input.size());  | 
3067  | 0  | }  | 
3068  |  |   #endif // SIMDUTF_SPAN  | 
3069  |  | #endif   // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
3070  |  |  | 
3071  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
3072  |  | /**  | 
3073  |  |  * Compute the number of bytes that this UTF-16LE string would require in UTF-8  | 
3074  |  |  * format.  | 
3075  |  |  *  | 
3076  |  |  * This function does not validate the input. It is acceptable to pass invalid  | 
3077  |  |  * UTF-16 strings but in such cases the result is implementation defined.  | 
3078  |  |  *  | 
3079  |  |  * @param input         the UTF-16LE string to convert  | 
3080  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3081  |  |  * @return the number of bytes required to encode the UTF-16LE string as UTF-8  | 
3082  |  |  */  | 
3083  |  | simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input,  | 
3084  |  |                                                     size_t length) noexcept;  | 
3085  |  |   #if SIMDUTF_SPAN  | 
3086  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3087  | 0  | utf8_length_from_utf16le(std::span<const char16_t> valid_utf16_input) noexcept { | 
3088  | 0  |   return utf8_length_from_utf16le(valid_utf16_input.data(),  | 
3089  | 0  |                                   valid_utf16_input.size());  | 
3090  | 0  | }  | 
3091  |  |   #endif // SIMDUTF_SPAN  | 
3092  |  |  | 
3093  |  | /**  | 
3094  |  |  * Compute the number of bytes that this UTF-16BE string would require in UTF-8  | 
3095  |  |  * format.  | 
3096  |  |  *  | 
3097  |  |  * This function does not validate the input. It is acceptable to pass invalid  | 
3098  |  |  * UTF-16 strings but in such cases the result is implementation defined.  | 
3099  |  |  *  | 
3100  |  |  * @param input         the UTF-16BE string to convert  | 
3101  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3102  |  |  * @return the number of bytes required to encode the UTF-16BE string as UTF-8  | 
3103  |  |  */  | 
3104  |  | simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input,  | 
3105  |  |                                                     size_t length) noexcept;  | 
3106  |  |   #if SIMDUTF_SPAN  | 
3107  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3108  | 0  | utf8_length_from_utf16be(std::span<const char16_t> valid_utf16_input) noexcept { | 
3109  | 0  |   return utf8_length_from_utf16be(valid_utf16_input.data(),  | 
3110  | 0  |                                   valid_utf16_input.size());  | 
3111  | 0  | }  | 
3112  |  |   #endif // SIMDUTF_SPAN  | 
3113  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
3114  |  |  | 
3115  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
3116  |  | /**  | 
3117  |  |  * Convert possibly broken UTF-32 string into UTF-8 string.  | 
3118  |  |  *  | 
3119  |  |  * During the conversion also validation of the input string is done.  | 
3120  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
3121  |  |  *  | 
3122  |  |  * This function is not BOM-aware.  | 
3123  |  |  *  | 
3124  |  |  * @param input         the UTF-32 string to convert  | 
3125  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3126  |  |  * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
3127  |  |  * @return number of written code units; 0 if input is not a valid UTF-32 string  | 
3128  |  |  */  | 
3129  |  | simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input,  | 
3130  |  |                                                  size_t length,  | 
3131  |  |                                                  char *utf8_buffer) noexcept;  | 
3132  |  |   #if SIMDUTF_SPAN  | 
3133  |  | simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf8(  | 
3134  |  |     std::span<const char32_t> utf32_input,  | 
3135  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
3136  |  |   return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(),  | 
3137  |  |                                reinterpret_cast<char *>(utf8_output.data()));  | 
3138  |  | }  | 
3139  |  |   #endif // SIMDUTF_SPAN  | 
3140  |  |  | 
3141  |  | /**  | 
3142  |  |  * Convert possibly broken UTF-32 string into UTF-8 string and stop on error.  | 
3143  |  |  *  | 
3144  |  |  * During the conversion also validation of the input string is done.  | 
3145  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
3146  |  |  *  | 
3147  |  |  * This function is not BOM-aware.  | 
3148  |  |  *  | 
3149  |  |  * @param input         the UTF-32 string to convert  | 
3150  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3151  |  |  * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
3152  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
3153  |  |  * fields error and count) with an error code and either position of the error  | 
3154  |  |  * (in the input in code units) if any, or the number of char written if  | 
3155  |  |  * successful.  | 
3156  |  |  */  | 
3157  |  | simdutf_warn_unused result convert_utf32_to_utf8_with_errors(  | 
3158  |  |     const char32_t *input, size_t length, char *utf8_buffer) noexcept;  | 
3159  |  |   #if SIMDUTF_SPAN  | 
3160  |  | simdutf_really_inline simdutf_warn_unused result  | 
3161  |  | convert_utf32_to_utf8_with_errors(  | 
3162  |  |     std::span<const char32_t> utf32_input,  | 
3163  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
3164  |  |   return convert_utf32_to_utf8_with_errors(  | 
3165  |  |       utf32_input.data(), utf32_input.size(),  | 
3166  |  |       reinterpret_cast<char *>(utf8_output.data()));  | 
3167  |  | }  | 
3168  |  |   #endif // SIMDUTF_SPAN  | 
3169  |  |  | 
3170  |  | /**  | 
3171  |  |  * Convert valid UTF-32 string into UTF-8 string.  | 
3172  |  |  *  | 
3173  |  |  * This function assumes that the input string is valid UTF-32.  | 
3174  |  |  *  | 
3175  |  |  * This function is not BOM-aware.  | 
3176  |  |  *  | 
3177  |  |  * @param input         the UTF-32 string to convert  | 
3178  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3179  |  |  * @param utf8_buffer   the pointer to a buffer that can hold the conversion  | 
3180  |  |  * result  | 
3181  |  |  * @return number of written code units; 0 if conversion is not possible  | 
3182  |  |  */  | 
3183  |  | simdutf_warn_unused size_t convert_valid_utf32_to_utf8(  | 
3184  |  |     const char32_t *input, size_t length, char *utf8_buffer) noexcept;  | 
3185  |  |   #if SIMDUTF_SPAN  | 
3186  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf8(  | 
3187  |  |     std::span<const char32_t> valid_utf32_input,  | 
3188  |  |     detail::output_span_of_byte_like auto &&utf8_output) noexcept { | 
3189  |  |   return convert_valid_utf32_to_utf8(  | 
3190  |  |       valid_utf32_input.data(), valid_utf32_input.size(),  | 
3191  |  |       reinterpret_cast<char *>(utf8_output.data()));  | 
3192  |  | }  | 
3193  |  |   #endif // SIMDUTF_SPAN  | 
3194  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
3195  |  |  | 
3196  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
3197  |  | /**  | 
3198  |  |  * Using native endianness, convert possibly broken UTF-32 string into a UTF-16  | 
3199  |  |  * string.  | 
3200  |  |  *  | 
3201  |  |  * During the conversion also validation of the input string is done.  | 
3202  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
3203  |  |  *  | 
3204  |  |  * This function is not BOM-aware.  | 
3205  |  |  *  | 
3206  |  |  * @param input         the UTF-32 string to convert  | 
3207  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3208  |  |  * @param utf16_buffer   the pointer to buffer that can hold conversion result  | 
3209  |  |  * @return number of written code units; 0 if input is not a valid UTF-32 string  | 
3210  |  |  */  | 
3211  |  | simdutf_warn_unused size_t convert_utf32_to_utf16(  | 
3212  |  |     const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
3213  |  |   #if SIMDUTF_SPAN  | 
3214  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3215  |  | convert_utf32_to_utf16(std::span<const char32_t> utf32_input,  | 
3216  | 0  |                        std::span<char16_t> utf16_output) noexcept { | 
3217  | 0  |   return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(),  | 
3218  | 0  |                                 utf16_output.data());  | 
3219  | 0  | }  | 
3220  |  |   #endif // SIMDUTF_SPAN  | 
3221  |  |  | 
3222  |  | /**  | 
3223  |  |  * Convert possibly broken UTF-32 string into UTF-16LE string.  | 
3224  |  |  *  | 
3225  |  |  * During the conversion also validation of the input string is done.  | 
3226  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
3227  |  |  *  | 
3228  |  |  * This function is not BOM-aware.  | 
3229  |  |  *  | 
3230  |  |  * @param input         the UTF-32 string to convert  | 
3231  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3232  |  |  * @param utf16_buffer   the pointer to buffer that can hold conversion result  | 
3233  |  |  * @return number of written code units; 0 if input is not a valid UTF-32 string  | 
3234  |  |  */  | 
3235  |  | simdutf_warn_unused size_t convert_utf32_to_utf16le(  | 
3236  |  |     const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
3237  |  |   #if SIMDUTF_SPAN  | 
3238  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3239  |  | convert_utf32_to_utf16le(std::span<const char32_t> utf32_input,  | 
3240  | 0  |                          std::span<char16_t> utf16_output) noexcept { | 
3241  | 0  |   return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(),  | 
3242  | 0  |                                   utf16_output.data());  | 
3243  | 0  | }  | 
3244  |  |   #endif // SIMDUTF_SPAN  | 
3245  |  | #endif   // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
3246  |  |  | 
3247  |  | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
3248  |  | /**  | 
3249  |  |  * Convert possibly broken UTF-32 string into Latin1 string.  | 
3250  |  |  *  | 
3251  |  |  * During the conversion also validation of the input string is done.  | 
3252  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
3253  |  |  *  | 
3254  |  |  * This function is not BOM-aware.  | 
3255  |  |  *  | 
3256  |  |  * @param input         the UTF-32 string to convert  | 
3257  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3258  |  |  * @param latin1_buffer   the pointer to buffer that can hold conversion result  | 
3259  |  |  * @return number of written code units; 0 if input is not a valid UTF-32 string  | 
3260  |  |  * or if it cannot be represented as Latin1  | 
3261  |  |  */  | 
3262  |  | simdutf_warn_unused size_t convert_utf32_to_latin1(  | 
3263  |  |     const char32_t *input, size_t length, char *latin1_buffer) noexcept;  | 
3264  |  |   #if SIMDUTF_SPAN  | 
3265  |  | simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_latin1(  | 
3266  |  |     std::span<const char32_t> utf32_input,  | 
3267  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
3268  |  |   return convert_utf32_to_latin1(  | 
3269  |  |       utf32_input.data(), utf32_input.size(),  | 
3270  |  |       reinterpret_cast<char *>(latin1_output.data()));  | 
3271  |  | }  | 
3272  |  |   #endif // SIMDUTF_SPAN  | 
3273  |  |  | 
3274  |  | /**  | 
3275  |  |  * Convert possibly broken UTF-32 string into Latin1 string and stop on error.  | 
3276  |  |  * If the string cannot be represented as Latin1, an error is returned.  | 
3277  |  |  *  | 
3278  |  |  * During the conversion also validation of the input string is done.  | 
3279  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
3280  |  |  *  | 
3281  |  |  * This function is not BOM-aware.  | 
3282  |  |  *  | 
3283  |  |  * @param input         the UTF-32 string to convert  | 
3284  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3285  |  |  * @param latin1_buffer   the pointer to buffer that can hold conversion result  | 
3286  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
3287  |  |  * fields error and count) with an error code and either position of the error  | 
3288  |  |  * (in the input in code units) if any, or the number of char written if  | 
3289  |  |  * successful.  | 
3290  |  |  */  | 
3291  |  | simdutf_warn_unused result convert_utf32_to_latin1_with_errors(  | 
3292  |  |     const char32_t *input, size_t length, char *latin1_buffer) noexcept;  | 
3293  |  |   #if SIMDUTF_SPAN  | 
3294  |  | simdutf_really_inline simdutf_warn_unused result  | 
3295  |  | convert_utf32_to_latin1_with_errors(  | 
3296  |  |     std::span<const char32_t> utf32_input,  | 
3297  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
3298  |  |   return convert_utf32_to_latin1_with_errors(  | 
3299  |  |       utf32_input.data(), utf32_input.size(),  | 
3300  |  |       reinterpret_cast<char *>(latin1_output.data()));  | 
3301  |  | }  | 
3302  |  |   #endif // SIMDUTF_SPAN  | 
3303  |  |  | 
3304  |  | /**  | 
3305  |  |  * Convert valid UTF-32 string into Latin1 string.  | 
3306  |  |  *  | 
3307  |  |  * This function assumes that the input string is valid UTF-32 and that it can  | 
3308  |  |  * be represented as Latin1. If you violate this assumption, the result is  | 
3309  |  |  * implementation defined and may include system-dependent behavior such as  | 
3310  |  |  * crashes.  | 
3311  |  |  *  | 
3312  |  |  * This function is for expert users only and not part of our public API. Use  | 
3313  |  |  * convert_utf32_to_latin1 instead. The function may be removed from the library  | 
3314  |  |  * in the future.  | 
3315  |  |  *  | 
3316  |  |  * This function is not BOM-aware.  | 
3317  |  |  *  | 
3318  |  |  * @param input         the UTF-32 string to convert  | 
3319  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3320  |  |  * @param latin1_buffer   the pointer to a buffer that can hold the conversion  | 
3321  |  |  * result  | 
3322  |  |  * @return number of written code units; 0 if conversion is not possible  | 
3323  |  |  */  | 
3324  |  | simdutf_warn_unused size_t convert_valid_utf32_to_latin1(  | 
3325  |  |     const char32_t *input, size_t length, char *latin1_buffer) noexcept;  | 
3326  |  |   #if SIMDUTF_SPAN  | 
3327  |  | simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_latin1(  | 
3328  |  |     std::span<const char32_t> valid_utf32_input,  | 
3329  |  |     detail::output_span_of_byte_like auto &&latin1_output) noexcept { | 
3330  |  |   return convert_valid_utf32_to_latin1(  | 
3331  |  |       valid_utf32_input.data(), valid_utf32_input.size(),  | 
3332  |  |       reinterpret_cast<char *>(latin1_output.data()));  | 
3333  |  | }  | 
3334  |  |   #endif // SIMDUTF_SPAN  | 
3335  |  |  | 
3336  |  | /**  | 
3337  |  |  * Compute the number of bytes that this UTF-32 string would require in Latin1  | 
3338  |  |  * format.  | 
3339  |  |  *  | 
3340  |  |  * This function does not validate the input. It is acceptable to pass invalid  | 
3341  |  |  * UTF-32 strings but in such cases the result is implementation defined.  | 
3342  |  |  *  | 
3343  |  |  * This function is not BOM-aware.  | 
3344  |  |  *  | 
3345  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3346  |  |  * @return the number of bytes required to encode the UTF-32 string as Latin1  | 
3347  |  |  */  | 
3348  |  | simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept;  | 
3349  |  |  | 
3350  |  | /**  | 
3351  |  |  * Compute the number of 4-byte code units (char32_t) that this Latin1 string  | 
3352  |  |  * would require in UTF-32 format.  | 
3353  |  |  *  | 
3354  |  |  * @param length        the length of the string in Latin1 code units (char)  | 
3355  |  |  * @return the length of the string in 4-byte code units (char32_t) required to  | 
3356  |  |  * encode the Latin1 string as UTF-32  | 
3357  |  |  */  | 
3358  |  | simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept;  | 
3359  |  | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
3360  |  |  | 
3361  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
3362  |  | /**  | 
3363  |  |  * Convert possibly broken UTF-32 string into UTF-16BE string.  | 
3364  |  |  *  | 
3365  |  |  * During the conversion also validation of the input string is done.  | 
3366  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
3367  |  |  *  | 
3368  |  |  * This function is not BOM-aware.  | 
3369  |  |  *  | 
3370  |  |  * @param input         the UTF-32 string to convert  | 
3371  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3372  |  |  * @param utf16_buffer   the pointer to buffer that can hold conversion result  | 
3373  |  |  * @return number of written code units; 0 if input is not a valid UTF-32 string  | 
3374  |  |  */  | 
3375  |  | simdutf_warn_unused size_t convert_utf32_to_utf16be(  | 
3376  |  |     const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
3377  |  |   #if SIMDUTF_SPAN  | 
3378  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3379  |  | convert_utf32_to_utf16be(std::span<const char32_t> utf32_input,  | 
3380  | 0  |                          std::span<char16_t> utf16_output) noexcept { | 
3381  | 0  |   return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(),  | 
3382  | 0  |                                   utf16_output.data());  | 
3383  | 0  | }  | 
3384  |  |   #endif // SIMDUTF_SPAN  | 
3385  |  |  | 
3386  |  | /**  | 
3387  |  |  * Using native endianness, convert possibly broken UTF-32 string into UTF-16  | 
3388  |  |  * string and stop on error.  | 
3389  |  |  *  | 
3390  |  |  * During the conversion also validation of the input string is done.  | 
3391  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
3392  |  |  *  | 
3393  |  |  * This function is not BOM-aware.  | 
3394  |  |  *  | 
3395  |  |  * @param input         the UTF-32 string to convert  | 
3396  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3397  |  |  * @param utf16_buffer   the pointer to buffer that can hold conversion result  | 
3398  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
3399  |  |  * fields error and count) with an error code and either position of the error  | 
3400  |  |  * (in the input in code units) if any, or the number of char16_t written if  | 
3401  |  |  * successful.  | 
3402  |  |  */  | 
3403  |  | simdutf_warn_unused result convert_utf32_to_utf16_with_errors(  | 
3404  |  |     const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
3405  |  |   #if SIMDUTF_SPAN  | 
3406  |  | simdutf_really_inline simdutf_warn_unused result  | 
3407  |  | convert_utf32_to_utf16_with_errors(std::span<const char32_t> utf32_input,  | 
3408  | 0  |                                    std::span<char16_t> utf16_output) noexcept { | 
3409  | 0  |   return convert_utf32_to_utf16_with_errors(  | 
3410  | 0  |       utf32_input.data(), utf32_input.size(), utf16_output.data());  | 
3411  | 0  | }  | 
3412  |  |   #endif // SIMDUTF_SPAN  | 
3413  |  |  | 
3414  |  | /**  | 
3415  |  |  * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.  | 
3416  |  |  *  | 
3417  |  |  * During the conversion also validation of the input string is done.  | 
3418  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
3419  |  |  *  | 
3420  |  |  * This function is not BOM-aware.  | 
3421  |  |  *  | 
3422  |  |  * @param input         the UTF-32 string to convert  | 
3423  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3424  |  |  * @param utf16_buffer   the pointer to buffer that can hold conversion result  | 
3425  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
3426  |  |  * fields error and count) with an error code and either position of the error  | 
3427  |  |  * (in the input in code units) if any, or the number of char16_t written if  | 
3428  |  |  * successful.  | 
3429  |  |  */  | 
3430  |  | simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(  | 
3431  |  |     const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
3432  |  |   #if SIMDUTF_SPAN  | 
3433  |  | simdutf_really_inline simdutf_warn_unused result  | 
3434  |  | convert_utf32_to_utf16le_with_errors(  | 
3435  |  |     std::span<const char32_t> utf32_input,  | 
3436  | 0  |     std::span<char16_t> utf16_output) noexcept { | 
3437  | 0  |   return convert_utf32_to_utf16le_with_errors(  | 
3438  | 0  |       utf32_input.data(), utf32_input.size(), utf16_output.data());  | 
3439  | 0  | }  | 
3440  |  |   #endif // SIMDUTF_SPAN  | 
3441  |  |  | 
3442  |  | /**  | 
3443  |  |  * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.  | 
3444  |  |  *  | 
3445  |  |  * During the conversion also validation of the input string is done.  | 
3446  |  |  * This function is suitable to work with inputs from untrusted sources.  | 
3447  |  |  *  | 
3448  |  |  * This function is not BOM-aware.  | 
3449  |  |  *  | 
3450  |  |  * @param input         the UTF-32 string to convert  | 
3451  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3452  |  |  * @param utf16_buffer   the pointer to buffer that can hold conversion result  | 
3453  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
3454  |  |  * fields error and count) with an error code and either position of the error  | 
3455  |  |  * (in the input in code units) if any, or the number of char16_t written if  | 
3456  |  |  * successful.  | 
3457  |  |  */  | 
3458  |  | simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(  | 
3459  |  |     const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
3460  |  |   #if SIMDUTF_SPAN  | 
3461  |  | simdutf_really_inline simdutf_warn_unused result  | 
3462  |  | convert_utf32_to_utf16be_with_errors(  | 
3463  |  |     std::span<const char32_t> utf32_input,  | 
3464  | 0  |     std::span<char16_t> utf16_output) noexcept { | 
3465  | 0  |   return convert_utf32_to_utf16be_with_errors(  | 
3466  | 0  |       utf32_input.data(), utf32_input.size(), utf16_output.data());  | 
3467  | 0  | }  | 
3468  |  |   #endif // SIMDUTF_SPAN  | 
3469  |  |  | 
3470  |  | /**  | 
3471  |  |  * Using native endianness, convert valid UTF-32 string into a UTF-16 string.  | 
3472  |  |  *  | 
3473  |  |  * This function assumes that the input string is valid UTF-32.  | 
3474  |  |  *  | 
3475  |  |  * This function is not BOM-aware.  | 
3476  |  |  *  | 
3477  |  |  * @param input         the UTF-32 string to convert  | 
3478  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3479  |  |  * @param utf16_buffer   the pointer to a buffer that can hold the conversion  | 
3480  |  |  * result  | 
3481  |  |  * @return number of written code units; 0 if conversion is not possible  | 
3482  |  |  */  | 
3483  |  | simdutf_warn_unused size_t convert_valid_utf32_to_utf16(  | 
3484  |  |     const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
3485  |  |   #if SIMDUTF_SPAN  | 
3486  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3487  |  | convert_valid_utf32_to_utf16(std::span<const char32_t> valid_utf32_input,  | 
3488  | 0  |                              std::span<char16_t> utf16_output) noexcept { | 
3489  | 0  |   return convert_valid_utf32_to_utf16(  | 
3490  | 0  |       valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());  | 
3491  | 0  | }  | 
3492  |  |   #endif // SIMDUTF_SPAN  | 
3493  |  |  | 
3494  |  | /**  | 
3495  |  |  * Convert valid UTF-32 string into UTF-16LE string.  | 
3496  |  |  *  | 
3497  |  |  * This function assumes that the input string is valid UTF-32.  | 
3498  |  |  *  | 
3499  |  |  * This function is not BOM-aware.  | 
3500  |  |  *  | 
3501  |  |  * @param input         the UTF-32 string to convert  | 
3502  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3503  |  |  * @param utf16_buffer   the pointer to a buffer that can hold the conversion  | 
3504  |  |  * result  | 
3505  |  |  * @return number of written code units; 0 if conversion is not possible  | 
3506  |  |  */  | 
3507  |  | simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(  | 
3508  |  |     const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
3509  |  |   #if SIMDUTF_SPAN  | 
3510  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3511  |  | convert_valid_utf32_to_utf16le(std::span<const char32_t> valid_utf32_input,  | 
3512  | 0  |                                std::span<char16_t> utf16_output) noexcept { | 
3513  | 0  |   return convert_valid_utf32_to_utf16le(  | 
3514  | 0  |       valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());  | 
3515  | 0  | }  | 
3516  |  |   #endif // SIMDUTF_SPAN  | 
3517  |  |  | 
3518  |  | /**  | 
3519  |  |  * Convert valid UTF-32 string into UTF-16BE string.  | 
3520  |  |  *  | 
3521  |  |  * This function assumes that the input string is valid UTF-32.  | 
3522  |  |  *  | 
3523  |  |  * This function is not BOM-aware.  | 
3524  |  |  *  | 
3525  |  |  * @param input         the UTF-32 string to convert  | 
3526  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3527  |  |  * @param utf16_buffer   the pointer to a buffer that can hold the conversion  | 
3528  |  |  * result  | 
3529  |  |  * @return number of written code units; 0 if conversion is not possible  | 
3530  |  |  */  | 
3531  |  | simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(  | 
3532  |  |     const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;  | 
3533  |  |   #if SIMDUTF_SPAN  | 
3534  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3535  |  | convert_valid_utf32_to_utf16be(std::span<const char32_t> valid_utf32_input,  | 
3536  | 0  |                                std::span<char16_t> utf16_output) noexcept { | 
3537  | 0  |   return convert_valid_utf32_to_utf16be(  | 
3538  | 0  |       valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());  | 
3539  | 0  | }  | 
3540  |  |   #endif // SIMDUTF_SPAN  | 
3541  |  | #endif   // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
3542  |  |  | 
3543  |  | #if SIMDUTF_FEATURE_UTF16  | 
/**
 * Change the endianness of a UTF-16 string. This turns UTF-16LE into UTF-16BE
 * and UTF-16BE into UTF-16LE.
 *
 * The input is not validated.
 *
 * This function is not BOM-aware.
 *
 * @param input         the UTF-16 string to process
 * @param length        the length of the string in 2-byte code units (char16_t)
 * @param output        the pointer to a buffer that can hold the conversion
 * result
 */
void change_endianness_utf16(const char16_t *input, size_t length,
                             char16_t *output) noexcept;
  #if SIMDUTF_SPAN
// Span convenience overload. The number of code units processed is taken from
// utf16_input; only the data pointer of utf16_output is forwarded, so its size
// is not checked by this wrapper.
simdutf_really_inline void
change_endianness_utf16(std::span<const char16_t> utf16_input,
                        std::span<char16_t> utf16_output) noexcept {
  const char16_t *const source = utf16_input.data();
  const size_t source_length = utf16_input.size();
  char16_t *const destination = utf16_output.data();
  change_endianness_utf16(source, source_length, destination);
}
  #endif // SIMDUTF_SPAN
3567  |  | #endif   // SIMDUTF_FEATURE_UTF16  | 
3568  |  |  | 
3569  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
3570  |  | /**  | 
3571  |  |  * Compute the number of bytes that this UTF-32 string would require in UTF-8  | 
3572  |  |  * format.  | 
3573  |  |  *  | 
3574  |  |  * This function does not validate the input. It is acceptable to pass invalid  | 
3575  |  |  * UTF-32 strings but in such cases the result is implementation defined.  | 
3576  |  |  *  | 
3577  |  |  * @param input         the UTF-32 string to convert  | 
3578  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3579  |  |  * @return the number of bytes required to encode the UTF-32 string as UTF-8  | 
3580  |  |  */  | 
3581  |  | simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input,  | 
3582  |  |                                                   size_t length) noexcept;  | 
3583  |  |   #if SIMDUTF_SPAN  | 
3584  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3585  | 0  | utf8_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept { | 
3586  | 0  |   return utf8_length_from_utf32(valid_utf32_input.data(),  | 
3587  | 0  |                                 valid_utf32_input.size());  | 
3588  | 0  | }  | 
3589  |  |   #endif // SIMDUTF_SPAN  | 
3590  |  | #endif   // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
3591  |  |  | 
3592  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
3593  |  | /**  | 
3594  |  |  * Compute the number of two-byte code units that this UTF-32 string would  | 
3595  |  |  * require in UTF-16 format.  | 
3596  |  |  *  | 
3597  |  |  * This function does not validate the input. It is acceptable to pass invalid  | 
3598  |  |  * UTF-32 strings but in such cases the result is implementation defined.  | 
3599  |  |  *  | 
3600  |  |  * @param input         the UTF-32 string to convert  | 
3601  |  |  * @param length        the length of the string in 4-byte code units (char32_t)  | 
3602  |  |  * @return the number of bytes required to encode the UTF-32 string as UTF-16  | 
3603  |  |  */  | 
3604  |  | simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input,  | 
3605  |  |                                                    size_t length) noexcept;  | 
3606  |  |   #if SIMDUTF_SPAN  | 
3607  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3608  | 0  | utf16_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept { | 
3609  | 0  |   return utf16_length_from_utf32(valid_utf32_input.data(),  | 
3610  | 0  |                                  valid_utf32_input.size());  | 
3611  | 0  | }  | 
3612  |  |   #endif // SIMDUTF_SPAN  | 
3613  |  |  | 
3614  |  | /**  | 
3615  |  |  * Using native endianness; Compute the number of bytes that this UTF-16  | 
3616  |  |  * string would require in UTF-32 format.  | 
3617  |  |  *  | 
3618  |  |  * This function is equivalent to count_utf16.  | 
3619  |  |  *  | 
3620  |  |  * This function does not validate the input. It is acceptable to pass invalid  | 
3621  |  |  * UTF-16 strings but in such cases the result is implementation defined.  | 
3622  |  |  *  | 
3623  |  |  * This function is not BOM-aware.  | 
3624  |  |  *  | 
3625  |  |  * @param input         the UTF-16 string to convert  | 
3626  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3627  |  |  * @return the number of bytes required to encode the UTF-16LE string as UTF-32  | 
3628  |  |  */  | 
3629  |  | simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input,  | 
3630  |  |                                                    size_t length) noexcept;  | 
3631  |  |   #if SIMDUTF_SPAN  | 
3632  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3633  | 0  | utf32_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept { | 
3634  | 0  |   return utf32_length_from_utf16(valid_utf16_input.data(),  | 
3635  | 0  |                                  valid_utf16_input.size());  | 
3636  | 0  | }  | 
3637  |  |   #endif // SIMDUTF_SPAN  | 
3638  |  |  | 
3639  |  | /**  | 
3640  |  |  * Compute the number of bytes that this UTF-16LE string would require in UTF-32  | 
3641  |  |  * format.  | 
3642  |  |  *  | 
3643  |  |  * This function is equivalent to count_utf16le.  | 
3644  |  |  *  | 
3645  |  |  * This function does not validate the input. It is acceptable to pass invalid  | 
3646  |  |  * UTF-16 strings but in such cases the result is implementation defined.  | 
3647  |  |  *  | 
3648  |  |  * This function is not BOM-aware.  | 
3649  |  |  *  | 
3650  |  |  * @param input         the UTF-16LE string to convert  | 
3651  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3652  |  |  * @return the number of bytes required to encode the UTF-16LE string as UTF-32  | 
3653  |  |  */  | 
3654  |  | simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input,  | 
3655  |  |                                                      size_t length) noexcept;  | 
3656  |  |   #if SIMDUTF_SPAN  | 
3657  |  | simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16le(  | 
3658  | 0  |     std::span<const char16_t> valid_utf16_input) noexcept { | 
3659  | 0  |   return utf32_length_from_utf16le(valid_utf16_input.data(),  | 
3660  | 0  |                                    valid_utf16_input.size());  | 
3661  | 0  | }  | 
3662  |  |   #endif // SIMDUTF_SPAN  | 
3663  |  |  | 
3664  |  | /**  | 
3665  |  |  * Compute the number of bytes that this UTF-16BE string would require in UTF-32  | 
3666  |  |  * format.  | 
3667  |  |  *  | 
3668  |  |  * This function is equivalent to count_utf16be.  | 
3669  |  |  *  | 
3670  |  |  * This function does not validate the input. It is acceptable to pass invalid  | 
3671  |  |  * UTF-16 strings but in such cases the result is implementation defined.  | 
3672  |  |  *  | 
3673  |  |  * This function is not BOM-aware.  | 
3674  |  |  *  | 
3675  |  |  * @param input         the UTF-16BE string to convert  | 
3676  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3677  |  |  * @return the number of bytes required to encode the UTF-16BE string as UTF-32  | 
3678  |  |  */  | 
3679  |  | simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input,  | 
3680  |  |                                                      size_t length) noexcept;  | 
3681  |  |   #if SIMDUTF_SPAN  | 
3682  |  | simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16be(  | 
3683  | 0  |     std::span<const char16_t> valid_utf16_input) noexcept { | 
3684  | 0  |   return utf32_length_from_utf16be(valid_utf16_input.data(),  | 
3685  | 0  |                                    valid_utf16_input.size());  | 
3686  | 0  | }  | 
3687  |  |   #endif // SIMDUTF_SPAN  | 
3688  |  | #endif   // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
3689  |  |  | 
3690  |  | #if SIMDUTF_FEATURE_UTF16  | 
3691  |  | /**  | 
3692  |  |  * Count the number of code points (characters) in the string assuming that  | 
3693  |  |  * it is valid.  | 
3694  |  |  *  | 
3695  |  |  * This function assumes that the input string is valid UTF-16 (native  | 
3696  |  |  * endianness). It is acceptable to pass invalid UTF-16 strings but in such  | 
3697  |  |  * cases the result is implementation defined.  | 
3698  |  |  *  | 
3699  |  |  * This function is not BOM-aware.  | 
3700  |  |  *  | 
3701  |  |  * @param input         the UTF-16 string to process  | 
3702  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3703  |  |  * @return number of code points  | 
3704  |  |  */  | 
3705  |  | simdutf_warn_unused size_t count_utf16(const char16_t *input,  | 
3706  |  |                                        size_t length) noexcept;  | 
3707  |  |   #if SIMDUTF_SPAN  | 
3708  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3709  | 0  | count_utf16(std::span<const char16_t> valid_utf16_input) noexcept { | 
3710  | 0  |   return count_utf16(valid_utf16_input.data(), valid_utf16_input.size());  | 
3711  | 0  | }  | 
3712  |  |   #endif // SIMDUTF_SPAN  | 
3713  |  |  | 
3714  |  | /**  | 
3715  |  |  * Count the number of code points (characters) in the string assuming that  | 
3716  |  |  * it is valid.  | 
3717  |  |  *  | 
3718  |  |  * This function assumes that the input string is valid UTF-16LE.  | 
3719  |  |  * It is acceptable to pass invalid UTF-16 strings but in such cases  | 
3720  |  |  * the result is implementation defined.  | 
3721  |  |  *  | 
3722  |  |  * This function is not BOM-aware.  | 
3723  |  |  *  | 
3724  |  |  * @param input         the UTF-16LE string to process  | 
3725  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3726  |  |  * @return number of code points  | 
3727  |  |  */  | 
3728  |  | simdutf_warn_unused size_t count_utf16le(const char16_t *input,  | 
3729  |  |                                          size_t length) noexcept;  | 
3730  |  |   #if SIMDUTF_SPAN  | 
3731  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3732  | 0  | count_utf16le(std::span<const char16_t> valid_utf16_input) noexcept { | 
3733  | 0  |   return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size());  | 
3734  | 0  | }  | 
3735  |  |   #endif // SIMDUTF_SPAN  | 
3736  |  |  | 
3737  |  | /**  | 
3738  |  |  * Count the number of code points (characters) in the string assuming that  | 
3739  |  |  * it is valid.  | 
3740  |  |  *  | 
3741  |  |  * This function assumes that the input string is valid UTF-16BE.  | 
3742  |  |  * It is acceptable to pass invalid UTF-16 strings but in such cases  | 
3743  |  |  * the result is implementation defined.  | 
3744  |  |  *  | 
3745  |  |  * This function is not BOM-aware.  | 
3746  |  |  *  | 
3747  |  |  * @param input         the UTF-16BE string to process  | 
3748  |  |  * @param length        the length of the string in 2-byte code units (char16_t)  | 
3749  |  |  * @return number of code points  | 
3750  |  |  */  | 
3751  |  | simdutf_warn_unused size_t count_utf16be(const char16_t *input,  | 
3752  |  |                                          size_t length) noexcept;  | 
3753  |  |   #if SIMDUTF_SPAN  | 
3754  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3755  | 0  | count_utf16be(std::span<const char16_t> valid_utf16_input) noexcept { | 
3756  | 0  |   return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size());  | 
3757  | 0  | }  | 
3758  |  |   #endif // SIMDUTF_SPAN  | 
3759  |  | #endif   // SIMDUTF_FEATURE_UTF16  | 
3760  |  |  | 
3761  |  | #if SIMDUTF_FEATURE_UTF8  | 
3762  |  | /**  | 
3763  |  |  * Count the number of code points (characters) in the string assuming that  | 
3764  |  |  * it is valid.  | 
3765  |  |  *  | 
3766  |  |  * This function assumes that the input string is valid UTF-8.  | 
3767  |  |  * It is acceptable to pass invalid UTF-8 strings but in such cases  | 
3768  |  |  * the result is implementation defined.  | 
3769  |  |  *  | 
3770  |  |  * @param input         the UTF-8 string to process  | 
3771  |  |  * @param length        the length of the string in bytes  | 
3772  |  |  * @return number of code points  | 
3773  |  |  */  | 
3774  |  | simdutf_warn_unused size_t count_utf8(const char *input,  | 
3775  |  |                                       size_t length) noexcept;  | 
3776  |  |   #if SIMDUTF_SPAN  | 
3777  |  | simdutf_really_inline simdutf_warn_unused size_t count_utf8(  | 
3778  |  |     const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { | 
3779  |  |   return count_utf8(reinterpret_cast<const char *>(valid_utf8_input.data()),  | 
3780  |  |                     valid_utf8_input.size());  | 
3781  |  | }  | 
3782  |  |   #endif // SIMDUTF_SPAN  | 
3783  |  |  | 
3784  |  | /**  | 
3785  |  |  * Given a valid UTF-8 string having a possibly truncated last character,  | 
3786  |  |  * this function checks the end of string. If the last character is truncated  | 
3787  |  |  * (or partial), then it returns a shorter length (shorter by 1 to 3 bytes) so  | 
3788  |  |  * that the short UTF-8 strings only contain complete characters. If there is no  | 
3789  |  |  * truncated character, the original length is returned.  | 
3790  |  |  *  | 
3791  |  |  * This function assumes that the input string is valid UTF-8, but possibly  | 
3792  |  |  * truncated.  | 
3793  |  |  *  | 
3794  |  |  * @param input         the UTF-8 string to process  | 
3795  |  |  * @param length        the length of the string in bytes  | 
3796  |  |  * @return the length of the string in bytes, possibly shorter by 1 to 3 bytes  | 
3797  |  |  */  | 
3798  |  | simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length);  | 
3799  |  |   #if SIMDUTF_SPAN  | 
3800  |  | simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf8(  | 
3801  |  |     const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { | 
3802  |  |   return trim_partial_utf8(  | 
3803  |  |       reinterpret_cast<const char *>(valid_utf8_input.data()),  | 
3804  |  |       valid_utf8_input.size());  | 
3805  |  | }  | 
3806  |  |   #endif // SIMDUTF_SPAN  | 
3807  |  | #endif   // SIMDUTF_FEATURE_UTF8  | 
3808  |  |  | 
3809  |  | #if SIMDUTF_FEATURE_UTF16  | 
3810  |  | /**  | 
3811  |  |  * Given a valid UTF-16BE string having a possibly truncated last character,  | 
3812  |  |  * this function checks the end of string. If the last character is truncated  | 
3813  |  |  * (or partial), then it returns a shorter length (shorter by 1 unit) so that  | 
3814  |  |  * the short UTF-16BE strings only contain complete characters. If there is no  | 
3815  |  |  * truncated character, the original length is returned.  | 
3816  |  |  *  | 
3817  |  |  * This function assumes that the input string is valid UTF-16BE, but possibly  | 
3818  |  |  * truncated.  | 
3819  |  |  *  | 
3820  |  |  * @param input         the UTF-16BE string to process  | 
3821  |  |  * @param length        the length of the string in bytes  | 
3822  |  |  * @return the length of the string in bytes, possibly shorter by 1 unit  | 
3823  |  |  */  | 
3824  |  | simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input,  | 
3825  |  |                                                 size_t length);  | 
3826  |  |   #if SIMDUTF_SPAN  | 
3827  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3828  | 0  | trim_partial_utf16be(std::span<const char16_t> valid_utf16_input) noexcept { | 
3829  | 0  |   return trim_partial_utf16be(valid_utf16_input.data(),  | 
3830  | 0  |                               valid_utf16_input.size());  | 
3831  | 0  | }  | 
3832  |  |   #endif // SIMDUTF_SPAN  | 
3833  |  |  | 
3834  |  | /**  | 
3835  |  |  * Given a valid UTF-16LE string having a possibly truncated last character,  | 
3836  |  |  * this function checks the end of string. If the last character is truncated  | 
3837  |  |  * (or partial), then it returns a shorter length (shorter by 1 unit) so that  | 
3838  |  |  * the short UTF-16LE strings only contain complete characters. If there is no  | 
3839  |  |  * truncated character, the original length is returned.  | 
3840  |  |  *  | 
3841  |  |  * This function assumes that the input string is valid UTF-16LE, but possibly  | 
3842  |  |  * truncated.  | 
3843  |  |  *  | 
3844  |  |  * @param input         the UTF-16LE string to process  | 
3845  |  |  * @param length        the length of the string in bytes  | 
3846  |  |  * @return the length of the string in unit, possibly shorter by 1 unit  | 
3847  |  |  */  | 
3848  |  | simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input,  | 
3849  |  |                                                 size_t length);  | 
3850  |  |   #if SIMDUTF_SPAN  | 
3851  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3852  | 0  | trim_partial_utf16le(std::span<const char16_t> valid_utf16_input) noexcept { | 
3853  | 0  |   return trim_partial_utf16le(valid_utf16_input.data(),  | 
3854  | 0  |                               valid_utf16_input.size());  | 
3855  | 0  | }  | 
3856  |  |   #endif // SIMDUTF_SPAN  | 
3857  |  |  | 
3858  |  | /**  | 
3859  |  |  * Given a valid UTF-16 string having a possibly truncated last character,  | 
3860  |  |  * this function checks the end of string. If the last character is truncated  | 
3861  |  |  * (or partial), then it returns a shorter length (shorter by 1 unit) so that  | 
3862  |  |  * the short UTF-16 strings only contain complete characters. If there is no  | 
3863  |  |  * truncated character, the original length is returned.  | 
3864  |  |  *  | 
3865  |  |  * This function assumes that the input string is valid UTF-16, but possibly  | 
3866  |  |  * truncated. We use the native endianness.  | 
3867  |  |  *  | 
3868  |  |  * @param input         the UTF-16 string to process  | 
3869  |  |  * @param length        the length of the string in bytes  | 
3870  |  |  * @return the length of the string in unit, possibly shorter by 1 unit  | 
3871  |  |  */  | 
3872  |  | simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input,  | 
3873  |  |                                               size_t length);  | 
3874  |  |   #if SIMDUTF_SPAN  | 
3875  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3876  | 0  | trim_partial_utf16(std::span<const char16_t> valid_utf16_input) noexcept { | 
3877  | 0  |   return trim_partial_utf16(valid_utf16_input.data(), valid_utf16_input.size());  | 
3878  | 0  | }  | 
3879  |  |   #endif // SIMDUTF_SPAN  | 
3880  |  | #endif   // SIMDUTF_FEATURE_UTF16  | 
3881  |  |  | 
3882  |  | #if SIMDUTF_FEATURE_BASE64  | 
3883  |  |   #ifndef SIMDUTF_NEED_TRAILING_ZEROES  | 
3884  |  |     #define SIMDUTF_NEED_TRAILING_ZEROES 1  | 
3885  |  |   #endif  | 
// base64_options are used to specify the base64 encoding options.
// ASCII spaces are ' ', '\t', '\n', '\r', '\f'
// garbage characters are characters that are not part of the base64 alphabet
// nor ASCII spaces.
//
// base64_reverse_padding is a modifier bit, not an option of its own: OR-ing
// it into base64_default or base64_url flips that variant's padding behavior
// (see base64_default_no_padding and base64_url_with_padding below).
constexpr uint64_t base64_reverse_padding =
    2; /* modifier for base64_default and base64_url */
enum base64_options : uint64_t {
  base64_default = 0, /* standard base64 format (with padding) */
  base64_url = 1,     /* base64url format (no padding) */
  base64_default_no_padding =
      base64_default |
      base64_reverse_padding, /* standard base64 format without padding */
  base64_url_with_padding =
      base64_url | base64_reverse_padding, /* base64url with padding */
  base64_default_accept_garbage =
      4, /* standard base64 format accepting garbage characters, the input stops
            with the first '=' if any */
  base64_url_accept_garbage =
      5, /* base64url format accepting garbage characters, the input stops with
            the first '=' if any */
  base64_default_or_url =
      8, /* standard/base64url hybrid format (only meaningful for decoding!) */
  base64_default_or_url_accept_garbage =
      12, /* standard/base64url hybrid format accepting garbage characters
             (only meaningful for decoding!), the input stops with the first '='
             if any */
};

  #if SIMDUTF_CPLUSPLUS17
/**
 * Return the name of a base64_options enumerator, e.g. for diagnostics.
 *
 * Every enumerator maps to its own identifier. In particular the value 2 is
 * reported as "base64_default_no_padding" (the enumerator holding that value)
 * rather than under the name of the base64_reverse_padding modifier constant,
 * which is not an enumerator.
 *
 * @param options       the option value to name
 * @return the enumerator's identifier, or "<unknown>" when the value does not
 * match any enumerator
 */
inline std::string_view to_string(base64_options options) {
  switch (options) {
  case base64_default:
    return "base64_default";
  case base64_url:
    return "base64_url";
  case base64_default_no_padding:
    return "base64_default_no_padding";
  case base64_url_with_padding:
    return "base64_url_with_padding";
  case base64_default_accept_garbage:
    return "base64_default_accept_garbage";
  case base64_url_accept_garbage:
    return "base64_url_accept_garbage";
  case base64_default_or_url:
    return "base64_default_or_url";
  case base64_default_or_url_accept_garbage:
    return "base64_default_or_url_accept_garbage";
  }
  // Reached only for values that are not enumerators (e.g. other bit
  // combinations cast to base64_options).
  return "<unknown>";
}
  #endif // SIMDUTF_CPLUSPLUS17
3937  |  |  | 
// last_chunk_handling_options select how base64 decoding treats the last
// chunk of the input.
// https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
enum last_chunk_handling_options : uint64_t {
  loose = 0,  /* standard base64 format, decode partial final chunk */
  strict = 1, /* error when the last chunk is partial, 2 or 3 chars, and
                 unpadded, or non-zero bit padding */
  stop_before_partial =
      2, /* if the last chunk is partial, ignore it (no error) */
  only_full_chunks =
      3 /* only decode full blocks (4 base64 characters, no padding) */
};

// True exactly for stop_before_partial and only_full_chunks; false for every
// other value.
inline bool is_partial(last_chunk_handling_options options) {
  switch (options) {
  case stop_before_partial:
  case only_full_chunks:
    return true;
  default:
    return false;
  }
}

  #if SIMDUTF_CPLUSPLUS17
// Name of a last_chunk_handling_options enumerator, or "<unknown>" when the
// value matches none of them.
inline std::string_view to_string(last_chunk_handling_options options) {
  if (options == loose) {
    return "loose";
  }
  if (options == strict) {
    return "strict";
  }
  if (options == stop_before_partial) {
    return "stop_before_partial";
  }
  if (options == only_full_chunks) {
    return "only_full_chunks";
  }
  return "<unknown>";
}
  #endif
3970  |  |  | 
3971  |  | /**  | 
3972  |  |  * Provide the maximal binary length in bytes given the base64 input.  | 
3973  |  |  * In general, if the input contains ASCII spaces, the result will be less than  | 
3974  |  |  * the maximum length.  | 
3975  |  |  *  | 
3976  |  |  * @param input         the base64 input to process  | 
3977  |  |  * @param length        the length of the base64 input in bytes  | 
3978  |  |  * @return maximum number of binary bytes  | 
3979  |  |  */  | 
3980  |  | simdutf_warn_unused size_t  | 
3981  |  | maximal_binary_length_from_base64(const char *input, size_t length) noexcept;  | 
3982  |  |   #if SIMDUTF_SPAN  | 
3983  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
3984  |  | maximal_binary_length_from_base64(  | 
3985  |  |     const detail::input_span_of_byte_like auto &input) noexcept { | 
3986  |  |   return maximal_binary_length_from_base64(  | 
3987  |  |       reinterpret_cast<const char *>(input.data()), input.size());  | 
3988  |  | }  | 
3989  |  |   #endif // SIMDUTF_SPAN  | 
3990  |  |  | 
3991  |  | /**  | 
3992  |  |  * Provide the maximal binary length in bytes given the base64 input.  | 
3993  |  |  * In general, if the input contains ASCII spaces, the result will be less than  | 
3994  |  |  * the maximum length.  | 
3995  |  |  *  | 
3996  |  |  * @param input         the base64 input to process, in ASCII stored as 16-bit  | 
3997  |  |  * units  | 
3998  |  |  * @param length        the length of the base64 input in 16-bit units  | 
3999  |  |  * @return maximal number of binary bytes  | 
4000  |  |  */  | 
4001  |  | simdutf_warn_unused size_t maximal_binary_length_from_base64(  | 
4002  |  |     const char16_t *input, size_t length) noexcept;  | 
4003  |  |   #if SIMDUTF_SPAN  | 
4004  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
4005  | 0  | maximal_binary_length_from_base64(std::span<const char16_t> input) noexcept { | 
4006  | 0  |   return maximal_binary_length_from_base64(input.data(), input.size());  | 
4007  | 0  | }  | 
4008  |  |   #endif // SIMDUTF_SPAN  | 
4009  |  |  | 
4010  |  | /**  | 
4011  |  |  * Convert a base64 input to a binary output.  | 
4012  |  |  *  | 
4013  |  |  * This function follows the WHATWG forgiving-base64 format, which means that it  | 
4014  |  |  * will ignore any ASCII spaces in the input. You may provide a padded input  | 
4015  |  |  * (with one or two equal signs at the end) or an unpadded input (without any  | 
4016  |  |  * equal signs at the end).  | 
4017  |  |  *  | 
4018  |  |  * See https://infra.spec.whatwg.org/#forgiving-base64-decode  | 
4019  |  |  *  | 
4020  |  |  * This function will fail in case of invalid input. When last_chunk_options =  | 
4021  |  |  * loose, there are two possible reasons for failure: the input contains a  | 
4022  |  |  * number of base64 characters that when divided by 4, leaves a single remainder  | 
4023  |  |  * character (BASE64_INPUT_REMAINDER), or the input contains a character that is  | 
4024  |  |  * not a valid base64 character (INVALID_BASE64_CHARACTER).  | 
4025  |  |  *  | 
4026  |  |  * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the  | 
4027  |  |  * input where the invalid character was found. When the error is  | 
4028  |  |  * BASE64_INPUT_REMAINDER, then r.count contains the number of bytes decoded.  | 
4029  |  |  *  | 
4030  |  |  * The default option (simdutf::base64_default) expects the characters `+` and  | 
4031  |  |  * `/` as part of its alphabet. The URL option (simdutf::base64_url) expects the  | 
4032  |  |  * characters `-` and `_` as part of its alphabet.  | 
4033  |  |  *  | 
4034  |  |  * The padding (`=`) is validated if present. There may be at most two padding  | 
4035  |  |  * characters at the end of the input. If there are any padding characters, the  | 
4036  |  |  * total number of characters (excluding spaces but including padding  | 
4037  |  |  * characters) must be divisible by four.  | 
4038  |  |  *  | 
4039  |  |  * You should call this function with a buffer that is at least  | 
4040  |  |  * maximal_binary_length_from_base64(input, length) bytes long. If you fail to  | 
4041  |  |  * provide that much space, the function may cause a buffer overflow.  | 
4042  |  |  *  | 
4043  |  |  * Advanced users may want to tailor how the last chunk is handled. By default,  | 
4044  |  |  * we use a loose (forgiving) approach but we also support a strict approach  | 
4045  |  |  * as well as a stop_before_partial approach, as per the following proposal:  | 
4046  |  |  *  | 
4047  |  |  * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64  | 
4048  |  |  *  | 
4049  |  |  * @param input         the base64 string to process  | 
4050  |  |  * @param length        the length of the string in bytes  | 
4051  |  |  * @param output        the pointer to a buffer that can hold the conversion  | 
4052  |  |  * result (should be at least maximal_binary_length_from_base64(input, length)  | 
4053  |  |  * bytes long).  | 
4054  |  |  * @param options       the base64 options to use, usually base64_default or  | 
4055  |  |  * base64_url, and base64_default by default.  | 
4056  |  |  * @param last_chunk_options the last chunk handling options,  | 
4057  |  |  * last_chunk_handling_options::loose by default  | 
4058  |  |  * but can also be last_chunk_handling_options::strict or  | 
4059  |  |  * last_chunk_handling_options::stop_before_partial.  | 
4060  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
4061  |  |  * fields error and count) with an error code and either position of the error  | 
4062  |  |  * (in the input in bytes) if any, or the number of bytes written if successful.  | 
4063  |  |  */  | 
4064  |  | simdutf_warn_unused result base64_to_binary(  | 
4065  |  |     const char *input, size_t length, char *output,  | 
4066  |  |     base64_options options = base64_default,  | 
4067  |  |     last_chunk_handling_options last_chunk_options = loose) noexcept;  | 
4068  |  |   #if SIMDUTF_SPAN  | 
4069  |  | simdutf_really_inline simdutf_warn_unused result base64_to_binary(  | 
4070  |  |     const detail::input_span_of_byte_like auto &input,  | 
4071  |  |     detail::output_span_of_byte_like auto &&binary_output,  | 
4072  |  |     base64_options options = base64_default,  | 
4073  |  |     last_chunk_handling_options last_chunk_options = loose) noexcept { | 
4074  |  |   return base64_to_binary(reinterpret_cast<const char *>(input.data()),  | 
4075  |  |                           input.size(),  | 
4076  |  |                           reinterpret_cast<char *>(binary_output.data()),  | 
4077  |  |                           options, last_chunk_options);  | 
4078  |  | }  | 
4079  |  |   #endif // SIMDUTF_SPAN  | 
4080  |  |  | 
4081  |  | /**  | 
4082  |  |  * Provide the base64 length in bytes given the length of a binary input.  | 
4083  |  |  *  | 
4084  |  |  * @param length        the length of the input in bytes  | 
4085  |  |  * @return number of base64 bytes  | 
4086  |  |  */  | 
4087  |  | simdutf_warn_unused size_t base64_length_from_binary(  | 
4088  |  |     size_t length, base64_options options = base64_default) noexcept;  | 
4089  |  |  | 
4090  |  | /**  | 
4091  |  |  * Convert a binary input to a base64 output.  | 
4092  |  |  *  | 
4093  |  |  * The default option (simdutf::base64_default) uses the characters `+` and `/`  | 
4094  |  |  * as part of its alphabet. Further, it adds padding (`=`) at the end of the  | 
4095  |  |  * output to ensure that the output length is a multiple of four.  | 
4096  |  |  *  | 
4097  |  |  * The URL option (simdutf::base64_url) uses the characters `-` and `_` as part  | 
4098  |  |  * of its alphabet. No padding is added at the end of the output.  | 
4099  |  |  *  | 
4100  |  |  * This function always succeeds.  | 
4101  |  |  *  | 
4102  |  |  * @param input         the binary to process  | 
4103  |  |  * @param length        the length of the input in bytes  | 
4104  |  |  * @param output        the pointer to a buffer that can hold the conversion  | 
4105  |  |  * result (should be at least base64_length_from_binary(length) bytes long)  | 
4106  |  |  * @param options       the base64 options to use, can be base64_default or  | 
4107  |  |  * base64_url, is base64_default by default.  | 
4108  |  |  * @return number of written bytes, will be equal to  | 
4109  |  |  * base64_length_from_binary(length, options)  | 
4110  |  |  */  | 
4111  |  | size_t binary_to_base64(const char *input, size_t length, char *output,  | 
4112  |  |                         base64_options options = base64_default) noexcept;  | 
4113  |  |   #if SIMDUTF_SPAN  | 
4114  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
4115  |  | binary_to_base64(const detail::input_span_of_byte_like auto &input,  | 
4116  |  |                  detail::output_span_of_byte_like auto &&binary_output,  | 
4117  |  |                  base64_options options = base64_default) noexcept { | 
4118  |  |   return binary_to_base64(  | 
4119  |  |       reinterpret_cast<const char *>(input.data()), input.size(),  | 
4120  |  |       reinterpret_cast<char *>(binary_output.data()), options);  | 
4121  |  | }  | 
4122  |  |   #endif // SIMDUTF_SPAN  | 
4123  |  |  | 
4124  |  |   #if SIMDUTF_ATOMIC_REF  | 
4125  |  | /**  | 
4126  |  |  * Convert a binary input to a base64 output, using atomic accesses.  | 
4127  |  |  * This function comes with a potentially significant performance  | 
4128  |  |  * penalty, but it may be useful in some cases where the input  | 
4129  |  |  * buffers are shared between threads, to avoid undefined  | 
4130  |  |  * behavior in case of data races.  | 
4131  |  |  *  | 
4132  |  |  * The function is for advanced users. Its main use case is  | 
4133  |  |  * to silence sanitizer warnings. We have no documented use case  | 
4134  |  |  * where this function is actually necessary in terms of practical correctness.  | 
4135  |  |  *  | 
4136  |  |  * This function is only available when simdutf is compiled with  | 
4137  |  |  * C++20 support and __cpp_lib_atomic_ref >= 201806L. You may check  | 
4138  |  |  * the availability of this function by checking the macro  | 
4139  |  |  * SIMDUTF_ATOMIC_REF.  | 
4140  |  |  *  | 
4141  |  |  * The default option (simdutf::base64_default) uses the characters `+` and `/`  | 
4142  |  |  * as part of its alphabet. Further, it adds padding (`=`) at the end of the  | 
4143  |  |  * output to ensure that the output length is a multiple of four.  | 
4144  |  |  *  | 
4145  |  |  * The URL option (simdutf::base64_url) uses the characters `-` and `_` as part  | 
4146  |  |  * of its alphabet. No padding is added at the end of the output.  | 
4147  |  |  *  | 
4148  |  |  * This function always succeeds.  | 
4149  |  |  *  | 
4150  |  |  * This function is considered experimental. It is not tested by default  | 
4151  |  |  * (see the CMake option SIMDUTF_ATOMIC_BASE64_TESTS) nor is it fuzz tested.  | 
4152  |  |  * It is not documented in the public API documentation (README). It is  | 
4153  |  |  * offered on a best effort basis. We rely on the community for further  | 
4154  |  |  * testing and feedback.  | 
4155  |  |  *  | 
4156  |  |  * @brief atomic_binary_to_base64  | 
4157  |  |  * @param input         the binary to process  | 
4158  |  |  * @param length        the length of the input in bytes  | 
4159  |  |  * @param output        the pointer to a buffer that can hold the conversion  | 
4160  |  |  * result (should be at least base64_length_from_binary(length) bytes long)  | 
4161  |  |  * @param options       the base64 options to use, can be base64_default or  | 
4162  |  |  * base64_url, is base64_default by default.  | 
4163  |  |  * @return number of written bytes, will be equal to  | 
4164  |  |  * base64_length_from_binary(length, options)  | 
4165  |  |  */  | 
4166  |  | size_t  | 
4167  |  | atomic_binary_to_base64(const char *input, size_t length, char *output,  | 
4168  |  |                         base64_options options = base64_default) noexcept;  | 
4169  |  |     #if SIMDUTF_SPAN  | 
4170  |  | simdutf_really_inline simdutf_warn_unused size_t  | 
4171  |  | atomic_binary_to_base64(const detail::input_span_of_byte_like auto &input,  | 
4172  |  |                         detail::output_span_of_byte_like auto &&binary_output,  | 
4173  |  |                         base64_options options = base64_default) noexcept { | 
4174  |  |   return atomic_binary_to_base64(  | 
4175  |  |       reinterpret_cast<const char *>(input.data()), input.size(),  | 
4176  |  |       reinterpret_cast<char *>(binary_output.data()), options);  | 
4177  |  | }  | 
4178  |  |     #endif // SIMDUTF_SPAN  | 
4179  |  |   #endif   // SIMDUTF_ATOMIC_REF  | 
4180  |  |  | 
4181  |  | /**  | 
4182  |  |  * Convert a base64 input to a binary output.  | 
4183  |  |  *  | 
4184  |  |  * This function follows the WHATWG forgiving-base64 format, which means that it  | 
4185  |  |  * will ignore any ASCII spaces in the input. You may provide a padded input  | 
4186  |  |  * (with one or two equal signs at the end) or an unpadded input (without any  | 
4187  |  |  * equal signs at the end).  | 
4188  |  |  *  | 
4189  |  |  * See https://infra.spec.whatwg.org/#forgiving-base64-decode  | 
4190  |  |  *  | 
4191  |  |  * This function will fail in case of invalid input. When last_chunk_options =  | 
4192  |  |  * loose, there are two possible reasons for failure: the input contains a  | 
4193  |  |  * number of base64 characters that when divided by 4, leaves a single remainder  | 
4194  |  |  * character (BASE64_INPUT_REMAINDER), or the input contains a character that is  | 
4195  |  |  * not a valid base64 character (INVALID_BASE64_CHARACTER).  | 
4196  |  |  *  | 
4197  |  |  * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the  | 
4198  |  |  * input where the invalid character was found. When the error is  | 
4199  |  |  * BASE64_INPUT_REMAINDER, then r.count contains the number of bytes decoded.  | 
4200  |  |  *  | 
4201  |  |  * The default option (simdutf::base64_default) expects the characters `+` and  | 
4202  |  |  * `/` as part of its alphabet. The URL option (simdutf::base64_url) expects the  | 
4203  |  |  * characters `-` and `_` as part of its alphabet.  | 
4204  |  |  *  | 
4205  |  |  * The padding (`=`) is validated if present. There may be at most two padding  | 
4206  |  |  * characters at the end of the input. If there are any padding characters, the  | 
4207  |  |  * total number of characters (excluding spaces but including padding  | 
4208  |  |  * characters) must be divisible by four.  | 
4209  |  |  *  | 
4210  |  |  * You should call this function with a buffer that is at least  | 
4211  |  |  * maximal_binary_length_from_base64(input, length) bytes long. If you fail  | 
4212  |  |  * to provide that much space, the function may cause a buffer overflow.  | 
4213  |  |  *  | 
4214  |  |  * Advanced users may want to tailor how the last chunk is handled. By default,  | 
4215  |  |  * we use a loose (forgiving) approach but we also support a strict approach  | 
4216  |  |  * as well as a stop_before_partial approach, as per the following proposal:  | 
4217  |  |  *  | 
4218  |  |  * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64  | 
4219  |  |  *  | 
4220  |  |  * @param input         the base64 string to process, in ASCII stored as 16-bit  | 
4221  |  |  * units  | 
4222  |  |  * @param length        the length of the string in 16-bit units  | 
4223  |  |  * @param output        the pointer to a buffer that can hold the conversion  | 
4224  |  |  * result (should be at least maximal_binary_length_from_base64(input, length)  | 
4225  |  |  * bytes long).  | 
4226  |  |  * @param options       the base64 options to use, can be base64_default or  | 
4227  |  |  * base64_url, is base64_default by default.  | 
4228  |  |  * @param last_chunk_options the last chunk handling options,  | 
4229  |  |  * last_chunk_handling_options::loose by default  | 
4230  |  |  * but can also be last_chunk_handling_options::strict or  | 
4231  |  |  * last_chunk_handling_options::stop_before_partial.  | 
4232  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
4233  |  |  * fields error and count) with an error code and position of the  | 
4234  |  |  * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number  | 
4235  |  |  * of bytes written if successful.  | 
4236  |  |  */  | 
4237  |  | simdutf_warn_unused result  | 
4238  |  | base64_to_binary(const char16_t *input, size_t length, char *output,  | 
4239  |  |                  base64_options options = base64_default,  | 
4240  |  |                  last_chunk_handling_options last_chunk_options =  | 
4241  |  |                      last_chunk_handling_options::loose) noexcept;  | 
4242  |  |   #if SIMDUTF_SPAN  | 
4243  |  | simdutf_really_inline simdutf_warn_unused result base64_to_binary(  | 
4244  |  |     std::span<const char16_t> input,  | 
4245  |  |     detail::output_span_of_byte_like auto &&binary_output,  | 
4246  |  |     base64_options options = base64_default,  | 
4247  |  |     last_chunk_handling_options last_chunk_options = loose) noexcept { | 
4248  |  |   return base64_to_binary(input.data(), input.size(),  | 
4249  |  |                           reinterpret_cast<char *>(binary_output.data()),  | 
4250  |  |                           options, last_chunk_options);  | 
4251  |  | }  | 
4252  |  |   #endif // SIMDUTF_SPAN  | 
4253  |  |  | 
4254  |  | /**  | 
4255  |  |  * Check if a character is an ignorable base64 character.  | 
4256  |  |  * Checking a large input, character by character, is not computationally  | 
4257  |  |  * efficient.  | 
4258  |  |  *  | 
4259  |  |  * @param input         the character to check  | 
4260  |  |  * @param options       the base64 options to use, is base64_default by default.  | 
4261  |  |  * @return true if the character is an ignorable base64 character, false  | 
4262  |  |  * otherwise.  | 
4263  |  |  */  | 
4264  |  | simdutf_warn_unused bool  | 
4265  |  | base64_ignorable(char input, base64_options options = base64_default) noexcept;  | 
4266  |  | simdutf_warn_unused bool  | 
4267  |  | base64_ignorable(char16_t input,  | 
4268  |  |                  base64_options options = base64_default) noexcept;  | 
4269  |  |  | 
4270  |  | /**  | 
4271  |  |  * Check if a character is a valid base64 character.  | 
4272  |  |  * Checking a large input, character by character, is not computationally  | 
4273  |  |  * efficient.  | 
4274  |  |  * Note that padding characters are not considered valid base64 characters in  | 
4275  |  |  * this context, nor are spaces.  | 
4276  |  |  *  | 
4277  |  |  * @param input         the character to check  | 
4278  |  |  * @param options       the base64 options to use, is base64_default by default.  | 
4279  |  |  * @return true if the character is a base64 character, false otherwise.  | 
4280  |  |  */  | 
4281  |  | simdutf_warn_unused bool  | 
4282  |  | base64_valid(char input, base64_options options = base64_default) noexcept;  | 
4283  |  | simdutf_warn_unused bool  | 
4284  |  | base64_valid(char16_t input, base64_options options = base64_default) noexcept;  | 
4285  |  |  | 
4286  |  | /**  | 
4287  |  |  * Check if a character is a valid base64 character or the padding character  | 
4288  |  |  * ('='). Checking a large input, character by character, is not computationally | 
4289  |  |  * efficient.  | 
4290  |  |  *  | 
4291  |  |  * @param input         the character to check  | 
4292  |  |  * @param options       the base64 options to use, is base64_default by default.  | 
4293  |  |  * @return true if the character is a base64 character or '=', false otherwise.  | 
4294  |  |  */  | 
4295  |  | simdutf_warn_unused bool  | 
4296  |  | base64_valid_or_padding(char input,  | 
4297  |  |                         base64_options options = base64_default) noexcept;  | 
4298  |  | simdutf_warn_unused bool  | 
4299  |  | base64_valid_or_padding(char16_t input,  | 
4300  |  |                         base64_options options = base64_default) noexcept;  | 
4301  |  |  | 
4302  |  | /**  | 
4303  |  |  * Convert a base64 input to a binary output.  | 
4304  |  |  *  | 
4305  |  |  * This function follows the WHATWG forgiving-base64 format, which means that it  | 
4306  |  |  * will ignore any ASCII spaces in the input. You may provide a padded input  | 
4307  |  |  * (with one or two equal signs at the end) or an unpadded input (without any  | 
4308  |  |  * equal signs at the end).  | 
4309  |  |  *  | 
4310  |  |  * See https://infra.spec.whatwg.org/#forgiving-base64-decode  | 
4311  |  |  *  | 
4312  |  |  * This function will fail in case of invalid input. When last_chunk_options =  | 
4313  |  |  * loose, there are three possible reasons for failure: the input contains a  | 
4314  |  |  * number of base64 characters that when divided by 4, leaves a single remainder  | 
4315  |  |  * character (BASE64_INPUT_REMAINDER), the input contains a character that is  | 
4316  |  |  * not a valid base64 character (INVALID_BASE64_CHARACTER), or the output buffer  | 
4317  |  |  * is too small (OUTPUT_BUFFER_TOO_SMALL).  | 
4318  |  |  *  | 
4319  |  |  * When the error is OUTPUT_BUFFER_TOO_SMALL, we return both the number of bytes  | 
4320  |  |  * written and the number of units processed, see description of the parameters  | 
4321  |  |  * and returned value.  | 
4322  |  |  *  | 
4323  |  |  * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the  | 
4324  |  |  * input where the invalid character was found. When the error is  | 
4325  |  |  * BASE64_INPUT_REMAINDER, then r.count contains the number of bytes decoded.  | 
4326  |  |  *  | 
4327  |  |  * The default option (simdutf::base64_default) expects the characters `+` and  | 
4328  |  |  * `/` as part of its alphabet. The URL option (simdutf::base64_url) expects the  | 
4329  |  |  * characters `-` and `_` as part of its alphabet.  | 
4330  |  |  *  | 
4331  |  |  * The padding (`=`) is validated if present. There may be at most two padding  | 
4332  |  |  * characters at the end of the input. If there are any padding characters, the  | 
4333  |  |  * total number of characters (excluding spaces but including padding  | 
4334  |  |  * characters) must be divisible by four.  | 
4335  |  |  *  | 
4336  |  |  * The INVALID_BASE64_CHARACTER cases are considered fatal and you are expected  | 
4337  |  |  * to discard the output unless the parameter decode_up_to_bad_char is set to  | 
4338  |  |  * true. In that case, the function will decode up to the first invalid  | 
4339  |  |  * character. Extra padding characters ('=') are considered invalid characters. | 
4340  |  |  *  | 
4341  |  |  * Advanced users may want to tailor how the last chunk is handled. By default,  | 
4342  |  |  * we use a loose (forgiving) approach but we also support a strict approach  | 
4343  |  |  * as well as a stop_before_partial approach, as per the following proposal:  | 
4344  |  |  *  | 
4345  |  |  * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64  | 
4346  |  |  *  | 
4347  |  |  * @param input         the base64 string to process, in ASCII stored as 8-bit  | 
4348  |  |  * or 16-bit units  | 
4349  |  |  * @param length        the length of the string in 8-bit or 16-bit units.  | 
4350  |  |  * @param output        the pointer to a buffer that can hold the conversion  | 
4351  |  |  * result.  | 
4352  |  |  * @param outlen        the number of bytes that can be written in the output  | 
4353  |  |  * buffer. Upon return, it is modified to reflect how many bytes were written.  | 
4354  |  |  * @param options       the base64 options to use, can be base64_default or  | 
4355  |  |  * base64_url, is base64_default by default.  | 
4356  |  |  * @param last_chunk_options the last chunk handling options,  | 
4357  |  |  * last_chunk_handling_options::loose by default  | 
4358  |  |  * but can also be last_chunk_handling_options::strict or  | 
4359  |  |  * last_chunk_handling_options::stop_before_partial.  | 
4360  |  |  * @param decode_up_to_bad_char if true, the function will decode up to the  | 
4361  |  |  * first invalid character. By default (false), it is assumed that the output  | 
4362  |  |  * buffer is to be discarded. When there are multiple errors in the input,  | 
4363  |  |  * using decode_up_to_bad_char might trigger a different error.  | 
4364  |  |  * @return a result pair struct (of type simdutf::result containing the two  | 
4365  |  |  * fields error and count) with an error code and position of the  | 
4366  |  |  * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number  | 
4367  |  |  * of units processed if successful.  | 
4368  |  |  */  | 
4369  |  | simdutf_warn_unused result  | 
4370  |  | base64_to_binary_safe(const char *input, size_t length, char *output,  | 
4371  |  |                       size_t &outlen, base64_options options = base64_default,  | 
4372  |  |                       last_chunk_handling_options last_chunk_options =  | 
4373  |  |                           last_chunk_handling_options::loose,  | 
4374  |  |                       bool decode_up_to_bad_char = false) noexcept;  | 
4375  |  |   #if SIMDUTF_SPAN  | 
4376  |  | /**  | 
4377  |  |  * @brief span overload  | 
4378  |  |  * @return a tuple of result and outlen  | 
4379  |  |  */  | 
4380  |  | simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>  | 
4381  |  | base64_to_binary_safe(const detail::input_span_of_byte_like auto &input,  | 
4382  |  |                       detail::output_span_of_byte_like auto &&binary_output,  | 
4383  |  |                       base64_options options = base64_default,  | 
4384  |  |                       last_chunk_handling_options last_chunk_options = loose,  | 
4385  |  |                       bool decode_up_to_bad_char = false) noexcept { | 
4386  |  |   size_t outlen = binary_output.size();  | 
4387  |  |   auto r = base64_to_binary_safe(  | 
4388  |  |       reinterpret_cast<const char *>(input.data()), input.size(),  | 
4389  |  |       reinterpret_cast<char *>(binary_output.data()), outlen, options,  | 
4390  |  |       last_chunk_options, decode_up_to_bad_char);  | 
4391  |  |   return {r, outlen}; | 
4392  |  | }  | 
4393  |  |   #endif // SIMDUTF_SPAN  | 
4394  |  |  | 
4395  |  | simdutf_warn_unused result  | 
4396  |  | base64_to_binary_safe(const char16_t *input, size_t length, char *output,  | 
4397  |  |                       size_t &outlen, base64_options options = base64_default,  | 
4398  |  |                       last_chunk_handling_options last_chunk_options =  | 
4399  |  |                           last_chunk_handling_options::loose,  | 
4400  |  |                       bool decode_up_to_bad_char = false) noexcept;  | 
4401  |  |   #if SIMDUTF_SPAN  | 
4402  |  | /**  | 
4403  |  |  * @brief span overload  | 
4404  |  |  * @return a tuple of result and outlen  | 
4405  |  |  */  | 
4406  |  | simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>  | 
4407  |  | base64_to_binary_safe(std::span<const char16_t> input,  | 
4408  |  |                       detail::output_span_of_byte_like auto &&binary_output,  | 
4409  |  |                       base64_options options = base64_default,  | 
4410  |  |                       last_chunk_handling_options last_chunk_options = loose,  | 
4411  |  |                       bool decode_up_to_bad_char = false) noexcept { | 
4412  |  |   size_t outlen = binary_output.size();  | 
4413  |  |   auto r = base64_to_binary_safe(input.data(), input.size(),  | 
4414  |  |                                  reinterpret_cast<char *>(binary_output.data()),  | 
4415  |  |                                  outlen, options, last_chunk_options,  | 
4416  |  |                                  decode_up_to_bad_char);  | 
4417  |  |   return {r, outlen}; | 
4418  |  | }  | 
4419  |  |   #endif // SIMDUTF_SPAN  | 
4420  |  |  | 
4421  |  |   #if SIMDUTF_ATOMIC_REF  | 
4422  |  | /**  | 
4423  |  |  * Convert a base64 input to a binary output with a size limit and using atomic  | 
4424  |  |  * operations.  | 
4425  |  |  *  | 
4426  |  |  * Like `base64_to_binary_safe` but using atomic operations, this function is  | 
4427  |  |  * thread-safe for concurrent memory access, allowing the output  | 
4428  |  |  * buffers to be shared between threads without undefined behavior in case of  | 
4429  |  |  * data races.  | 
4430  |  |  *  | 
4431  |  |  * This function comes with a potentially significant performance penalty, but  | 
4432  |  |  * is useful when thread safety is needed during base64 decoding.  | 
4433  |  |  *  | 
4434  |  |  * This function is only available when simdutf is compiled with  | 
4435  |  |  * C++20 support and __cpp_lib_atomic_ref >= 201806L. You may check  | 
4436  |  |  * the availability of this function by checking the macro  | 
4437  |  |  * SIMDUTF_ATOMIC_REF.  | 
4438  |  |  *  | 
4439  |  |  * This function is considered experimental. It is not tested by default  | 
4440  |  |  * (see the CMake option SIMDUTF_ATOMIC_BASE64_TESTS) nor is it fuzz tested.  | 
4441  |  |  * It is not documented in the public API documentation (README). It is  | 
4442  |  |  * offered on a best effort basis. We rely on the community for further  | 
4443  |  |  * testing and feedback.  | 
4444  |  |  *  | 
4445  |  |  * @param input         the base64 input to decode  | 
4446  |  |  * @param length        the length of the input in bytes  | 
4447  |  |  * @param output        the pointer to a buffer that can hold the conversion  | 
4448  |  |  * result  | 
4449  |  |  * @param outlen        the number of bytes that can be written in the output  | 
4450  |  |  * buffer. Upon return, it is modified to reflect how many bytes were written.  | 
4451  |  |  * @param options       the base64 options to use (default, url, etc.)  | 
4452  |  |  * @param last_chunk_options the last chunk handling options (loose, strict,  | 
4453  |  |  * stop_before_partial)  | 
4454  |  |  * @param decode_up_to_bad_char if true, the function will decode up to the  | 
4455  |  |  * first invalid character. By default (false), it is assumed that the output  | 
4456  |  |  * buffer is to be discarded. When there are multiple errors in the input,  | 
4457  |  |  * using decode_up_to_bad_char might trigger a different error.  | 
4458  |  |  * @return a result struct with an error code and count indicating error  | 
4459  |  |  * position or success  | 
4460  |  |  */  | 
4461  |  | simdutf_warn_unused result atomic_base64_to_binary_safe(  | 
4462  |  |     const char *input, size_t length, char *output, size_t &outlen,  | 
4463  |  |     base64_options options = base64_default,  | 
4464  |  |     last_chunk_handling_options last_chunk_options =  | 
4465  |  |         last_chunk_handling_options::loose,  | 
4466  |  |     bool decode_up_to_bad_char = false) noexcept;  | 
4467  |  | simdutf_warn_unused result atomic_base64_to_binary_safe(  | 
4468  |  |     const char16_t *input, size_t length, char *output, size_t &outlen,  | 
4469  |  |     base64_options options = base64_default,  | 
4470  |  |     last_chunk_handling_options last_chunk_options = loose,  | 
4471  |  |     bool decode_up_to_bad_char = false) noexcept;  | 
4472  |  |     #if SIMDUTF_SPAN  | 
4473  |  | /**  | 
4474  |  |  * @brief span overload  | 
4475  |  |  * @return a tuple of result and outlen  | 
4476  |  |  */  | 
4477  |  | simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>  | 
4478  |  | atomic_base64_to_binary_safe(  | 
4479  |  |     const detail::input_span_of_byte_like auto &binary_input,  | 
4480  |  |     detail::output_span_of_byte_like auto &&output,  | 
4481  |  |     base64_options options = base64_default,  | 
4482  |  |     last_chunk_handling_options last_chunk_options =  | 
4483  |  |         last_chunk_handling_options::loose,  | 
4484  |  |     bool decode_up_to_bad_char = false) noexcept { | 
4485  |  |   size_t outlen = output.size();  | 
4486  |  |   auto ret = atomic_base64_to_binary_safe(  | 
4487  |  |       reinterpret_cast<const char *>(binary_input.data()), binary_input.size(),  | 
4488  |  |       reinterpret_cast<char *>(output.data()), outlen, options,  | 
4489  |  |       last_chunk_options, decode_up_to_bad_char);  | 
4490  |  |   return {ret, outlen}; | 
4491  |  | }  | 
4492  |  | /**  | 
4493  |  |  * @brief span overload  | 
4494  |  |  * @return a tuple of result and outlen  | 
4495  |  |  */  | 
4496  |  | simdutf_warn_unused std::tuple<result, std::size_t>  | 
4497  |  | atomic_base64_to_binary_safe(  | 
4498  |  |     std::span<const char16_t> base64_input,  | 
4499  |  |     detail::output_span_of_byte_like auto &&binary_output,  | 
4500  |  |     base64_options options = base64_default,  | 
4501  |  |     last_chunk_handling_options last_chunk_options = loose,  | 
4502  |  |     bool decode_up_to_bad_char = false) noexcept { | 
4503  |  |   size_t outlen = binary_output.size();  | 
4504  |  |   auto ret = atomic_base64_to_binary_safe(  | 
4505  |  |       base64_input.data(), base64_input.size(),  | 
4506  |  |       reinterpret_cast<char *>(binary_output.data()), outlen, options,  | 
4507  |  |       last_chunk_options, decode_up_to_bad_char);  | 
4508  |  |   return {ret, outlen}; | 
4509  |  | }  | 
4510  |  |     #endif // SIMDUTF_SPAN  | 
4511  |  |   #endif   // SIMDUTF_ATOMIC_REF  | 
4512  |  |  | 
4513  |  | /**  | 
4514  |  |  * Find the first occurrence of a character in a string. If the character is  | 
4515  |  |  * not found, return a pointer to the end of the string.  | 
4516  |  |  * @param start        the start of the string  | 
4517  |  |  * @param end          the end of the string  | 
4518  |  |  * @param character    the character to find  | 
4519  |  |  * @return a pointer to the first occurrence of the character in the string,  | 
4520  |  |  * or a pointer to the end of the string if the character is not found.  | 
4521  |  |  *  | 
4522  |  |  */  | 
4523  |  | simdutf_warn_unused const char *find(const char *start, const char *end,  | 
4524  |  |                                      char character) noexcept;  | 
4525  |  | simdutf_warn_unused const char16_t *  | 
4526  |  | find(const char16_t *start, const char16_t *end, char16_t character) noexcept;  | 
4527  |  | #endif // SIMDUTF_FEATURE_BASE64  | 
4528  |  |  | 
4529  |  | /**  | 
4530  |  |  * An implementation of simdutf for a particular CPU architecture.  | 
4531  |  |  *  | 
4532  |  |  * Also used to maintain the currently active implementation. The active  | 
4533  |  |  * implementation is automatically initialized on first use to the most advanced  | 
4534  |  |  * implementation supported by the host.  | 
4535  |  |  */  | 
4536  |  | class implementation { | 
4537  |  | public:  | 
4538  |  |   /**  | 
4539  |  |    * The name of this implementation.  | 
4540  |  |    *  | 
4541  |  |    *     const implementation *impl = simdutf::active_implementation;  | 
4542  |  |    *     cout << "simdutf is optimized for " << impl->name() << "(" << | 
4543  |  |    * impl->description() << ")" << endl;  | 
4544  |  |    *  | 
4545  |  |    * @return the name of the implementation, e.g. "haswell", "westmere", "arm64"  | 
4546  |  |    */  | 
4547  | 0  |   virtual std::string name() const { return std::string(_name); } | 
4548  |  |  | 
4549  |  |   /**  | 
4550  |  |    * The description of this implementation.  | 
4551  |  |    *  | 
4552  |  |    *     const implementation *impl = simdutf::active_implementation;  | 
4553  |  |    *     cout << "simdutf is optimized for " << impl->name() << "(" << | 
4554  |  |    * impl->description() << ")" << endl;  | 
4555  |  |    *  | 
4556  |  |    * @return the description of the implementation  | 
4557  |  |    */  | 
4558  | 0  |   virtual std::string description() const { return std::string(_description); } | 
4559  |  |  | 
4560  |  |   /**  | 
4561  |  |    * Whether the instruction sets this implementation is compiled against  | 
4562  |  |    * match the current CPU. This function may poll the current CPU/system  | 
4563  |  |    * and should therefore not be called too often if performance is a concern.  | 
4564  |  |    *  | 
4565  |  |    *  | 
4566  |  |    * @return true if the implementation can be safely used on the current system  | 
4567  |  |    * (determined at runtime)  | 
4568  |  |    */  | 
4569  |  |   bool supported_by_runtime_system() const;  | 
4570  |  |  | 
4571  |  | #if SIMDUTF_FEATURE_DETECT_ENCODING  | 
4572  |  |   /**  | 
4573  |  |    * This function will try to detect the encoding  | 
4574  |  |    * @param input the string to identify  | 
4575  |  |    * @param length the length of the string in bytes.  | 
4576  |  |    * @return the encoding type detected  | 
4577  |  |    */  | 
4578  |  |   virtual encoding_type autodetect_encoding(const char *input,  | 
4579  |  |                                             size_t length) const noexcept;  | 
4580  |  |  | 
4581  |  |   /**  | 
4582  |  |    * This function will try to detect the possible encodings in one pass  | 
4583  |  |    * @param input the string to identify  | 
4584  |  |    * @param length the length of the string in bytes.  | 
4585  |  |    * @return the encoding type detected  | 
4586  |  |    */  | 
4587  |  |   virtual int detect_encodings(const char *input,  | 
4588  |  |                                size_t length) const noexcept = 0;  | 
4589  |  | #endif // SIMDUTF_FEATURE_DETECT_ENCODING  | 
4590  |  |  | 
4591  |  |   /**  | 
4592  |  |    * @private For internal implementation use  | 
4593  |  |    *  | 
4594  |  |    * The instruction sets this implementation is compiled against.  | 
4595  |  |    *  | 
4596  |  |    * @return a mask of all required `internal::instruction_set::` values  | 
4597  |  |    */  | 
4598  | 0  |   virtual uint32_t required_instruction_sets() const { | 
4599  | 0  |     return _required_instruction_sets;  | 
4600  | 0  |   }  | 
4601  |  |  | 
4602  |  | #if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
4603  |  |   /**  | 
4604  |  |    * Validate the UTF-8 string.  | 
4605  |  |    *  | 
4606  |  |    * Overridden by each implementation.  | 
4607  |  |    *  | 
4608  |  |    * @param buf the UTF-8 string to validate.  | 
4609  |  |    * @param len the length of the string in bytes.  | 
4610  |  |    * @return true if and only if the string is valid UTF-8.  | 
4611  |  |    */  | 
4612  |  |   simdutf_warn_unused virtual bool validate_utf8(const char *buf,  | 
4613  |  |                                                  size_t len) const noexcept = 0;  | 
4614  |  | #endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
4615  |  |  | 
4616  |  | #if SIMDUTF_FEATURE_UTF8  | 
4617  |  |   /**  | 
4618  |  |    * Validate the UTF-8 string and stop on errors.  | 
4619  |  |    *  | 
4620  |  |    * Overridden by each implementation.  | 
4621  |  |    *  | 
4622  |  |    * @param buf the UTF-8 string to validate.  | 
4623  |  |    * @param len the length of the string in bytes.  | 
4624  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
4625  |  |    * fields error and count) with an error code and either position of the error  | 
4626  |  |    * (in the input in code units) if any, or the number of code units validated  | 
4627  |  |    * if successful.  | 
4628  |  |    */  | 
4629  |  |   simdutf_warn_unused virtual result  | 
4630  |  |   validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0;  | 
4631  |  | #endif // SIMDUTF_FEATURE_UTF8  | 
4632  |  |  | 
4633  |  | #if SIMDUTF_FEATURE_ASCII  | 
4634  |  |   /**  | 
4635  |  |    * Validate the ASCII string.  | 
4636  |  |    *  | 
4637  |  |    * Overridden by each implementation.  | 
4638  |  |    *  | 
4639  |  |    * @param buf the ASCII string to validate.  | 
4640  |  |    * @param len the length of the string in bytes.  | 
4641  |  |    * @return true if and only if the string is valid ASCII.  | 
4642  |  |    */  | 
4643  |  |   simdutf_warn_unused virtual bool  | 
4644  |  |   validate_ascii(const char *buf, size_t len) const noexcept = 0;  | 
4645  |  |  | 
4646  |  |   /**  | 
4647  |  |    * Validate the ASCII string and stop on error.  | 
4648  |  |    *  | 
4649  |  |    * Overridden by each implementation.  | 
4650  |  |    *  | 
4651  |  |    * @param buf the ASCII string to validate.  | 
4652  |  |    * @param len the length of the string in bytes.  | 
4653  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
4654  |  |    * fields error and count) with an error code and either position of the error  | 
4655  |  |    * (in the input in code units) if any, or the number of code units validated  | 
4656  |  |    * if successful.  | 
4657  |  |    */  | 
4658  |  |   simdutf_warn_unused virtual result  | 
4659  |  |   validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0;  | 
4660  |  | #endif // SIMDUTF_FEATURE_ASCII  | 
4661  |  |  | 
4662  |  | #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
4663  |  |   /**  | 
4664  |  |    * Validate the UTF-16LE string. This function may be best when you expect  | 
4665  |  |    * the input to be almost always valid. Otherwise, consider using  | 
4666  |  |    * validate_utf16le_with_errors.  | 
4667  |  |    *  | 
4668  |  |    * Overridden by each implementation.  | 
4669  |  |    *  | 
4670  |  |    * This function is not BOM-aware.  | 
4671  |  |    *  | 
4672  |  |    * @param buf the UTF-16LE string to validate.  | 
4673  |  |    * @param len the length of the string in number of 2-byte code units  | 
4674  |  |    * (char16_t).  | 
4675  |  |    * @return true if and only if the string is valid UTF-16LE.  | 
4676  |  |    */  | 
4677  |  |   simdutf_warn_unused virtual bool  | 
4678  |  |   validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0;  | 
4679  |  | #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
4680  |  |  | 
4681  |  | #if SIMDUTF_FEATURE_UTF16  | 
4682  |  |   /**  | 
4683  |  |    * Validate the UTF-16BE string. This function may be best when you expect  | 
4684  |  |    * the input to be almost always valid. Otherwise, consider using  | 
4685  |  |    * validate_utf16be_with_errors.  | 
4686  |  |    *  | 
4687  |  |    * Overridden by each implementation.  | 
4688  |  |    *  | 
4689  |  |    * This function is not BOM-aware.  | 
4690  |  |    *  | 
4691  |  |    * @param buf the UTF-16BE string to validate.  | 
4692  |  |    * @param len the length of the string in number of 2-byte code units  | 
4693  |  |    * (char16_t).  | 
4694  |  |    * @return true if and only if the string is valid UTF-16BE.  | 
4695  |  |    */  | 
4696  |  |   simdutf_warn_unused virtual bool  | 
4697  |  |   validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0;  | 
4698  |  |  | 
4699  |  |   /**  | 
4700  |  |    * Validate the UTF-16LE string and stop on error.  It might be faster than  | 
4701  |  |    * validate_utf16le when an error is expected to occur early.  | 
4702  |  |    *  | 
4703  |  |    * Overridden by each implementation.  | 
4704  |  |    *  | 
4705  |  |    * This function is not BOM-aware.  | 
4706  |  |    *  | 
4707  |  |    * @param buf the UTF-16LE string to validate.  | 
4708  |  |    * @param len the length of the string in number of 2-byte code units  | 
4709  |  |    * (char16_t).  | 
4710  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
4711  |  |    * fields error and count) with an error code and either position of the error  | 
4712  |  |    * (in the input in code units) if any, or the number of code units validated  | 
4713  |  |    * if successful.  | 
4714  |  |    */  | 
4715  |  |   simdutf_warn_unused virtual result  | 
4716  |  |   validate_utf16le_with_errors(const char16_t *buf,  | 
4717  |  |                                size_t len) const noexcept = 0;  | 
4718  |  |  | 
4719  |  |   /**  | 
4720  |  |    * Validate the UTF-16BE string and stop on error. It might be faster than  | 
4721  |  |    * validate_utf16be when an error is expected to occur early.  | 
4722  |  |    *  | 
4723  |  |    * Overridden by each implementation.  | 
4724  |  |    *  | 
4725  |  |    * This function is not BOM-aware.  | 
4726  |  |    *  | 
4727  |  |    * @param buf the UTF-16BE string to validate.  | 
4728  |  |    * @param len the length of the string in number of 2-byte code units  | 
4729  |  |    * (char16_t).  | 
4730  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
4731  |  |    * fields error and count) with an error code and either position of the error  | 
4732  |  |    * (in the input in code units) if any, or the number of code units validated  | 
4733  |  |    * if successful.  | 
4734  |  |    */  | 
4735  |  |   simdutf_warn_unused virtual result  | 
4736  |  |   validate_utf16be_with_errors(const char16_t *buf,  | 
4737  |  |                                size_t len) const noexcept = 0;  | 
4738  |  |   /**  | 
4739  |  |    * Copies the UTF-16LE string while replacing mismatched surrogates with the  | 
4740  |  |    * Unicode replacement character U+FFFD. We allow the input and output to be  | 
4741  |  |    * the same buffer so that the correction is done in-place.  | 
4742  |  |    *  | 
4743  |  |    * Overridden by each implementation.  | 
4744  |  |    *  | 
4745  |  |    * @param input the UTF-16LE string to correct.  | 
4746  |  |    * @param len the length of the string in number of 2-byte code units  | 
4747  |  |    * (char16_t).  | 
4748  |  |    * @param output the output buffer.  | 
4749  |  |    */  | 
4750  |  |   virtual void to_well_formed_utf16le(const char16_t *input, size_t len,  | 
4751  |  |                                       char16_t *output) const noexcept = 0;  | 
4752  |  |   /**  | 
4753  |  |    * Copies the UTF-16BE string while replacing mismatched surrogates with the  | 
4754  |  |    * Unicode replacement character U+FFFD. We allow the input and output to be  | 
4755  |  |    * the same buffer so that the correction is done in-place.  | 
4756  |  |    *  | 
4757  |  |    * Overridden by each implementation.  | 
4758  |  |    *  | 
4759  |  |    * @param input the UTF-16BE string to correct.  | 
4760  |  |    * @param len the length of the string in number of 2-byte code units  | 
4761  |  |    * (char16_t).  | 
4762  |  |    * @param output the output buffer.  | 
4763  |  |    */  | 
4764  |  |   virtual void to_well_formed_utf16be(const char16_t *input, size_t len,  | 
4765  |  |                                       char16_t *output) const noexcept = 0;  | 
4766  |  | #endif // SIMDUTF_FEATURE_UTF16  | 
4767  |  |  | 
4768  |  | #if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
4769  |  |   /**  | 
4770  |  |    * Validate the UTF-32 string.  | 
4771  |  |    *  | 
4772  |  |    * Overridden by each implementation.  | 
4773  |  |    *  | 
4774  |  |    * This function is not BOM-aware.  | 
4775  |  |    *  | 
4776  |  |    * @param buf the UTF-32 string to validate.  | 
4777  |  |    * @param len the length of the string in number of 4-byte code units  | 
4778  |  |    * (char32_t).  | 
4779  |  |    * @return true if and only if the string is valid UTF-32.  | 
4780  |  |    */  | 
4781  |  |   simdutf_warn_unused virtual bool  | 
4782  |  |   validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;  | 
4783  |  | #endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING  | 
4784  |  |  | 
4785  |  | #if SIMDUTF_FEATURE_UTF32  | 
4786  |  |   /**  | 
4787  |  |    * Validate the UTF-32 string and stop on error.  | 
4788  |  |    *  | 
4789  |  |    * Overridden by each implementation.  | 
4790  |  |    *  | 
4791  |  |    * This function is not BOM-aware.  | 
4792  |  |    *  | 
4793  |  |    * @param buf the UTF-32 string to validate.  | 
4794  |  |    * @param len the length of the string in number of 4-byte code units  | 
4795  |  |    * (char32_t).  | 
4796  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
4797  |  |    * fields error and count) with an error code and either position of the error  | 
4798  |  |    * (in the input in code units) if any, or the number of code units validated  | 
4799  |  |    * if successful.  | 
4800  |  |    */  | 
4801  |  |   simdutf_warn_unused virtual result  | 
4802  |  |   validate_utf32_with_errors(const char32_t *buf,  | 
4803  |  |                              size_t len) const noexcept = 0;  | 
4804  |  | #endif // SIMDUTF_FEATURE_UTF32  | 
4805  |  |  | 
4806  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
4807  |  |   /**  | 
4808  |  |    * Convert Latin1 string into UTF-8 string.  | 
4809  |  |    *  | 
4810  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
4811  |  |    *  | 
4812  |  |    * @param input         the Latin1 string to convert  | 
4813  |  |    * @param length        the length of the string in bytes  | 
4814  |  |    * @param utf8_output  the pointer to buffer that can hold conversion result  | 
4815  |  |    * @return the number of written char; 0 if conversion is not possible  | 
4816  |  |    */  | 
4817  |  |   simdutf_warn_unused virtual size_t  | 
4818  |  |   convert_latin1_to_utf8(const char *input, size_t length,  | 
4819  |  |                          char *utf8_output) const noexcept = 0;  | 
4820  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
4821  |  |  | 
4822  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
4823  |  |   /**  | 
4824  |  |    * Convert Latin1 string into UTF-16LE string.  | 
4825  |  |    *  | 
4826  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
4827  |  |    *  | 
4828  |  |    * @param input         the Latin1 string to convert  | 
4829  |  |    * @param length        the length of the string in bytes  | 
4830  |  |    * @param utf16_output  the pointer to buffer that can hold conversion result  | 
4831  |  |    * @return the number of written char16_t; 0 if conversion is not possible  | 
4832  |  |    */  | 
4833  |  |   simdutf_warn_unused virtual size_t  | 
4834  |  |   convert_latin1_to_utf16le(const char *input, size_t length,  | 
4835  |  |                             char16_t *utf16_output) const noexcept = 0;  | 
4836  |  |  | 
4837  |  |   /**  | 
4838  |  |    * Convert Latin1 string into UTF-16BE string.  | 
4839  |  |    *  | 
4840  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
4841  |  |    *  | 
4842  |  |    * @param input         the Latin1 string to convert  | 
4843  |  |    * @param length        the length of the string in bytes  | 
4844  |  |    * @param utf16_output  the pointer to buffer that can hold conversion result  | 
4845  |  |    * @return the number of written char16_t; 0 if conversion is not possible  | 
4846  |  |    */  | 
4847  |  |   simdutf_warn_unused virtual size_t  | 
4848  |  |   convert_latin1_to_utf16be(const char *input, size_t length,  | 
4849  |  |                             char16_t *utf16_output) const noexcept = 0;  | 
4850  |  | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
4851  |  |  | 
4852  |  | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
4853  |  |   /**  | 
4854  |  |    * Convert Latin1 string into UTF-32 string.  | 
4855  |  |    *  | 
4856  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
4857  |  |    *  | 
4858  |  |    * @param input         the Latin1 string to convert  | 
4859  |  |    * @param length        the length of the string in bytes  | 
4860  |  |    * @param utf32_buffer  the pointer to buffer that can hold conversion result  | 
4861  |  |    * @return the number of written char32_t; 0 if conversion is not possible  | 
4862  |  |    */  | 
4863  |  |   simdutf_warn_unused virtual size_t  | 
4864  |  |   convert_latin1_to_utf32(const char *input, size_t length,  | 
4865  |  |                           char32_t *utf32_buffer) const noexcept = 0;  | 
4866  |  | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
4867  |  |  | 
4868  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
4869  |  |   /**  | 
4870  |  |    * Convert possibly broken UTF-8 string into latin1 string.  | 
4871  |  |    *  | 
4872  |  |    * During the conversion also validation of the input string is done.  | 
4873  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
4874  |  |    *  | 
4875  |  |    * @param input         the UTF-8 string to convert  | 
4876  |  |    * @param length        the length of the string in bytes  | 
4877  |  |    * @param latin1_output  the pointer to buffer that can hold conversion result  | 
4878  |  |    * @return the number of written char; 0 if the input was not valid UTF-8  | 
4879  |  |    * string or if it cannot be represented as Latin1  | 
4880  |  |    */  | 
4881  |  |   simdutf_warn_unused virtual size_t  | 
4882  |  |   convert_utf8_to_latin1(const char *input, size_t length,  | 
4883  |  |                          char *latin1_output) const noexcept = 0;  | 
4884  |  |  | 
4885  |  |   /**  | 
4886  |  |    * Convert possibly broken UTF-8 string into latin1 string with errors.  | 
4887  |  |    * If the string cannot be represented as Latin1, an error  | 
4888  |  |    * code is returned.  | 
4889  |  |    *  | 
4890  |  |    * During the conversion also validation of the input string is done.  | 
4891  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
4892  |  |    *  | 
4893  |  |    * @param input         the UTF-8 string to convert  | 
4894  |  |    * @param length        the length of the string in bytes  | 
4895  |  |    * @param latin1_output  the pointer to buffer that can hold conversion result  | 
4896  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
4897  |  |    * fields error and count) with an error code and either position of the error  | 
4898  |  |    * (in the input in code units) if any, or the number of code units validated  | 
4899  |  |    * if successful.  | 
4900  |  |    */  | 
4901  |  |   simdutf_warn_unused virtual result  | 
4902  |  |   convert_utf8_to_latin1_with_errors(const char *input, size_t length,  | 
4903  |  |                                      char *latin1_output) const noexcept = 0;  | 
4904  |  |  | 
4905  |  |   /**  | 
4906  |  |    * Convert valid UTF-8 string into latin1 string.  | 
4907  |  |    *  | 
4908  |  |    * This function assumes that the input string is valid UTF-8 and that it can  | 
4909  |  |    * be represented as Latin1. If you violate this assumption, the result is  | 
4910  |  |    * implementation defined and may include system-dependent behavior such as  | 
4911  |  |    * crashes.  | 
4912  |  |    *  | 
4913  |  |    * This function is for expert users only and not part of our public API. Use  | 
4914  |  |    * convert_utf8_to_latin1 instead.  | 
4915  |  |    *  | 
4916  |  |    * This function is not BOM-aware.  | 
4917  |  |    *  | 
4918  |  |    * @param input         the UTF-8 string to convert  | 
4919  |  |    * @param length        the length of the string in bytes  | 
4920  |  |    * @param latin1_output  the pointer to buffer that can hold conversion result  | 
4921  |  |    * @return the number of written char; 0 if the input was not valid UTF-8  | 
4922  |  |    * string  | 
4923  |  |    */  | 
4924  |  |   simdutf_warn_unused virtual size_t  | 
4925  |  |   convert_valid_utf8_to_latin1(const char *input, size_t length,  | 
4926  |  |                                char *latin1_output) const noexcept = 0;  | 
4927  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
4928  |  |  | 
4929  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
4930  |  |   /**  | 
4931  |  |    * Convert possibly broken UTF-8 string into UTF-16LE string.  | 
4932  |  |    *  | 
4933  |  |    * During the conversion also validation of the input string is done.  | 
4934  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
4935  |  |    *  | 
4936  |  |    * @param input         the UTF-8 string to convert  | 
4937  |  |    * @param length        the length of the string in bytes  | 
4938  |  |    * @param utf16_output  the pointer to buffer that can hold conversion result  | 
4939  |  |    * @return the number of written char16_t; 0 if the input was not valid UTF-8  | 
4940  |  |    * string  | 
4941  |  |    */  | 
4942  |  |   simdutf_warn_unused virtual size_t  | 
4943  |  |   convert_utf8_to_utf16le(const char *input, size_t length,  | 
4944  |  |                           char16_t *utf16_output) const noexcept = 0;  | 
4945  |  |  | 
4946  |  |   /**  | 
4947  |  |    * Convert possibly broken UTF-8 string into UTF-16BE string.  | 
4948  |  |    *  | 
4949  |  |    * During the conversion also validation of the input string is done.  | 
4950  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
4951  |  |    *  | 
4952  |  |    * @param input         the UTF-8 string to convert  | 
4953  |  |    * @param length        the length of the string in bytes  | 
4954  |  |    * @param utf16_output  the pointer to buffer that can hold conversion result  | 
4955  |  |    * @return the number of written char16_t; 0 if the input was not valid UTF-8  | 
4956  |  |    * string  | 
4957  |  |    */  | 
4958  |  |   simdutf_warn_unused virtual size_t  | 
4959  |  |   convert_utf8_to_utf16be(const char *input, size_t length,  | 
4960  |  |                           char16_t *utf16_output) const noexcept = 0;  | 
4961  |  |  | 
4962  |  |   /**  | 
4963  |  |    * Convert possibly broken UTF-8 string into UTF-16LE string and stop on  | 
4964  |  |    * error.  | 
4965  |  |    *  | 
4966  |  |    * During the conversion also validation of the input string is done.  | 
4967  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
4968  |  |    *  | 
4969  |  |    * @param input         the UTF-8 string to convert  | 
4970  |  |    * @param length        the length of the string in bytes  | 
4971  |  |    * @param utf16_output  the pointer to buffer that can hold conversion result  | 
4972  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
4973  |  |    * fields error and count) with an error code and either position of the error  | 
4974  |  |    * (in the input in code units) if any, or the number of code units validated  | 
4975  |  |    * if successful.  | 
4976  |  |    */  | 
4977  |  |   simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(  | 
4978  |  |       const char *input, size_t length,  | 
4979  |  |       char16_t *utf16_output) const noexcept = 0;  | 
4980  |  |  | 
4981  |  |   /**  | 
4982  |  |    * Convert possibly broken UTF-8 string into UTF-16BE string and stop on  | 
4983  |  |    * error.  | 
4984  |  |    *  | 
4985  |  |    * During the conversion also validation of the input string is done.  | 
4986  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
4987  |  |    *  | 
4988  |  |    * @param input         the UTF-8 string to convert  | 
4989  |  |    * @param length        the length of the string in bytes  | 
4990  |  |    * @param utf16_output  the pointer to buffer that can hold conversion result  | 
4991  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
4992  |  |    * fields error and count) with an error code and either position of the error  | 
4993  |  |    * (in the input in code units) if any, or the number of code units validated  | 
4994  |  |    * if successful.  | 
4995  |  |    */  | 
4996  |  |   simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(  | 
4997  |  |       const char *input, size_t length,  | 
4998  |  |       char16_t *utf16_output) const noexcept = 0;  | 
4999  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
5000  |  |  | 
5001  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
5002  |  |   /**  | 
5003  |  |    * Convert possibly broken UTF-8 string into UTF-32 string.  | 
5004  |  |    *  | 
5005  |  |    * During the conversion also validation of the input string is done.  | 
5006  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5007  |  |    *  | 
5008  |  |    * @param input         the UTF-8 string to convert  | 
5009  |  |    * @param length        the length of the string in bytes  | 
5010  |  |    * @param utf32_output  the pointer to buffer that can hold conversion result  | 
5011  |  |    * @return the number of written char32_t; 0 if the input was not valid UTF-8  | 
5012  |  |    * string  | 
5013  |  |    */  | 
5014  |  |   simdutf_warn_unused virtual size_t  | 
5015  |  |   convert_utf8_to_utf32(const char *input, size_t length,  | 
5016  |  |                         char32_t *utf32_output) const noexcept = 0;  | 
5017  |  |  | 
5018  |  |   /**  | 
5019  |  |    * Convert possibly broken UTF-8 string into UTF-32 string and stop on error.  | 
5020  |  |    *  | 
5021  |  |    * During the conversion also validation of the input string is done.  | 
5022  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5023  |  |    *  | 
5024  |  |    * @param input         the UTF-8 string to convert  | 
5025  |  |    * @param length        the length of the string in bytes  | 
5026  |  |    * @param utf32_output  the pointer to buffer that can hold conversion result  | 
5027  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
5028  |  |    * fields error and count) with an error code and either position of the error  | 
5029  |  |    * (in the input in code units) if any, or the number of char32_t written if  | 
5030  |  |    * successful.  | 
5031  |  |    */  | 
5032  |  |   simdutf_warn_unused virtual result  | 
5033  |  |   convert_utf8_to_utf32_with_errors(const char *input, size_t length,  | 
5034  |  |                                     char32_t *utf32_output) const noexcept = 0;  | 
5035  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
5036  |  |  | 
5037  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
5038  |  |   /**  | 
5039  |  |    * Convert valid UTF-8 string into UTF-16LE string.  | 
5040  |  |    *  | 
5041  |  |    * This function assumes that the input string is valid UTF-8.  | 
5042  |  |    *  | 
5043  |  |    * @param input         the UTF-8 string to convert  | 
5044  |  |    * @param length        the length of the string in bytes  | 
5045  |  |    * @param utf16_buffer  the pointer to buffer that can hold conversion result  | 
5046  |  |    * @return the number of written char16_t  | 
5047  |  |    */  | 
5048  |  |   simdutf_warn_unused virtual size_t  | 
5049  |  |   convert_valid_utf8_to_utf16le(const char *input, size_t length,  | 
5050  |  |                                 char16_t *utf16_buffer) const noexcept = 0;  | 
5051  |  |  | 
5052  |  |   /**  | 
5053  |  |    * Convert valid UTF-8 string into UTF-16BE string.  | 
5054  |  |    *  | 
5055  |  |    * This function assumes that the input string is valid UTF-8.  | 
5056  |  |    *  | 
5057  |  |    * @param input         the UTF-8 string to convert  | 
5058  |  |    * @param length        the length of the string in bytes  | 
5059  |  |    * @param utf16_buffer  the pointer to buffer that can hold conversion result  | 
5060  |  |    * @return the number of written char16_t  | 
5061  |  |    */  | 
5062  |  |   simdutf_warn_unused virtual size_t  | 
5063  |  |   convert_valid_utf8_to_utf16be(const char *input, size_t length,  | 
5064  |  |                                 char16_t *utf16_buffer) const noexcept = 0;  | 
5065  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
5066  |  |  | 
5067  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
5068  |  |   /**  | 
5069  |  |    * Convert valid UTF-8 string into UTF-32 string.  | 
5070  |  |    *  | 
5071  |  |    * This function assumes that the input string is valid UTF-8.  | 
5072  |  |    *  | 
5073  |  |    * @param input         the UTF-8 string to convert  | 
5074  |  |    * @param length        the length of the string in bytes  | 
5075  |  |    * @param utf32_buffer  the pointer to buffer that can hold conversion result  | 
5076  |  |    * @return the number of written char32_t  | 
5077  |  |    */  | 
5078  |  |   simdutf_warn_unused virtual size_t  | 
5079  |  |   convert_valid_utf8_to_utf32(const char *input, size_t length,  | 
5080  |  |                               char32_t *utf32_buffer) const noexcept = 0;  | 
5081  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
5082  |  |  | 
5083  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
5084  |  |   /**  | 
5085  |  |    * Compute the number of 2-byte code units that this UTF-8 string would  | 
5086  |  |    * require in UTF-16LE format.  | 
5087  |  |    *  | 
5088  |  |    * This function does not validate the input. It is acceptable to pass invalid  | 
5089  |  |    * UTF-8 strings but in such cases the result is implementation defined.  | 
5090  |  |    *  | 
5091  |  |    * @param input         the UTF-8 string to process  | 
5092  |  |    * @param length        the length of the string in bytes  | 
5093  |  |    * @return the number of char16_t code units required to encode the UTF-8  | 
5094  |  |    * string as UTF-16LE  | 
5095  |  |    */  | 
5096  |  |   simdutf_warn_unused virtual size_t  | 
5097  |  |   utf16_length_from_utf8(const char *input, size_t length) const noexcept = 0;  | 
5098  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
5099  |  |  | 
5100  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
5101  |  |   /**  | 
5102  |  |    * Compute the number of 4-byte code units that this UTF-8 string would  | 
5103  |  |    * require in UTF-32 format.  | 
5104  |  |    *  | 
5105  |  |    * This function is equivalent to count_utf8. It is acceptable to pass invalid  | 
5106  |  |    * UTF-8 strings but in such cases the result is implementation defined.  | 
5107  |  |    *  | 
5108  |  |    * This function does not validate the input.  | 
5109  |  |    *  | 
5110  |  |    * @param input         the UTF-8 string to process  | 
5111  |  |    * @param length        the length of the string in bytes  | 
5112  |  |    * @return the number of char32_t code units required to encode the UTF-8  | 
5113  |  |    * string as UTF-32  | 
5114  |  |    */  | 
5115  |  |   simdutf_warn_unused virtual size_t  | 
5116  |  |   utf32_length_from_utf8(const char *input, size_t length) const noexcept = 0;  | 
5117  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
5118  |  |  | 
5119  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
5120  |  |   /**  | 
5121  |  |    * Convert possibly broken UTF-16LE string into Latin1 string.  | 
5122  |  |    *  | 
5123  |  |    * During the conversion also validation of the input string is done.  | 
5124  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5125  |  |    *  | 
5126  |  |    * This function is not BOM-aware.  | 
5127  |  |    *  | 
5128  |  |    * @param input         the UTF-16LE string to convert  | 
5129  |  |    * @param length        the length of the string in 2-byte code units  | 
5130  |  |    * (char16_t)  | 
5131  |  |    * @param latin1_buffer   the pointer to buffer that can hold conversion  | 
5132  |  |    * result  | 
5133  |  |    * @return number of written code units; 0 if input is not a valid UTF-16LE  | 
5134  |  |    * string or if it cannot be represented as Latin1  | 
5135  |  |    */  | 
5136  |  |   simdutf_warn_unused virtual size_t  | 
5137  |  |   convert_utf16le_to_latin1(const char16_t *input, size_t length,  | 
5138  |  |                             char *latin1_buffer) const noexcept = 0;  | 
5139  |  |  | 
5140  |  |   /**  | 
5141  |  |    * Convert possibly broken UTF-16BE string into Latin1 string.  | 
5142  |  |    *  | 
5143  |  |    * During the conversion also validation of the input string is done.  | 
5144  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5145  |  |    *  | 
5146  |  |    * This function is not BOM-aware.  | 
5147  |  |    *  | 
5148  |  |    * @param input         the UTF-16BE string to convert  | 
5149  |  |    * @param length        the length of the string in 2-byte code units  | 
5150  |  |    * (char16_t)  | 
5151  |  |    * @param latin1_buffer   the pointer to buffer that can hold conversion  | 
5152  |  |    * result  | 
5153  |  |    * @return number of written code units; 0 if input is not a valid UTF-16BE  | 
5154  |  |    * string or if it cannot be represented as Latin1  | 
5155  |  |    */  | 
5156  |  |   simdutf_warn_unused virtual size_t  | 
5157  |  |   convert_utf16be_to_latin1(const char16_t *input, size_t length,  | 
5158  |  |                             char *latin1_buffer) const noexcept = 0;  | 
5159  |  |  | 
5160  |  |   /**  | 
5161  |  |    * Convert possibly broken UTF-16LE string into Latin1 string.  | 
5162  |  |    * If the string cannot be represented as Latin1, an error  | 
5163  |  |    * is returned.  | 
5164  |  |    *  | 
5165  |  |    * During the conversion also validation of the input string is done.  | 
5166  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5167  |  |    * This function is not BOM-aware.  | 
5168  |  |    *  | 
5169  |  |    * @param input         the UTF-16LE string to convert  | 
5170  |  |    * @param length        the length of the string in 2-byte code units  | 
5171  |  |    * (char16_t)  | 
5172  |  |    * @param latin1_buffer   the pointer to buffer that can hold conversion  | 
5173  |  |    * result  | 
5174  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
5175  |  |    * fields error and count) with an error code and either position of the error  | 
5176  |  |    * (in the input in code units) if any, or the number of char written if  | 
5177  |  |    * successful.  | 
5178  |  |    */  | 
5179  |  |   simdutf_warn_unused virtual result  | 
5180  |  |   convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length,  | 
5181  |  |                                         char *latin1_buffer) const noexcept = 0;  | 
5182  |  |  | 
5183  |  |   /**  | 
5184  |  |    * Convert possibly broken UTF-16BE string into Latin1 string.  | 
5185  |  |    * If the string cannot be represented as Latin1, an error  | 
5186  |  |    * is returned.  | 
5187  |  |    *  | 
5188  |  |    * During the conversion also validation of the input string is done.  | 
5189  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5190  |  |    * This function is not BOM-aware.  | 
5191  |  |    *  | 
5192  |  |    * @param input         the UTF-16BE string to convert  | 
5193  |  |    * @param length        the length of the string in 2-byte code units  | 
5194  |  |    * (char16_t)  | 
5195  |  |    * @param latin1_buffer   the pointer to buffer that can hold conversion  | 
5196  |  |    * result  | 
5197  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
5198  |  |    * fields error and count) with an error code and either position of the error  | 
5199  |  |    * (in the input in code units) if any, or the number of char written if  | 
5200  |  |    * successful.  | 
5201  |  |    */  | 
5202  |  |   simdutf_warn_unused virtual result  | 
5203  |  |   convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length,  | 
5204  |  |                                         char *latin1_buffer) const noexcept = 0;  | 
5205  |  |  | 
5206  |  |   /**  | 
5207  |  |    * Convert valid UTF-16LE string into Latin1 string.  | 
5208  |  |    *  | 
5209  |  |    * This function assumes that the input string is valid UTF-16LE and that it  | 
5210  |  |    * can be represented as Latin1. If you violate this assumption, the result is  | 
5211  |  |    * implementation defined and may include system-dependent behavior such as  | 
5212  |  |    * crashes.  | 
5213  |  |    *  | 
5214  |  |    * This function is for expert users only and not part of our public API. Use  | 
5215  |  |    * convert_utf16le_to_latin1 instead.  | 
5216  |  |    *  | 
5217  |  |    * This function is not BOM-aware.  | 
5218  |  |    *  | 
5219  |  |    * @param input         the UTF-16LE string to convert  | 
5220  |  |    * @param length        the length of the string in 2-byte code units  | 
5221  |  |    * (char16_t)  | 
5222  |  |    * @param latin1_buffer   the pointer to buffer that can hold conversion  | 
5223  |  |    * result  | 
5224  |  |    * @return number of written code units; 0 if conversion is not possible  | 
5225  |  |    */  | 
5226  |  |   simdutf_warn_unused virtual size_t  | 
5227  |  |   convert_valid_utf16le_to_latin1(const char16_t *input, size_t length,  | 
5228  |  |                                   char *latin1_buffer) const noexcept = 0;  | 
5229  |  |  | 
5230  |  |   /**  | 
5231  |  |    * Convert valid UTF-16BE string into Latin1 string.  | 
5232  |  |    *  | 
5233  |  |    * This function assumes that the input string is valid UTF-16BE and that it  | 
5234  |  |    * can be represented as Latin1. If you violate this assumption, the result is  | 
5235  |  |    * implementation defined and may include system-dependent behavior such as  | 
5236  |  |    * crashes.  | 
5237  |  |    *  | 
5238  |  |    * This function is for expert users only and not part of our public API. Use  | 
5239  |  |    * convert_utf16be_to_latin1 instead.  | 
5240  |  |    *  | 
5241  |  |    * This function is not BOM-aware.  | 
5242  |  |    *  | 
5243  |  |    * @param input         the UTF-16BE string to convert  | 
5244  |  |    * @param length        the length of the string in 2-byte code units  | 
5245  |  |    * (char16_t)  | 
5246  |  |    * @param latin1_buffer   the pointer to buffer that can hold conversion  | 
5247  |  |    * result  | 
5248  |  |    * @return number of written code units; 0 if conversion is not possible  | 
5249  |  |    */  | 
5250  |  |   simdutf_warn_unused virtual size_t  | 
5251  |  |   convert_valid_utf16be_to_latin1(const char16_t *input, size_t length,  | 
5252  |  |                                   char *latin1_buffer) const noexcept = 0;  | 
5253  |  | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
5254  |  |  | 
5255  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
5256  |  |   /**  | 
5257  |  |    * Convert possibly broken UTF-16LE string into UTF-8 string.  | 
5258  |  |    *  | 
5259  |  |    * During the conversion also validation of the input string is done.  | 
5260  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5261  |  |    *  | 
5262  |  |    * This function is not BOM-aware.  | 
5263  |  |    *  | 
5264  |  |    * @param input         the UTF-16LE string to convert  | 
5265  |  |    * @param length        the length of the string in 2-byte code units  | 
5266  |  |    * (char16_t)  | 
5267  |  |    * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
5268  |  |    * @return number of written code units; 0 if input is not a valid UTF-16LE  | 
5269  |  |    * string  | 
5270  |  |    */  | 
5271  |  |   simdutf_warn_unused virtual size_t  | 
5272  |  |   convert_utf16le_to_utf8(const char16_t *input, size_t length,  | 
5273  |  |                           char *utf8_buffer) const noexcept = 0;  | 
5274  |  |  | 
5275  |  |   /**  | 
5276  |  |    * Convert possibly broken UTF-16BE string into UTF-8 string.  | 
5277  |  |    *  | 
5278  |  |    * During the conversion also validation of the input string is done.  | 
5279  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5280  |  |    *  | 
5281  |  |    * This function is not BOM-aware.  | 
5282  |  |    *  | 
5283  |  |    * @param input         the UTF-16BE string to convert  | 
5284  |  |    * @param length        the length of the string in 2-byte code units  | 
5285  |  |    * (char16_t)  | 
5286  |  |    * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
5287  |  |    * @return number of written code units; 0 if input is not a valid UTF-16BE  | 
5288  |  |    * string  | 
5289  |  |    */  | 
5290  |  |   simdutf_warn_unused virtual size_t  | 
5291  |  |   convert_utf16be_to_utf8(const char16_t *input, size_t length,  | 
5292  |  |                           char *utf8_buffer) const noexcept = 0;  | 
5293  |  |  | 
5294  |  |   /**  | 
5295  |  |    * Convert possibly broken UTF-16LE string into UTF-8 string and stop on  | 
5296  |  |    * error.  | 
5297  |  |    *  | 
5298  |  |    * During the conversion also validation of the input string is done.  | 
5299  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5300  |  |    *  | 
5301  |  |    * This function is not BOM-aware.  | 
5302  |  |    *  | 
5303  |  |    * @param input         the UTF-16LE string to convert  | 
5304  |  |    * @param length        the length of the string in 2-byte code units  | 
5305  |  |    * (char16_t)  | 
5306  |  |    * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
5307  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
5308  |  |    * fields error and count) with an error code and either position of the error  | 
5309  |  |    * (in the input in code units) if any, or the number of char written if  | 
5310  |  |    * successful.  | 
5311  |  |    */  | 
5312  |  |   simdutf_warn_unused virtual result  | 
5313  |  |   convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length,  | 
5314  |  |                                       char *utf8_buffer) const noexcept = 0;  | 
5315  |  |  | 
5316  |  |   /**  | 
5317  |  |    * Convert possibly broken UTF-16BE string into UTF-8 string and stop on  | 
5318  |  |    * error.  | 
5319  |  |    *  | 
5320  |  |    * During the conversion also validation of the input string is done.  | 
5321  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5322  |  |    *  | 
5323  |  |    * This function is not BOM-aware.  | 
5324  |  |    *  | 
5325  |  |    * @param input         the UTF-16BE string to convert  | 
5326  |  |    * @param length        the length of the string in 2-byte code units  | 
5327  |  |    * (char16_t)  | 
5328  |  |    * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
5329  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
5330  |  |    * fields error and count) with an error code and either position of the error  | 
5331  |  |    * (in the input in code units) if any, or the number of char written if  | 
5332  |  |    * successful.  | 
5333  |  |    */  | 
5334  |  |   simdutf_warn_unused virtual result  | 
5335  |  |   convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length,  | 
5336  |  |                                       char *utf8_buffer) const noexcept = 0;  | 
5337  |  |  | 
5338  |  |   /**  | 
5339  |  |    * Convert valid UTF-16LE string into UTF-8 string.  | 
5340  |  |    *  | 
5341  |  |    * This function assumes that the input string is valid UTF-16LE.  | 
5342  |  |    *  | 
5343  |  |    * This function is not BOM-aware.  | 
5344  |  |    *  | 
5345  |  |    * @param input         the UTF-16LE string to convert  | 
5346  |  |    * @param length        the length of the string in 2-byte code units  | 
5347  |  |    * (char16_t)  | 
5348  |  |    * @param utf8_buffer   the pointer to a buffer that can hold the conversion  | 
5349  |  |    * result  | 
5350  |  |    * @return number of written code units; 0 if conversion is not possible  | 
5351  |  |    */  | 
5352  |  |   simdutf_warn_unused virtual size_t  | 
5353  |  |   convert_valid_utf16le_to_utf8(const char16_t *input, size_t length,  | 
5354  |  |                                 char *utf8_buffer) const noexcept = 0;  | 
5355  |  |  | 
5356  |  |   /**  | 
5357  |  |    * Convert valid UTF-16BE string into UTF-8 string.  | 
5358  |  |    *  | 
5359  |  |    * This function assumes that the input string is valid UTF-16BE.  | 
5360  |  |    *  | 
5361  |  |    * This function is not BOM-aware.  | 
5362  |  |    *  | 
5363  |  |    * @param input         the UTF-16BE string to convert  | 
5364  |  |    * @param length        the length of the string in 2-byte code units  | 
5365  |  |    * (char16_t)  | 
5366  |  |    * @param utf8_buffer   the pointer to a buffer that can hold the conversion  | 
5367  |  |    * result  | 
5368  |  |    * @return number of written code units; 0 if conversion is not possible  | 
5369  |  |    */  | 
5370  |  |   simdutf_warn_unused virtual size_t  | 
5371  |  |   convert_valid_utf16be_to_utf8(const char16_t *input, size_t length,  | 
5372  |  |                                 char *utf8_buffer) const noexcept = 0;  | 
5373  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
5374  |  |  | 
5375  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
5376  |  |   /**  | 
5377  |  |    * Convert possibly broken UTF-16LE string into UTF-32 string.  | 
5378  |  |    *  | 
5379  |  |    * During the conversion also validation of the input string is done.  | 
5380  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5381  |  |    *  | 
5382  |  |    * This function is not BOM-aware.  | 
5383  |  |    *  | 
5384  |  |    * @param input         the UTF-16LE string to convert  | 
5385  |  |    * @param length        the length of the string in 2-byte code units  | 
5386  |  |    * (char16_t)  | 
5387  |  |    * @param utf32_buffer   the pointer to buffer that can hold conversion result  | 
5388  |  |    * @return number of written code units; 0 if input is not a valid UTF-16LE  | 
5389  |  |    * string  | 
5390  |  |    */  | 
5391  |  |   simdutf_warn_unused virtual size_t  | 
5392  |  |   convert_utf16le_to_utf32(const char16_t *input, size_t length,  | 
5393  |  |                            char32_t *utf32_buffer) const noexcept = 0;  | 
5394  |  |  | 
5395  |  |   /**  | 
5396  |  |    * Convert possibly broken UTF-16BE string into UTF-32 string.  | 
5397  |  |    *  | 
5398  |  |    * During the conversion also validation of the input string is done.  | 
5399  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5400  |  |    *  | 
5401  |  |    * This function is not BOM-aware.  | 
5402  |  |    *  | 
5403  |  |    * @param input         the UTF-16BE string to convert  | 
5404  |  |    * @param length        the length of the string in 2-byte code units  | 
5405  |  |    * (char16_t)  | 
5406  |  |    * @param utf32_buffer   the pointer to buffer that can hold conversion result  | 
5407  |  |    * @return number of written code units; 0 if input is not a valid UTF-16BE  | 
5408  |  |    * string  | 
5409  |  |    */  | 
5410  |  |   simdutf_warn_unused virtual size_t  | 
5411  |  |   convert_utf16be_to_utf32(const char16_t *input, size_t length,  | 
5412  |  |                            char32_t *utf32_buffer) const noexcept = 0;  | 
5413  |  |  | 
5414  |  |   /**  | 
5415  |  |    * Convert possibly broken UTF-16LE string into UTF-32 string and stop on  | 
5416  |  |    * error.  | 
5417  |  |    *  | 
5418  |  |    * During the conversion also validation of the input string is done.  | 
5419  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5420  |  |    *  | 
5421  |  |    * This function is not BOM-aware.  | 
5422  |  |    *  | 
5423  |  |    * @param input         the UTF-16LE string to convert  | 
5424  |  |    * @param length        the length of the string in 2-byte code units  | 
5425  |  |    * (char16_t)  | 
5426  |  |    * @param utf32_buffer   the pointer to buffer that can hold conversion result  | 
5427  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
5428  |  |    * fields error and count) with an error code and either position of the error  | 
5429  |  |    * (in the input in code units) if any, or the number of char32_t written if  | 
5430  |  |    * successful.  | 
5431  |  |    */  | 
5432  |  |   simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors(  | 
5433  |  |       const char16_t *input, size_t length,  | 
5434  |  |       char32_t *utf32_buffer) const noexcept = 0;  | 
5435  |  |  | 
5436  |  |   /**  | 
5437  |  |    * Convert possibly broken UTF-16BE string into UTF-32 string and stop on  | 
5438  |  |    * error.  | 
5439  |  |    *  | 
5440  |  |    * During the conversion also validation of the input string is done.  | 
5441  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5442  |  |    *  | 
5443  |  |    * This function is not BOM-aware.  | 
5444  |  |    *  | 
5445  |  |    * @param input         the UTF-16BE string to convert  | 
5446  |  |    * @param length        the length of the string in 2-byte code units  | 
5447  |  |    * (char16_t)  | 
5448  |  |    * @param utf32_buffer   the pointer to buffer that can hold conversion result  | 
5449  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
5450  |  |    * fields error and count) with an error code and either position of the error  | 
5451  |  |    * (in the input in code units) if any, or the number of char32_t written if  | 
5452  |  |    * successful.  | 
5453  |  |    */  | 
5454  |  |   simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors(  | 
5455  |  |       const char16_t *input, size_t length,  | 
5456  |  |       char32_t *utf32_buffer) const noexcept = 0;  | 
5457  |  |  | 
5458  |  |   /**  | 
5459  |  |    * Convert valid UTF-16LE string into UTF-32 string.  | 
5460  |  |    *  | 
5461  |  |    * This function assumes that the input string is valid UTF-16LE.  | 
5462  |  |    *  | 
5463  |  |    * This function is not BOM-aware.  | 
5464  |  |    *  | 
5465  |  |    * @param input         the UTF-16LE string to convert  | 
5466  |  |    * @param length        the length of the string in 2-byte code units  | 
5467  |  |    * (char16_t)  | 
5468  |  |    * @param utf32_buffer   the pointer to a buffer that can hold the conversion  | 
5469  |  |    * result  | 
5470  |  |    * @return number of written code units; 0 if conversion is not possible  | 
5471  |  |    */  | 
5472  |  |   simdutf_warn_unused virtual size_t  | 
5473  |  |   convert_valid_utf16le_to_utf32(const char16_t *input, size_t length,  | 
5474  |  |                                  char32_t *utf32_buffer) const noexcept = 0;  | 
5475  |  |  | 
5476  |  |   /**  | 
5477  |  |    * Convert valid UTF-16BE string into UTF-32 string.  | 
5478  |  |    *  | 
5479  |  |    * This function assumes that the input string is valid UTF-16BE.  | 
5480  |  |    *  | 
5481  |  |    * This function is not BOM-aware.  | 
5482  |  |    *  | 
5483  |  |    * @param input         the UTF-16BE string to convert  | 
5484  |  |    * @param length        the length of the string in 2-byte code units  | 
5485  |  |    * (char16_t)  | 
5486  |  |    * @param utf32_buffer   the pointer to a buffer that can hold the conversion  | 
5487  |  |    * result  | 
5488  |  |    * @return number of written code units; 0 if conversion is not possible  | 
5489  |  |    */  | 
5490  |  |   simdutf_warn_unused virtual size_t  | 
5491  |  |   convert_valid_utf16be_to_utf32(const char16_t *input, size_t length,  | 
5492  |  |                                  char32_t *utf32_buffer) const noexcept = 0;  | 
5493  |  | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
5494  |  |  | 
5495  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
5496  |  |   /**  | 
5497  |  |    * Compute the number of bytes that this UTF-16LE string would require in  | 
5498  |  |    * UTF-8 format.  | 
5499  |  |    *  | 
5500  |  |    * This function does not validate the input. It is acceptable to pass invalid  | 
5501  |  |    * UTF-16 strings but in such cases the result is implementation defined.  | 
5502  |  |    *  | 
5503  |  |    * This function is not BOM-aware.  | 
5504  |  |    *  | 
5505  |  |    * @param input         the UTF-16LE string to convert  | 
5506  |  |    * @param length        the length of the string in 2-byte code units  | 
5507  |  |    * (char16_t)  | 
5508  |  |    * @return the number of bytes required to encode the UTF-16LE string as UTF-8  | 
5509  |  |    */  | 
5510  |  |   simdutf_warn_unused virtual size_t  | 
5511  |  |   utf8_length_from_utf16le(const char16_t *input,  | 
5512  |  |                            size_t length) const noexcept = 0;  | 
5513  |  |  | 
5514  |  |   /**  | 
5515  |  |    * Compute the number of bytes that this UTF-16BE string would require in  | 
5516  |  |    * UTF-8 format.  | 
5517  |  |    *  | 
5518  |  |    * This function does not validate the input. It is acceptable to pass invalid  | 
5519  |  |    * UTF-16 strings but in such cases the result is implementation defined.  | 
5520  |  |    *  | 
5521  |  |    * This function is not BOM-aware.  | 
5522  |  |    *  | 
5523  |  |    * @param input         the UTF-16BE string to convert  | 
5524  |  |    * @param length        the length of the string in 2-byte code units  | 
5525  |  |    * (char16_t)  | 
5526  |  |    * @return the number of bytes required to encode the UTF-16BE string as UTF-8  | 
5527  |  |    */  | 
5528  |  |   simdutf_warn_unused virtual size_t  | 
5529  |  |   utf8_length_from_utf16be(const char16_t *input,  | 
5530  |  |                            size_t length) const noexcept = 0;  | 
5531  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16  | 
5532  |  |  | 
5533  |  | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
5534  |  |   /**  | 
5535  |  |    * Convert possibly broken UTF-32 string into Latin1 string.  | 
5536  |  |    *  | 
5537  |  |    * During the conversion also validation of the input string is done.  | 
5538  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5539  |  |    *  | 
5540  |  |    * This function is not BOM-aware.  | 
5541  |  |    *  | 
5542  |  |    * @param input         the UTF-32 string to convert  | 
5543  |  |    * @param length        the length of the string in 4-byte code units  | 
5544  |  |    * (char32_t)  | 
5545  |  |    * @param latin1_buffer   the pointer to buffer that can hold conversion  | 
5546  |  |    * result  | 
5547  |  |    * @return number of written code units; 0 if input is not a valid UTF-32  | 
5548  |  |    * string or if it cannot be represented as Latin1  | 
5549  |  |    */  | 
5550  |  |   simdutf_warn_unused virtual size_t  | 
5551  |  |   convert_utf32_to_latin1(const char32_t *input, size_t length,  | 
5552  |  |                           char *latin1_buffer) const noexcept = 0;  | 
5553  |  | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
5554  |  |  | 
5555  |  | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
5556  |  |   /**  | 
5557  |  |    * Convert possibly broken UTF-32 string into Latin1 string and stop on error.  | 
5558  |  |    * If the string cannot be represented as Latin1, an error is returned.  | 
5559  |  |    *  | 
5560  |  |    * During the conversion also validation of the input string is done.  | 
5561  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5562  |  |    *  | 
5563  |  |    * This function is not BOM-aware.  | 
5564  |  |    *  | 
5565  |  |    * @param input         the UTF-32 string to convert  | 
5566  |  |    * @param length        the length of the string in 4-byte code units  | 
5567  |  |    * (char32_t)  | 
5568  |  |    * @param latin1_buffer   the pointer to buffer that can hold conversion  | 
5569  |  |    * result  | 
5570  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
5571  |  |    * fields error and count) with an error code and either position of the error  | 
5572  |  |    * (in the input in code units) if any, or the number of char written if  | 
5573  |  |    * successful.  | 
5574  |  |    */  | 
5575  |  |   simdutf_warn_unused virtual result  | 
5576  |  |   convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length,  | 
5577  |  |                                       char *latin1_buffer) const noexcept = 0;  | 
5578  |  |  | 
5579  |  |   /**  | 
5580  |  |    * Convert valid UTF-32 string into Latin1 string.  | 
5581  |  |    *  | 
5582  |  |    * This function assumes that the input string is valid UTF-32 and can be  | 
5583  |  |    * represented as Latin1. If you violate this assumption, the result is  | 
5584  |  |    * implementation defined and may include system-dependent behavior such as  | 
5585  |  |    * crashes.  | 
5586  |  |    *  | 
5587  |  |    * This function is for expert users only and not part of our public API. Use  | 
5588  |  |    * convert_utf32_to_latin1 instead.  | 
5589  |  |    *  | 
5590  |  |    * This function is not BOM-aware.  | 
5591  |  |    *  | 
5592  |  |    * @param input         the UTF-32 string to convert  | 
5593  |  |    * @param length        the length of the string in 4-byte code units  | 
5594  |  |    * (char32_t)  | 
5595  |  |    * @param latin1_buffer   the pointer to a buffer that can hold the conversion  | 
5596  |  |    * result  | 
5597  |  |    * @return number of written code units; 0 if conversion is not possible  | 
5598  |  |    */  | 
5599  |  |   simdutf_warn_unused virtual size_t  | 
5600  |  |   convert_valid_utf32_to_latin1(const char32_t *input, size_t length,  | 
5601  |  |                                 char *latin1_buffer) const noexcept = 0;  | 
5602  |  | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
5603  |  |  | 
5604  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
5605  |  |   /**  | 
5606  |  |    * Convert possibly broken UTF-32 string into UTF-8 string.  | 
5607  |  |    *  | 
5608  |  |    * During the conversion also validation of the input string is done.  | 
5609  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5610  |  |    *  | 
5611  |  |    * This function is not BOM-aware.  | 
5612  |  |    *  | 
5613  |  |    * @param input         the UTF-32 string to convert  | 
5614  |  |    * @param length        the length of the string in 4-byte code units  | 
5615  |  |    * (char32_t)  | 
5616  |  |    * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
5617  |  |    * @return number of written code units; 0 if input is not a valid UTF-32  | 
5618  |  |    * string  | 
5619  |  |    */  | 
5620  |  |   simdutf_warn_unused virtual size_t  | 
5621  |  |   convert_utf32_to_utf8(const char32_t *input, size_t length,  | 
5622  |  |                         char *utf8_buffer) const noexcept = 0;  | 
5623  |  |  | 
5624  |  |   /**  | 
5625  |  |    * Convert possibly broken UTF-32 string into UTF-8 string and stop on error.  | 
5626  |  |    *  | 
5627  |  |    * During the conversion also validation of the input string is done.  | 
5628  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5629  |  |    *  | 
5630  |  |    * This function is not BOM-aware.  | 
5631  |  |    *  | 
5632  |  |    * @param input         the UTF-32 string to convert  | 
5633  |  |    * @param length        the length of the string in 4-byte code units  | 
5634  |  |    * (char32_t)  | 
5635  |  |    * @param utf8_buffer   the pointer to buffer that can hold conversion result  | 
5636  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
5637  |  |    * fields error and count) with an error code and either position of the error  | 
5638  |  |    * (in the input in code units) if any, or the number of char written if  | 
5639  |  |    * successful.  | 
5640  |  |    */  | 
5641  |  |   simdutf_warn_unused virtual result  | 
5642  |  |   convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length,  | 
5643  |  |                                     char *utf8_buffer) const noexcept = 0;  | 
5644  |  |  | 
5645  |  |   /**  | 
5646  |  |    * Convert valid UTF-32 string into UTF-8 string.  | 
5647  |  |    *  | 
5648  |  |    * This function assumes that the input string is valid UTF-32.  | 
5649  |  |    *  | 
5650  |  |    * This function is not BOM-aware.  | 
5651  |  |    *  | 
5652  |  |    * @param input         the UTF-32 string to convert  | 
5653  |  |    * @param length        the length of the string in 4-byte code units  | 
5654  |  |    * (char32_t)  | 
5655  |  |    * @param utf8_buffer   the pointer to a buffer that can hold the conversion  | 
5656  |  |    * result  | 
5657  |  |    * @return number of written code units; 0 if conversion is not possible  | 
5658  |  |    */  | 
5659  |  |   simdutf_warn_unused virtual size_t  | 
5660  |  |   convert_valid_utf32_to_utf8(const char32_t *input, size_t length,  | 
5661  |  |                               char *utf8_buffer) const noexcept = 0;  | 
5662  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
5663  |  |  | 
5664  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
5665  |  |   /**  | 
5666  |  |    * Compute the number of 2-byte code units that a Latin1 string would require  | 
5667  |  |    * in UTF-16 format.  | 
5668  |  |    *  | 
5669  |  |    * This default implementation returns length unchanged.  | 
5670  |  |    *  | 
5671  |  |    * @param length        the length of the Latin1 string in bytes  | 
5672  |  |    * @return the number of 2-byte code units (char16_t) required to encode the  | 
5673  |  |    * Latin1 string as UTF-16  | 
5674  |  |    */  | 
5675  |  |   simdutf_warn_unused virtual size_t  | 
5676  | 0  |   utf16_length_from_latin1(size_t length) const noexcept { | 
5677  | 0  |     return length;  | 
5678  | 0  |   }  | 
5679  |  | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
5680  |  |  | 
5681  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
5682  |  |   /**  | 
5683  |  |    * Convert possibly broken UTF-32 string into UTF-16LE string.  | 
5684  |  |    *  | 
5685  |  |    * During the conversion also validation of the input string is done.  | 
5686  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5687  |  |    *  | 
5688  |  |    * This function is not BOM-aware.  | 
5689  |  |    *  | 
5690  |  |    * @param input         the UTF-32 string to convert  | 
5691  |  |    * @param length        the length of the string in 4-byte code units  | 
5692  |  |    * (char32_t)  | 
5693  |  |    * @param utf16_buffer   the pointer to buffer that can hold conversion result  | 
5694  |  |    * @return number of written code units; 0 if input is not a valid UTF-32  | 
5695  |  |    * string  | 
5696  |  |    */  | 
5697  |  |   simdutf_warn_unused virtual size_t  | 
5698  |  |   convert_utf32_to_utf16le(const char32_t *input, size_t length,  | 
5699  |  |                            char16_t *utf16_buffer) const noexcept = 0;  | 
5700  |  |  | 
5701  |  |   /**  | 
5702  |  |    * Convert possibly broken UTF-32 string into UTF-16BE string.  | 
5703  |  |    *  | 
5704  |  |    * During the conversion also validation of the input string is done.  | 
5705  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5706  |  |    *  | 
5707  |  |    * This function is not BOM-aware.  | 
5708  |  |    *  | 
5709  |  |    * @param input         the UTF-32 string to convert  | 
5710  |  |    * @param length        the length of the string in 4-byte code units  | 
5711  |  |    * (char32_t)  | 
5712  |  |    * @param utf16_buffer   the pointer to buffer that can hold conversion result  | 
5713  |  |    * @return number of written code units; 0 if input is not a valid UTF-32  | 
5714  |  |    * string  | 
5715  |  |    */  | 
5716  |  |   simdutf_warn_unused virtual size_t  | 
5717  |  |   convert_utf32_to_utf16be(const char32_t *input, size_t length,  | 
5718  |  |                            char16_t *utf16_buffer) const noexcept = 0;  | 
5719  |  |  | 
5720  |  |   /**  | 
5721  |  |    * Convert possibly broken UTF-32 string into UTF-16LE string and stop on  | 
5722  |  |    * error.  | 
5723  |  |    *  | 
5724  |  |    * During the conversion also validation of the input string is done.  | 
5725  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5726  |  |    *  | 
5727  |  |    * This function is not BOM-aware.  | 
5728  |  |    *  | 
5729  |  |    * @param input         the UTF-32 string to convert  | 
5730  |  |    * @param length        the length of the string in 4-byte code units  | 
5731  |  |    * (char32_t)  | 
5732  |  |    * @param utf16_buffer   the pointer to buffer that can hold conversion result  | 
5733  |  |    * @return a result pair struct (of type simdutf::result containing the two  | 
5734  |  |    * fields error and count) with an error code and either position of the error  | 
5735  |  |    * (in the input in code units) if any, or the number of char16_t written if  | 
5736  |  |    * successful.  | 
5737  |  |    */  | 
5738  |  |   simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors(  | 
5739  |  |       const char32_t *input, size_t length,  | 
5740  |  |       char16_t *utf16_buffer) const noexcept = 0;  | 
5741  |  |  | 
5742  |  |   /**  | 
5743  |  |    * Convert a possibly broken UTF-32 string into a UTF-16BE string and stop  | 
5744  |  |    * on error.  | 
5745  |  |    *  | 
5746  |  |    * The input string is also validated during the conversion.  | 
5747  |  |    * This function is suitable to work with inputs from untrusted sources.  | 
5748  |  |    *  | 
5749  |  |    * This function is not BOM-aware.  | 
5750  |  |    *  | 
5751  |  |    * @param input         the UTF-32 string to convert  | 
5752  |  |    * @param length        the length of the string in 4-byte code units  | 
5753  |  |    * (char32_t)  | 
5754  |  |    * @param utf16_buffer pointer to a buffer that can hold the conversion result  | 
5755  |  |    * @return a result struct (of type simdutf::result, containing the two fields  | 
5756  |  |    * error and count) with an error code and either the position of the error  | 
5757  |  |    * (in the input, in code units) if any, or the number of char16_t written if  | 
5758  |  |    * successful.  | 
5759  |  |    */  | 
5760  |  |   simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors(  | 
5761  |  |       const char32_t *input, size_t length,  | 
5762  |  |       char16_t *utf16_buffer) const noexcept = 0;  | 
5763  |  |  | 
5764  |  |   /**  | 
5765  |  |    * Convert a valid UTF-32 string into a UTF-16LE string.  | 
5766  |  |    *  | 
5767  |  |    * This function assumes that the input string is valid UTF-32.  | 
5768  |  |    *  | 
5769  |  |    * This function is not BOM-aware.  | 
5770  |  |    *  | 
5771  |  |    * @param input         the UTF-32 string to convert  | 
5772  |  |    * @param length        the length of the string in 4-byte code units  | 
5773  |  |    * (char32_t)  | 
5774  |  |    * @param utf16_buffer  the pointer to a buffer that can hold the conversion  | 
5775  |  |    * result  | 
5776  |  |    * @return number of char16_t written; 0 if conversion is not possible  | 
5777  |  |    */  | 
5778  |  |   simdutf_warn_unused virtual size_t  | 
5779  |  |   convert_valid_utf32_to_utf16le(const char32_t *input, size_t length,  | 
5780  |  |                                  char16_t *utf16_buffer) const noexcept = 0;  | 
5781  |  |  | 
5782  |  |   /**  | 
5783  |  |    * Convert a valid UTF-32 string into a UTF-16BE string.  | 
5784  |  |    *  | 
5785  |  |    * This function assumes that the input string is valid UTF-32.  | 
5786  |  |    *  | 
5787  |  |    * This function is not BOM-aware.  | 
5788  |  |    *  | 
5789  |  |    * @param input         the UTF-32 string to convert  | 
5790  |  |    * @param length        the length of the string in 4-byte code units  | 
5791  |  |    * (char32_t)  | 
5792  |  |    * @param utf16_buffer  the pointer to a buffer that can hold the conversion  | 
5793  |  |    * result  | 
5794  |  |    * @return number of char16_t written; 0 if conversion is not possible  | 
5795  |  |    */  | 
5796  |  |   simdutf_warn_unused virtual size_t  | 
5797  |  |   convert_valid_utf32_to_utf16be(const char32_t *input, size_t length,  | 
5798  |  |                                  char16_t *utf16_buffer) const noexcept = 0;  | 
5799  |  | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
5800  |  |  | 
5801  |  | #if SIMDUTF_FEATURE_UTF16  | 
5802  |  |   /**  | 
5803  |  |    * Change the endianness of the input. This can be used to go from UTF-16LE  | 
5804  |  |    * to UTF-16BE or from UTF-16BE to UTF-16LE.  | 
5805  |  |    *  | 
5806  |  |    * This function does not validate the input.  | 
5807  |  |    *  | 
5808  |  |    * This function is not BOM-aware.  | 
5809  |  |    *  | 
5810  |  |    * @param input         the UTF-16 string to process (either endianness)  | 
5811  |  |    * @param length        the length of the string in 2-byte code units  | 
5812  |  |    * (char16_t)  | 
5813  |  |    * @param output        the pointer to a buffer that can hold the conversion  | 
5814  |  |    * result  | 
5815  |  |    */  | 
5816  |  |   virtual void change_endianness_utf16(const char16_t *input, size_t length,  | 
5817  |  |                                        char16_t *output) const noexcept = 0;  | 
5818  |  | #endif // SIMDUTF_FEATURE_UTF16  | 
5819  |  |  | 
5820  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
5821  |  |   /**  | 
5822  |  |    * Return the number of bytes that this Latin1 string would require in UTF-8  | 
5823  |  |    * format.  | 
5824  |  |    *  | 
5825  |  |    * @param input         the Latin1 string to convert  | 
5826  |  |    * @param length        the length of the string in bytes  | 
5827  |  |    * @return the number of bytes required to encode the Latin1 string as UTF-8  | 
5828  |  |    */  | 
5829  |  |   simdutf_warn_unused virtual size_t  | 
5830  |  |   utf8_length_from_latin1(const char *input, size_t length) const noexcept = 0;  | 
5831  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
5832  |  |  | 
5833  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
5834  |  |   /**  | 
5835  |  |    * Compute the number of bytes that this UTF-32 string would require in UTF-8  | 
5836  |  |    * format.  | 
5837  |  |    *  | 
5838  |  |    * This function does not validate the input. It is acceptable to pass invalid  | 
5839  |  |    * UTF-32 strings, but in such cases the result is implementation-defined.  | 
5840  |  |    *  | 
5841  |  |    * @param input         the UTF-32 string to convert  | 
5842  |  |    * @param length        the length of the string in 4-byte code units  | 
5843  |  |    * (char32_t)  | 
5844  |  |    * @return the number of bytes required to encode the UTF-32 string as UTF-8  | 
5845  |  |    */  | 
5846  |  |   simdutf_warn_unused virtual size_t  | 
5847  |  |   utf8_length_from_utf32(const char32_t *input,  | 
5848  |  |                          size_t length) const noexcept = 0;  | 
5849  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32  | 
5850  |  |  | 
5851  |  | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
5852  |  |   /**  | 
5853  |  |    * Compute the number of bytes that this UTF-32 string would require in Latin1  | 
5854  |  |    * format. The default implementation returns length unchanged.  | 
5855  |  |    *  | 
5856  |  |    * This function does not validate the input. It is acceptable to pass invalid  | 
5857  |  |    * UTF-32 strings, but in such cases the result is implementation-defined.  | 
5858  |  |    *  | 
5859  |  |    * @param length        the length of the string in 4-byte code units  | 
5860  |  |    * (char32_t)  | 
5861  |  |    * @return the number of bytes required to encode the UTF-32 string as Latin1  | 
5862  |  |    */  | 
5863  |  |   simdutf_warn_unused virtual size_t  | 
5864  | 0  |   latin1_length_from_utf32(size_t length) const noexcept { | 
5865  | 0  |     return length;  | 
5866  | 0  |   }  | 
5867  |  | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
5868  |  |  | 
5869  |  | #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
5870  |  |   /**  | 
5871  |  |    * Compute the number of bytes that this UTF-8 string would require in Latin1  | 
5872  |  |    * format.  | 
5873  |  |    *  | 
5874  |  |    * This function does not validate the input. It is acceptable to pass invalid  | 
5875  |  |    * UTF-8 strings, but in such cases the result is implementation-defined.  | 
5876  |  |    *  | 
5877  |  |    * @param input         the UTF-8 string to convert  | 
5878  |  |    * @param length        the length of the string in bytes  | 
5879  |  |    * @return the number of bytes required to encode the UTF-8 string as Latin1  | 
5880  |  |    */  | 
5881  |  |   simdutf_warn_unused virtual size_t  | 
5882  |  |   latin1_length_from_utf8(const char *input, size_t length) const noexcept = 0;  | 
5883  |  | #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1  | 
5884  |  |  | 
5885  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
5886  |  |   /**  | 
5887  |  |    * Compute the number of bytes that this UTF-16LE/BE string would require in  | 
5888  |  |    * Latin1 format.  | 
5889  |  |    *  | 
5890  |  |    * This function does not validate the input. It is acceptable to pass invalid  | 
5891  |  |    * UTF-16 strings, but in such cases the result is implementation-defined.  | 
5892  |  |    *  | 
5893  |  |    * This function is not BOM-aware. The default implementation returns length  | 
5894  |  |    * unchanged.  | 
5895  |  |    *  | 
5896  |  |    * @param length        the length of the string in 2-byte code units  | 
5897  |  |    * (char16_t)  | 
5898  |  |    * @return the number of bytes required to encode the UTF-16 string as  | 
5899  |  |    * Latin1  | 
5900  |  |    */  | 
5901  |  |   simdutf_warn_unused virtual size_t  | 
5902  | 0  |   latin1_length_from_utf16(size_t length) const noexcept { | 
5903  | 0  |     return length;  | 
5904  | 0  |   }  | 
5905  |  | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1  | 
5906  |  |  | 
5907  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
5908  |  |   /**  | 
5909  |  |    * Compute the number of two-byte code units that this UTF-32 string would  | 
5910  |  |    * require in UTF-16 format.  | 
5911  |  |    *  | 
5912  |  |    * This function does not validate the input. It is acceptable to pass invalid  | 
5913  |  |    * UTF-32 strings, but in such cases the result is implementation-defined.  | 
5914  |  |    *  | 
5915  |  |    * @param input         the UTF-32 string to convert  | 
5916  |  |    * @param length        the length of the string in 4-byte code units  | 
5917  |  |    * (char32_t)  | 
5918  |  |    * @return the number of 2-byte code units required for the UTF-16 encoding  | 
5919  |  |    */  | 
5920  |  |   simdutf_warn_unused virtual size_t  | 
5921  |  |   utf16_length_from_utf32(const char32_t *input,  | 
5922  |  |                           size_t length) const noexcept = 0;  | 
5923  |  | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
5924  |  |  | 
5925  |  | #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
5926  |  |   /**  | 
5927  |  |    * Return the number of 4-byte code units (char32_t) that a Latin1 string  | 
5928  |  |    * would require in UTF-32 format (by default, length unchanged).  | 
5929  |  |    *  | 
5930  |  |    * @param length        the length of the Latin1 string in bytes  | 
5931  |  |    * @return the number of 4-byte code units (char32_t) required to encode the  | 
5932  |  |    * Latin1 string as UTF-32  | 
5933  |  |    */  | 
5934  |  |   simdutf_warn_unused virtual size_t  | 
5935  | 0  |   utf32_length_from_latin1(size_t length) const noexcept { | 
5936  | 0  |     return length;  | 
5937  | 0  |   }  | 
5938  |  | #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1  | 
5939  |  |  | 
5940  |  | #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
5941  |  |   /**  | 
5942  |  |    * Compute the number of 4-byte code units (char32_t) that this UTF-16LE  | 
5943  |  |    * string would require in UTF-32 format.  | 
5944  |  |    *  | 
5945  |  |    * This function is equivalent to count_utf16le.  | 
5946  |  |    *  | 
5947  |  |    * This function does not validate the input. It is acceptable to pass invalid  | 
5948  |  |    * UTF-16 strings, but in such cases the result is implementation-defined.  | 
5949  |  |    *  | 
5950  |  |    * This function is not BOM-aware.  | 
5951  |  |    *  | 
5952  |  |    * @param input         the UTF-16LE string to convert  | 
5953  |  |    * @param length        the length of the string in 2-byte code units  | 
5954  |  |    * (char16_t)  | 
5955  |  |    * @return the number of 4-byte code units (char32_t) required to encode the  | 
5956  |  |    * UTF-16LE string as UTF-32  | 
5957  |  |    */  | 
5958  |  |   simdutf_warn_unused virtual size_t  | 
5959  |  |   utf32_length_from_utf16le(const char16_t *input,  | 
5960  |  |                             size_t length) const noexcept = 0;  | 
5961  |  |  | 
5962  |  |   /**  | 
5963  |  |    * Compute the number of 4-byte code units (char32_t) that this UTF-16BE  | 
5964  |  |    * string would require in UTF-32 format.  | 
5965  |  |    *  | 
5966  |  |    * This function is equivalent to count_utf16be.  | 
5967  |  |    *  | 
5968  |  |    * This function does not validate the input. It is acceptable to pass invalid  | 
5969  |  |    * UTF-16 strings, but in such cases the result is implementation-defined.  | 
5970  |  |    *  | 
5971  |  |    * This function is not BOM-aware.  | 
5972  |  |    *  | 
5973  |  |    * @param input         the UTF-16BE string to convert  | 
5974  |  |    * @param length        the length of the string in 2-byte code units  | 
5975  |  |    * (char16_t)  | 
5976  |  |    * @return the number of 4-byte code units (char32_t) required to encode the  | 
5977  |  |    * UTF-16BE string as UTF-32  | 
5978  |  |    */  | 
5979  |  |   simdutf_warn_unused virtual size_t  | 
5980  |  |   utf32_length_from_utf16be(const char16_t *input,  | 
5981  |  |                             size_t length) const noexcept = 0;  | 
5982  |  | #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32  | 
5983  |  |  | 
5984  |  | #if SIMDUTF_FEATURE_UTF16  | 
5985  |  |   /**  | 
5986  |  |    * Count the number of code points (characters) in the string, assuming that  | 
5987  |  |    * it is valid.  | 
5988  |  |    *  | 
5989  |  |    * This function assumes that the input string is valid UTF-16LE.  | 
5990  |  |    * It is acceptable to pass invalid UTF-16 strings, but in such cases  | 
5991  |  |    * the result is implementation-defined.  | 
5992  |  |    *  | 
5993  |  |    * This function is not BOM-aware.  | 
5994  |  |    *  | 
5995  |  |    * @param input         the UTF-16LE string to process  | 
5996  |  |    * @param length        the length of the string in 2-byte code units  | 
5997  |  |    * (char16_t)  | 
5998  |  |    * @return the number of code points  | 
5999  |  |    */  | 
6000  |  |   simdutf_warn_unused virtual size_t  | 
6001  |  |   count_utf16le(const char16_t *input, size_t length) const noexcept = 0;  | 
6002  |  |  | 
6003  |  |   /**  | 
6004  |  |    * Count the number of code points (characters) in the string, assuming that  | 
6005  |  |    * it is valid.  | 
6006  |  |    *  | 
6007  |  |    * This function assumes that the input string is valid UTF-16BE.  | 
6008  |  |    * It is acceptable to pass invalid UTF-16 strings, but in such cases  | 
6009  |  |    * the result is implementation-defined.  | 
6010  |  |    *  | 
6011  |  |    * This function is not BOM-aware.  | 
6012  |  |    *  | 
6013  |  |    * @param input         the UTF-16BE string to process  | 
6014  |  |    * @param length        the length of the string in 2-byte code units  | 
6015  |  |    * (char16_t)  | 
6016  |  |    * @return the number of code points  | 
6017  |  |    */  | 
6018  |  |   simdutf_warn_unused virtual size_t  | 
6019  |  |   count_utf16be(const char16_t *input, size_t length) const noexcept = 0;  | 
6020  |  | #endif // SIMDUTF_FEATURE_UTF16  | 
6021  |  |  | 
6022  |  | #if SIMDUTF_FEATURE_UTF8  | 
6023  |  |   /**  | 
6024  |  |    * Count the number of code points (characters) in the string, assuming that  | 
6025  |  |    * it is valid.  | 
6026  |  |    *  | 
6027  |  |    * This function assumes that the input string is valid UTF-8.  | 
6028  |  |    * It is acceptable to pass invalid UTF-8 strings, but in such cases  | 
6029  |  |    * the result is implementation-defined.  | 
6030  |  |    *  | 
6031  |  |    * @param input         the UTF-8 string to process  | 
6032  |  |    * @param length        the length of the string in bytes  | 
6033  |  |    * @return the number of code points  | 
6034  |  |    */  | 
6035  |  |   simdutf_warn_unused virtual size_t  | 
6036  |  |   count_utf8(const char *input, size_t length) const noexcept = 0;  | 
6037  |  | #endif // SIMDUTF_FEATURE_UTF8  | 
6038  |  |  | 
6039  |  | #if SIMDUTF_FEATURE_BASE64  | 
6040  |  |   /**  | 
6041  |  |    * Provide the maximal binary length in bytes given the base64 input.  | 
6042  |  |    * In general, if the input contains ASCII spaces, the result will be less  | 
6043  |  |    * than the maximum length. It is acceptable to pass invalid base64 strings,  | 
6044  |  |    * but in such cases the result is implementation-defined.  | 
6045  |  |    *  | 
6046  |  |    * @param input         the base64 input to process  | 
6047  |  |    * @param length        the length of the base64 input in bytes  | 
6048  |  |    * @return the maximal number of binary bytes  | 
6049  |  |    */  | 
6050  |  |   simdutf_warn_unused size_t maximal_binary_length_from_base64(  | 
6051  |  |       const char *input, size_t length) const noexcept;  | 
6052  |  |  | 
6053  |  |   /**  | 
6054  |  |    * Provide the maximal binary length in bytes given the base64 input.  | 
6055  |  |    * In general, if the input contains ASCII spaces, the result will be less  | 
6056  |  |    * than the maximum length. It is acceptable to pass invalid base64 strings,  | 
6057  |  |    * but in such cases the result is implementation-defined.  | 
6058  |  |    *  | 
6059  |  |    * @param input         the base64 input to process, in ASCII stored as 16-bit  | 
6060  |  |    * units  | 
6061  |  |    * @param length        the length of the base64 input in 16-bit units  | 
6062  |  |    * @return the maximal number of binary bytes  | 
6063  |  |    */  | 
6064  |  |   simdutf_warn_unused size_t maximal_binary_length_from_base64(  | 
6065  |  |       const char16_t *input, size_t length) const noexcept;  | 
6066  |  |  | 
6067  |  |   /**  | 
6068  |  |    * Convert a base64 input to a binary output.  | 
6069  |  |    *  | 
6070  |  |    * This function follows the WHATWG forgiving-base64 format, which means that  | 
6071  |  |    * it will ignore any ASCII spaces in the input. You may provide a padded  | 
6072  |  |    * input (with one or two equal signs at the end) or an unpadded input  | 
6073  |  |    * (without any equal signs at the end).  | 
6074  |  |    *  | 
6075  |  |    * See https://infra.spec.whatwg.org/#forgiving-base64-decode  | 
6076  |  |    *  | 
6077  |  |    * This function will fail in case of invalid input. When last_chunk_options =  | 
6078  |  |    * loose, there are two possible reasons for failure: the input contains a  | 
6079  |  |    * number of base64 characters that, when divided by 4, leaves a single  | 
6080  |  |    * remainder character (BASE64_INPUT_REMAINDER), or the input contains a  | 
6081  |  |    * character that is not a valid base64 character (INVALID_BASE64_CHARACTER).  | 
6082  |  |    *  | 
6083  |  |    * You should call this function with a buffer that is at least  | 
6084  |  |    * maximal_binary_length_from_base64(input, length) bytes long. If you fail to  | 
6085  |  |    * provide that much space, the function may cause a buffer overflow.  | 
6086  |  |    *  | 
6087  |  |    * @param input         the base64 string to process  | 
6088  |  |    * @param length        the length of the string in bytes  | 
6089  |  |    * @param output        the pointer to a buffer that can hold the conversion  | 
6090  |  |    * result (should be at least maximal_binary_length_from_base64(input, length)  | 
6091  |  |    * bytes long).  | 
6092  |  |    * @param options       base64_default (the default) or base64_url.  | 
6093  |  |    * @param last_chunk_options the last-chunk handling mode; loose by default.  | 
6094  |  |    * @return a result struct (of type simdutf::result, containing the two fields  | 
6095  |  |    * error and count) with an error code and either the position of the error  | 
6096  |  |    * (in the input, in bytes) if any, or the number of bytes written if  | 
6097  |  |    * successful.  | 
6098  |  |    */  | 
6099  |  |   simdutf_warn_unused virtual result  | 
6100  |  |   base64_to_binary(const char *input, size_t length, char *output,  | 
6101  |  |                    base64_options options = base64_default,  | 
6102  |  |                    last_chunk_handling_options last_chunk_options =  | 
6103  |  |                        last_chunk_handling_options::loose) const noexcept = 0;  | 
6104  |  |  | 
6105  |  |   /**  | 
6106  |  |    * Convert a base64 input to a binary output while returning more details  | 
6107  |  |    * than base64_to_binary.  | 
6108  |  |    *  | 
6109  |  |    * This function follows the WHATWG forgiving-base64 format, which means that  | 
6110  |  |    * it will ignore any ASCII spaces in the input. You may provide a padded  | 
6111  |  |    * input (with one or two equal signs at the end) or an unpadded input  | 
6112  |  |    * (without any equal signs at the end).  | 
6113  |  |    *  | 
6114  |  |    * See https://infra.spec.whatwg.org/#forgiving-base64-decode  | 
6115  |  |    *  | 
6116  |  |    * This function will fail in case of invalid input. When last_chunk_options =  | 
6117  |  |    * loose, there are two possible reasons for failure: the input contains a  | 
6118  |  |    * number of base64 characters that, when divided by 4, leaves a single  | 
6119  |  |    * remainder character (BASE64_INPUT_REMAINDER), or the input contains a  | 
6120  |  |    * character that is not a valid base64 character (INVALID_BASE64_CHARACTER).  | 
6121  |  |    *  | 
6122  |  |    * You should call this function with a buffer that is at least  | 
6123  |  |    * maximal_binary_length_from_base64(input, length) bytes long. If you fail to  | 
6124  |  |    * provide that much space, the function may cause a buffer overflow.  | 
6125  |  |    *  | 
6126  |  |    * @param input         the base64 string to process  | 
6127  |  |    * @param length        the length of the string in bytes  | 
6128  |  |    * @param output        the pointer to a buffer that can hold the conversion  | 
6129  |  |    * result (should be at least maximal_binary_length_from_base64(input, length)  | 
6130  |  |    * bytes long).  | 
6131  |  |    * @param options       base64_default (the default) or base64_url.  | 
6132  |  |    * @param last_chunk_options the last-chunk handling mode; loose by default.  | 
6133  |  |    * @return a full_result struct (of type simdutf::full_result, containing the  | 
6134  |  |    * three fields error, input_count and output_count).  | 
6135  |  |    */  | 
6136  |  |   simdutf_warn_unused virtual full_result base64_to_binary_details(  | 
6137  |  |       const char *input, size_t length, char *output,  | 
6138  |  |       base64_options options = base64_default,  | 
6139  |  |       last_chunk_handling_options last_chunk_options =  | 
6140  |  |           last_chunk_handling_options::loose) const noexcept = 0;  | 
6141  |  |   /**  | 
6142  |  |    * Convert a base64 input to a binary output.  | 
6143  |  |    *  | 
6144  |  |    * This function follows the WHATWG forgiving-base64 format, which means that  | 
6145  |  |    * it will ignore any ASCII spaces in the input. You may provide a padded  | 
6146  |  |    * input (with one or two equal signs at the end) or an unpadded input  | 
6147  |  |    * (without any equal signs at the end).  | 
6148  |  |    *  | 
6149  |  |    * See https://infra.spec.whatwg.org/#forgiving-base64-decode  | 
6150  |  |    *  | 
6151  |  |    * This function will fail in case of invalid input. When last_chunk_options =  | 
6152  |  |    * loose, there are two possible reasons for failure: the input contains a  | 
6153  |  |    * number of base64 characters that, when divided by 4, leaves a single  | 
6154  |  |    * remainder character (BASE64_INPUT_REMAINDER), or the input contains a  | 
6155  |  |    * character that is not a valid base64 character (INVALID_BASE64_CHARACTER).  | 
6156  |  |    *  | 
6157  |  |    * You should call this function with a buffer that is at least  | 
6158  |  |    * maximal_binary_length_from_base64(input, length) bytes long. If you  | 
6159  |  |    * fail to provide that much space, the function may cause a buffer overflow.  | 
6160  |  |    *  | 
6161  |  |    * @param input         the base64 string to process, in ASCII stored as  | 
6162  |  |    * 16-bit units  | 
6163  |  |    * @param length        the length of the string in 16-bit units  | 
6164  |  |    * @param output        the pointer to a buffer that can hold the conversion  | 
6165  |  |    * result (should be at least maximal_binary_length_from_base64(input, length)  | 
6166  |  |    * bytes long).  | 
6167  |  |    * @param options       base64_default (the default) or base64_url.  | 
6168  |  |    * @param last_chunk_options the last-chunk handling mode; loose by default.  | 
6169  |  |    * @return a result struct (of type simdutf::result, containing the two fields  | 
6170  |  |    * error and count) with an error code and the position of the  | 
6171  |  |    * INVALID_BASE64_CHARACTER error (in the input, in 16-bit units) if any, or  | 
6172  |  |    * the number of bytes written if successful.  | 
6173  |  |    */  | 
6174  |  |   simdutf_warn_unused virtual result  | 
6175  |  |   base64_to_binary(const char16_t *input, size_t length, char *output,  | 
6176  |  |                    base64_options options = base64_default,  | 
6177  |  |                    last_chunk_handling_options last_chunk_options =  | 
6178  |  |                        last_chunk_handling_options::loose) const noexcept = 0;  | 
6179  |  |  | 
6180  |  |   /**  | 
6181  |  |    * Convert a base64 input to a binary output while returning more details  | 
6182  |  |    * than base64_to_binary.  | 
6183  |  |    *  | 
6184  |  |    * This function follows the WHATWG forgiving-base64 format, which means that  | 
6185  |  |    * it will ignore any ASCII spaces in the input. You may provide a padded  | 
6186  |  |    * input (with one or two equal signs at the end) or an unpadded input  | 
6187  |  |    * (without any equal signs at the end).  | 
6188  |  |    *  | 
6189  |  |    * See https://infra.spec.whatwg.org/#forgiving-base64-decode  | 
6190  |  |    *  | 
6191  |  |    * This function will fail in case of invalid input. When last_chunk_options =  | 
6192  |  |    * loose, there are two possible reasons for failure: the input contains a  | 
6193  |  |    * number of base64 characters that, when divided by 4, leaves a single  | 
6194  |  |    * remainder character (BASE64_INPUT_REMAINDER), or the input contains a  | 
6195  |  |    * character that is not a valid base64 character (INVALID_BASE64_CHARACTER).  | 
6196  |  |    *  | 
6197  |  |    * You should call this function with a buffer that is at least  | 
6198  |  |    * maximal_binary_length_from_base64(input, length) bytes long. If you fail to  | 
6199  |  |    * provide that much space, the function may cause a buffer overflow.  | 
6200  |  |    *  | 
6201  |  |    * @param input         the base64 string, in ASCII stored as 16-bit units  | 
6202  |  |    * @param length        the length of the string in 16-bit units  | 
6203  |  |    * @param output        the pointer to a buffer that can hold the conversion  | 
6204  |  |    * result (should be at least maximal_binary_length_from_base64(input, length)  | 
6205  |  |    * bytes long).  | 
6206  |  |    * @param options       base64_default (the default) or base64_url.  | 
6207  |  |    * @param last_chunk_options the last-chunk handling mode; loose by default.  | 
6208  |  |    * @return a full_result struct (of type simdutf::full_result, containing the  | 
6209  |  |    * three fields error, input_count and output_count).  | 
6210  |  |    */  | 
6211  |  |   simdutf_warn_unused virtual full_result base64_to_binary_details(  | 
6212  |  |       const char16_t *input, size_t length, char *output,  | 
6213  |  |       base64_options options = base64_default,  | 
6214  |  |       last_chunk_handling_options last_chunk_options =  | 
6215  |  |           last_chunk_handling_options::loose) const noexcept = 0;  | 
6216  |  |   /**  | 
6217  |  |    * Provide the base64 length in bytes given the length of a binary input.  | 
6218  |  |    *  | 
6219  |  |    * @param length        the length of the input in bytes  | 
6220  |  |    * @param options       the base64 options to use, can be base64_default or  | 
6221  |  |    * base64_url, is base64_default by default.  | 
6222  |  |    * @return number of base64 bytes  | 
6223  |  |    */  | 
6224  |  |   simdutf_warn_unused size_t base64_length_from_binary(  | 
6225  |  |       size_t length, base64_options options = base64_default) const noexcept;  | 
6226  |  |  | 
6227  |  |   /**  | 
6228  |  |    * Convert a binary input to a base64 output.  | 
6229  |  |    *  | 
6230  |  |    * The default option (simdutf::base64_default) uses the characters `+` and  | 
6231  |  |    * `/` as part of its alphabet. Further, it adds padding (`=`) at the end of  | 
6232  |  |    * the output to ensure that the output length is a multiple of four.  | 
6233  |  |    *  | 
6234  |  |    * The URL option (simdutf::base64_url) uses the characters `-` and `_` as  | 
6235  |  |    * part of its alphabet. No padding is added at the end of the output.  | 
6236  |  |    *  | 
6237  |  |    * This function always succeeds.  | 
6238  |  |    *  | 
6239  |  |    * @param input         the binary to process  | 
6240  |  |    * @param length        the length of the input in bytes  | 
6241  |  |    * @param output        the pointer to a buffer that can hold the conversion  | 
6242  |  |    * result (at least base64_length_from_binary(length, options) bytes long)  | 
6243  |  |    * @param options       the base64 options to use, can be base64_default or  | 
6244  |  |    * base64_url, is base64_default by default.  | 
6245  |  |    * @return number of written bytes, will be equal to  | 
6246  |  |    * base64_length_from_binary(length, options)  | 
6247  |  |    */  | 
6248  |  |   virtual size_t  | 
6249  |  |   binary_to_base64(const char *input, size_t length, char *output,  | 
6250  |  |                    base64_options options = base64_default) const noexcept = 0;  | 
6251  |  |   /**  | 
6252  |  |    * Find the first occurrence of a character in a string. If the character is  | 
6253  |  |    * not found, return a pointer to the end of the string.  | 
6254  |  |    * Overloads are provided for char and char16_t strings.  | 
6255  |  |    * @param start        the start of the string  | 
6256  |  |    * @param end          the end of the string  | 
6257  |  |    * @param character    the character to find  | 
6258  |  |    * @return a pointer to the first occurrence of the character in the string,  | 
6259  |  |    * or a pointer to the end of the string if the character is not found.  | 
6260  |  |    */  | 
6261  |  |   virtual const char *find(const char *start, const char *end,  | 
6262  |  |                            char character) const noexcept = 0;  | 
6263  |  |   virtual const char16_t *find(const char16_t *start, const char16_t *end,  | 
6264  |  |                                char16_t character) const noexcept = 0;  | 
6265  |  | #endif // SIMDUTF_FEATURE_BASE64  | 
6266  |  |  | 
6267  |  | #ifdef SIMDUTF_INTERNAL_TESTS  | 
6268  |  |   // This method is exported only in developer mode; its purpose  | 
6269  |  |   // is to expose some internal test procedures from the given  | 
6270  |  |   // implementation so that they can be run through our standard  | 
6271  |  |   // test framework.  | 
6272  |  |   //  | 
6273  |  |   // Regular users should not use it; the tests of the public  | 
6274  |  |   // API are enough.  | 
6275  |  |  | 
6276  |  |   struct TestProcedure { | 
6277  |  |     // display name  | 
6278  |  |     std::string name;  | 
6279  |  |  | 
6280  |  |     // the test to run on an implementation; returns nothing (void)  | 
6281  |  |     void (*procedure)(const implementation &);  | 
6282  |  |   };  | 
6283  |  |  | 
6284  |  |   virtual std::vector<TestProcedure> internal_tests() const;  | 
6285  |  | #endif  | 
6286  |  |  | 
6287  |  | protected:  | 
6288  |  |   /** @private Construct an implementation from a name, a description and the  | 
6289  |  |    * required instruction sets; pointers are stored as-is. For subclasses. */  | 
6290  |  |   simdutf_really_inline implementation(const char *name,  | 
6291  |  |                                        const char *description,  | 
6292  |  |                                        uint32_t required_instruction_sets)  | 
6293  |  |       : _name(name), _description(description),  | 
6294  | 0  |         _required_instruction_sets(required_instruction_sets) {} | 
6295  |  |   // protected, so outside code cannot delete an implementation directly  | 
6296  |  | protected:  | 
6297  |  |   ~implementation() = default;  | 
6298  |  |  | 
6299  |  | private:  | 
6300  |  |   /**  | 
6301  |  |    * The name of this implementation.  | 
6302  |  |    */  | 
6303  |  |   const char *_name;  | 
6304  |  |  | 
6305  |  |   /**  | 
6306  |  |    * The description of this implementation.  | 
6307  |  |    */  | 
6308  |  |   const char *_description;  | 
6309  |  |  | 
6310  |  |   /**  | 
6311  |  |    * Instruction sets required for this implementation.  | 
6312  |  |    */  | 
6313  |  |   const uint32_t _required_instruction_sets;  | 
6314  |  | };  | 
6315  |  |  | 
6316  |  | /** @private */  | 
6317  |  | namespace internal { | 
6318  |  |  | 
6319  |  | /**  | 
6320  |  |  * The list of available implementations compiled into simdutf.  | 
6321  |  |  */  | 
6322  |  | class available_implementation_list { | 
6323  |  | public:  | 
6324  |  |   /** Construct the list handle (the constructor itself does nothing) */  | 
6325  | 0  |   simdutf_really_inline available_implementation_list() {} | 
6326  |  |   /** Number of implementations */  | 
6327  |  |   size_t size() const noexcept;  | 
6328  |  |   /** STL const begin() iterator */  | 
6329  |  |   const implementation *const *begin() const noexcept;  | 
6330  |  |   /** STL const end() iterator */  | 
6331  |  |   const implementation *const *end() const noexcept;  | 
6332  |  |  | 
6333  |  |   /**  | 
6334  |  |    * Get the implementation with the given name.  | 
6335  |  |    *  | 
6336  |  |    * Case sensitive.  | 
6337  |  |    *  | 
6338  |  |    *     const implementation *impl =  | 
6339  |  |    *         simdutf::get_available_implementations()["westmere"];  | 
6340  |  |    *     if (!impl || !impl->supported_by_runtime_system()) { exit(1); }  | 
6341  |  |    *     simdutf::get_active_implementation() = impl;  | 
6342  |  |    *  | 
6343  |  |    * @param name the implementation to find, e.g. "westmere", "haswell", "arm64"  | 
6344  |  |    * @return the implementation, or nullptr if no implementation has that name.  | 
6345  |  |    */  | 
6346  | 0  |   const implementation *operator[](const std::string &name) const noexcept { | 
6347  | 0  |     for (const implementation *impl : *this) { | 
6348  | 0  |       if (impl->name() == name) { | 
6349  | 0  |         return impl;  | 
6350  | 0  |       }  | 
6351  | 0  |     }  | 
6352  | 0  |     return nullptr;  | 
6353  | 0  |   }  | 
6354  |  |  | 
6355  |  |   /**  | 
6356  |  |    * Detect the most advanced implementation supported by the current host.  | 
6357  |  |    *  | 
6358  |  |    * This is used to initialize the implementation on startup.  | 
6359  |  |    *  | 
6360  |  |    *     const implementation *impl =  | 
6361  |  |    *         simdutf::get_available_implementations().detect_best_supported();  | 
6362  |  |    *     simdutf::get_active_implementation() = impl;  | 
6363  |  |    *  | 
6364  |  |    * @return the most advanced supported implementation for the current host, or  | 
6365  |  |    * an implementation that returns UNSUPPORTED_ARCHITECTURE if there is no  | 
6366  |  |    * supported implementation. Will never return nullptr.  | 
6367  |  |    */  | 
6368  |  |   const implementation *detect_best_supported() const noexcept;  | 
6369  |  | };  | 
6370  |  | // Pointer wrapper: a raw T* under SIMDUTF_NO_THREADS, else a std::atomic<T *>.  | 
6371  |  | template <typename T> class atomic_ptr { | 
6372  |  | public:  | 
6373  |  |   atomic_ptr(T *_ptr) : ptr{_ptr} {} | 
6374  |  |   // Single-threaded build: every accessor touches the raw pointer directly.  | 
6375  |  | #if defined(SIMDUTF_NO_THREADS)  | 
6376  |  |   operator const T *() const { return ptr; } | 
6377  |  |   const T &operator*() const { return *ptr; } | 
6378  |  |   const T *operator->() const { return ptr; } | 
6379  |  |   // non-const accessors and assignment from a raw pointer  | 
6380  |  |   operator T *() { return ptr; } | 
6381  |  |   T &operator*() { return *ptr; } | 
6382  |  |   T *operator->() { return ptr; } | 
6383  |  |   atomic_ptr &operator=(T *_ptr) { | 
6384  |  |     ptr = _ptr;  | 
6385  |  |     return *this;  | 
6386  |  |   }  | 
6387  |  |   // Threaded build: pointer reads go through std::atomic (load / conversion).  | 
6388  |  | #else  | 
6389  |  |   operator const T *() const { return ptr.load(); } | 
6390  |  |   const T &operator*() const { return *ptr; } | 
6391  |  |   const T *operator->() const { return ptr.load(); } | 
6392  |  |   // non-const accessors; operator= stores via std::atomic's assignment  | 
6393  |  |   operator T *() { return ptr.load(); } | 
6394  |  |   T &operator*() { return *ptr; } | 
6395  |  |   T *operator->() { return ptr.load(); } | 
6396  |  |   atomic_ptr &operator=(T *_ptr) { | 
6397  |  |     ptr = _ptr;  | 
6398  |  |     return *this;  | 
6399  |  |   }  | 
6400  |  |  | 
6401  |  | #endif  | 
6402  |  |   // underlying storage, selected by the same macro as the accessors  | 
6403  |  | private:  | 
6404  |  | #if defined(SIMDUTF_NO_THREADS)  | 
6405  |  |   T *ptr;  | 
6406  |  | #else  | 
6407  |  |   std::atomic<T *> ptr;  | 
6408  |  | #endif  | 
6409  |  | };  | 
6410  |  |  | 
6411  |  | class detect_best_supported_implementation_on_first_use;  | 
6412  |  |  | 
6413  |  | } // namespace internal  | 
6414  |  |  | 
6415  |  | /**  | 
6416  |  |  * The list of available implementations compiled into simdutf.  | 
6417  |  |  */  | 
6418  |  | extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list &  | 
6419  |  | get_available_implementations();  | 
6420  |  |  | 
6421  |  | /**  | 
6422  |  |  * The active implementation.  | 
6423  |  |  *  | 
6424  |  |  * Automatically initialized on first use to the most advanced implementation  | 
6425  |  |  * supported by this hardware.  | 
6426  |  |  */  | 
6427  |  | extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &  | 
6428  |  | get_active_implementation();  | 
6429  |  |  | 
6430  |  | } // namespace simdutf  | 
6431  |  |  | 
6432  |  | #endif // SIMDUTF_IMPLEMENTATION_H  | 
6433  |  | /* end file include/simdutf/implementation.h */  | 
6434  |  |  | 
6435  |  | // Implementation-internal files (must be included before the implementations  | 
6436  |  | // themselves, to keep amalgamation working--otherwise, the first time a file is  | 
6437  |  | // included, it might be put inside the #ifdef  | 
6438  |  | // SIMDUTF_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other  | 
6439  |  | // implementations can't compile unless that implementation is turned on).  | 
6440  |  |  | 
6441  |  | SIMDUTF_POP_DISABLE_WARNINGS  | 
6442  |  |  | 
6443  |  | #endif // SIMDUTF_H  | 
6444  |  | /* end file include/simdutf.h */  |