Coverage Report

Created: 2026-01-17 06:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/simdutf/src/westmere/implementation.cpp
Line
Count
Source
1
#include "simdutf/westmere/begin.h"
2
3
namespace simdutf {
4
namespace SIMDUTF_IMPLEMENTATION {
5
namespace {
6
#ifndef SIMDUTF_WESTMERE_H
7
  #error "westmere.h must be included"
8
#endif
9
using namespace simd;
10
11
#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING ||                \
12
    SIMDUTF_FEATURE_UTF8
13
33.5k
simdutf_really_inline bool is_ascii(const simd8x64<uint8_t> &input) {
14
33.5k
  return input.reduce_or().is_ascii();
15
33.5k
}
16
#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING ||
17
       // SIMDUTF_FEATURE_UTF8
18
19
#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
20
simdutf_really_inline simd8<bool>
21
must_be_2_3_continuation(const simd8<uint8_t> prev2,
22
56.8k
                         const simd8<uint8_t> prev3) {
23
56.8k
  simd8<uint8_t> is_third_byte =
24
56.8k
      prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be >= 0x80
25
56.8k
  simd8<uint8_t> is_fourth_byte =
26
56.8k
      prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80
27
56.8k
  return simd8<bool>(is_third_byte | is_fourth_byte);
28
56.8k
}
29
#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
30
31
#if SIMDUTF_FEATURE_UTF8
32
  #include "westmere/internal/loader.cpp"
33
#endif // SIMDUTF_FEATURE_UTF8
34
35
#if SIMDUTF_FEATURE_UTF16
36
  #include "westmere/sse_utf16fix.cpp"
37
#endif // SIMDUTF_FEATURE_UTF16
38
#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
39
  #include "westmere/sse_validate_utf16.cpp"
40
#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
41
42
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
43
  #include "westmere/sse_convert_latin1_to_utf8.cpp"
44
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
45
46
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
47
  #include "westmere/sse_convert_latin1_to_utf16.cpp"
48
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
49
50
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
51
  #include "westmere/sse_convert_latin1_to_utf32.cpp"
52
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
53
54
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
55
  #include "westmere/sse_convert_utf8_to_utf16.cpp"
56
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
57
58
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
59
  #include "westmere/sse_convert_utf8_to_utf32.cpp"
60
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
61
62
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
63
  #include "westmere/sse_convert_utf8_to_latin1.cpp"
64
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
65
66
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
67
  #include "westmere/sse_convert_utf16_to_latin1.cpp"
68
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
69
70
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
71
  #include "westmere/sse_convert_utf16_to_utf8.cpp"
72
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
73
74
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
75
  #include "westmere/sse_convert_utf16_to_utf32.cpp"
76
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
77
78
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
79
  #include "westmere/sse_convert_utf32_to_latin1.cpp"
80
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
81
82
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
83
  #include "westmere/sse_convert_utf32_to_utf8.cpp"
84
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
85
86
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
87
  #include "westmere/sse_convert_utf32_to_utf16.cpp"
88
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
89
90
#if SIMDUTF_FEATURE_BASE64
91
  #include "westmere/sse_base64.cpp"
92
#endif // SIMDUTF_FEATURE_BASE64
93
94
} // unnamed namespace
95
} // namespace SIMDUTF_IMPLEMENTATION
96
} // namespace simdutf
97
98
#include "generic/buf_block_reader.h"
99
#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
100
  #include "generic/utf8_validation/utf8_lookup4_algorithm.h"
101
  #include "generic/utf8_validation/utf8_validator.h"
102
#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
103
#if SIMDUTF_FEATURE_ASCII
104
  #include "generic/ascii_validation.h"
105
#endif // SIMDUTF_FEATURE_ASCII
106
107
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
108
  // transcoding from UTF-8 to UTF-16
109
  #include "generic/utf8_to_utf16/valid_utf8_to_utf16.h"
110
  #include "generic/utf8_to_utf16/utf8_to_utf16.h"
111
  #include "generic/utf8/utf16_length_from_utf8_bytemask.h"
112
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
113
114
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
115
  #include "generic/utf8_to_utf32/valid_utf8_to_utf32.h"
116
  #include "generic/utf8_to_utf32/utf8_to_utf32.h"
117
  #include "generic/utf32.h"
118
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
119
120
#if SIMDUTF_FEATURE_UTF8
121
  #include "generic/utf8.h"
122
#endif // SIMDUTF_FEATURE_UTF8
123
#if SIMDUTF_FEATURE_UTF16
124
  #include "generic/utf16.h"
125
  #include "generic/utf16/utf8_length_from_utf16_bytemask.h"
126
#endif // SIMDUTF_FEATURE_UTF16
127
#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
128
  #include "generic/validate_utf16.h"
129
#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
130
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
131
  #include "generic/utf8_to_latin1/utf8_to_latin1.h"
132
  #include "generic/utf8_to_latin1/valid_utf8_to_latin1.h"
133
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
134
135
#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
136
  #include "generic/validate_utf32.h"
137
#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
138
139
#if SIMDUTF_FEATURE_BASE64
140
  #include "generic/base64.h"
141
  #include "generic/find.h"
142
#endif // SIMDUTF_FEATURE_BASE64
143
144
//
145
// Implementation-specific overrides
146
//
147
148
namespace simdutf {
149
namespace SIMDUTF_IMPLEMENTATION {
150
151
#if SIMDUTF_FEATURE_DETECT_ENCODING
152
simdutf_warn_unused int
153
implementation::detect_encodings(const char *input,
154
0
                                 size_t length) const noexcept {
155
  // If there is a BOM, then we trust it.
156
0
  auto bom_encoding = simdutf::BOM::check_bom(input, length);
157
0
  if (bom_encoding != encoding_type::unspecified) {
158
0
    return bom_encoding;
159
0
  }
160
161
0
  int out = 0;
162
0
  uint32_t utf16_err = (length % 2);
163
0
  uint32_t utf32_err = (length % 4);
164
0
  uint32_t ends_with_high = 0;
165
0
  const auto v_d8 = simd8<uint8_t>::splat(0xd8);
166
0
  const auto v_f8 = simd8<uint8_t>::splat(0xf8);
167
0
  const auto v_fc = simd8<uint8_t>::splat(0xfc);
168
0
  const auto v_dc = simd8<uint8_t>::splat(0xdc);
169
0
  const __m128i standardmax = _mm_set1_epi32(0x10ffff);
170
0
  const __m128i offset = _mm_set1_epi32(0xffff2000);
171
0
  const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff);
172
0
  __m128i currentmax = _mm_setzero_si128();
173
0
  __m128i currentoffsetmax = _mm_setzero_si128();
174
175
0
  utf8_checker c{};
176
0
  buf_block_reader<64> reader(reinterpret_cast<const uint8_t *>(input), length);
177
0
  while (reader.has_full_block()) {
178
0
    simd::simd8x64<uint8_t> in(reader.full_block());
179
    // utf8 checks
180
0
    c.check_next_input(in);
181
182
    // utf16le checks
183
0
    auto in0 = simd16<uint16_t>(in.chunks[0]);
184
0
    auto in1 = simd16<uint16_t>(in.chunks[1]);
185
0
    const auto t0 = in0.shr<8>();
186
0
    const auto t1 = in1.shr<8>();
187
0
    const auto packed1 = simd16<uint16_t>::pack(t0, t1);
188
0
    auto in2 = simd16<uint16_t>(in.chunks[2]);
189
0
    auto in3 = simd16<uint16_t>(in.chunks[3]);
190
0
    const auto t2 = in2.shr<8>();
191
0
    const auto t3 = in3.shr<8>();
192
0
    const auto packed2 = simd16<uint16_t>::pack(t2, t3);
193
194
0
    const auto surrogates_wordmask_lo = (packed1 & v_f8) == v_d8;
195
0
    const auto surrogates_wordmask_hi = (packed2 & v_f8) == v_d8;
196
0
    const uint32_t surrogates_bitmask =
197
0
        (surrogates_wordmask_hi.to_bitmask() << 16) |
198
0
        surrogates_wordmask_lo.to_bitmask();
199
0
    const auto vL_lo = (packed1 & v_fc) == v_dc;
200
0
    const auto vL_hi = (packed2 & v_fc) == v_dc;
201
0
    const uint32_t L = (vL_hi.to_bitmask() << 16) | vL_lo.to_bitmask();
202
0
    const uint32_t H = L ^ surrogates_bitmask;
203
0
    utf16_err |= (((H << 1) | ends_with_high) != L);
204
0
    ends_with_high = (H & 0x80000000) != 0;
205
206
    // utf32le checks
207
0
    currentmax = _mm_max_epu32(in.chunks[0], currentmax);
208
0
    currentoffsetmax =
209
0
        _mm_max_epu32(_mm_add_epi32(in.chunks[0], offset), currentoffsetmax);
210
0
    currentmax = _mm_max_epu32(in.chunks[1], currentmax);
211
0
    currentoffsetmax =
212
0
        _mm_max_epu32(_mm_add_epi32(in.chunks[1], offset), currentoffsetmax);
213
0
    currentmax = _mm_max_epu32(in.chunks[2], currentmax);
214
0
    currentoffsetmax =
215
0
        _mm_max_epu32(_mm_add_epi32(in.chunks[2], offset), currentoffsetmax);
216
0
    currentmax = _mm_max_epu32(in.chunks[3], currentmax);
217
0
    currentoffsetmax =
218
0
        _mm_max_epu32(_mm_add_epi32(in.chunks[3], offset), currentoffsetmax);
219
220
0
    reader.advance();
221
0
  }
222
223
0
  uint8_t block[64]{};
224
0
  size_t idx = reader.block_index();
225
0
  std::memcpy(block, &input[idx], length - idx);
226
0
  simd::simd8x64<uint8_t> in(block);
227
0
  c.check_next_input(in);
228
229
  // utf16le last block check
230
0
  auto in0 = simd16<uint16_t>(in.chunks[0]);
231
0
  auto in1 = simd16<uint16_t>(in.chunks[1]);
232
0
  const auto t0 = in0.shr<8>();
233
0
  const auto t1 = in1.shr<8>();
234
0
  const auto packed1 = simd16<uint16_t>::pack(t0, t1);
235
0
  auto in2 = simd16<uint16_t>(in.chunks[2]);
236
0
  auto in3 = simd16<uint16_t>(in.chunks[3]);
237
0
  const auto t2 = in2.shr<8>();
238
0
  const auto t3 = in3.shr<8>();
239
0
  const auto packed2 = simd16<uint16_t>::pack(t2, t3);
240
241
0
  const auto surrogates_wordmask_lo = (packed1 & v_f8) == v_d8;
242
0
  const auto surrogates_wordmask_hi = (packed2 & v_f8) == v_d8;
243
0
  const uint32_t surrogates_bitmask =
244
0
      (surrogates_wordmask_hi.to_bitmask() << 16) |
245
0
      surrogates_wordmask_lo.to_bitmask();
246
0
  const auto vL_lo = (packed1 & v_fc) == v_dc;
247
0
  const auto vL_hi = (packed2 & v_fc) == v_dc;
248
0
  const uint32_t L = (vL_hi.to_bitmask() << 16) | vL_lo.to_bitmask();
249
0
  const uint32_t H = L ^ surrogates_bitmask;
250
0
  utf16_err |= (((H << 1) | ends_with_high) != L);
251
  // this is required to check for last byte ending in high and end of input
252
  // is reached
253
0
  ends_with_high = (H & 0x80000000) != 0;
254
0
  utf16_err |= ends_with_high;
255
256
  // utf32le last block check
257
0
  currentmax = _mm_max_epu32(in.chunks[0], currentmax);
258
0
  currentoffsetmax =
259
0
      _mm_max_epu32(_mm_add_epi32(in.chunks[0], offset), currentoffsetmax);
260
0
  currentmax = _mm_max_epu32(in.chunks[1], currentmax);
261
0
  currentoffsetmax =
262
0
      _mm_max_epu32(_mm_add_epi32(in.chunks[1], offset), currentoffsetmax);
263
0
  currentmax = _mm_max_epu32(in.chunks[2], currentmax);
264
0
  currentoffsetmax =
265
0
      _mm_max_epu32(_mm_add_epi32(in.chunks[2], offset), currentoffsetmax);
266
0
  currentmax = _mm_max_epu32(in.chunks[3], currentmax);
267
0
  currentoffsetmax =
268
0
      _mm_max_epu32(_mm_add_epi32(in.chunks[3], offset), currentoffsetmax);
269
270
0
  reader.advance();
271
272
0
  c.check_eof();
273
0
  bool is_valid_utf8 = !c.errors();
274
0
  __m128i is_zero =
275
0
      _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax);
276
0
  utf32_err |= (_mm_test_all_zeros(is_zero, is_zero) == 0);
277
278
0
  is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax),
279
0
                          standardoffsetmax);
280
0
  utf32_err |= (_mm_test_all_zeros(is_zero, is_zero) == 0);
281
0
  if (is_valid_utf8) {
282
0
    out |= encoding_type::UTF8;
283
0
  }
284
0
  if (utf16_err == 0) {
285
0
    out |= encoding_type::UTF16_LE;
286
0
  }
287
0
  if (utf32_err == 0) {
288
0
    out |= encoding_type::UTF32_LE;
289
0
  }
290
0
  return out;
291
0
}
292
#endif // SIMDUTF_FEATURE_DETECT_ENCODING
293
294
#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
295
simdutf_warn_unused bool
296
20.3k
implementation::validate_utf8(const char *buf, size_t len) const noexcept {
297
20.3k
  return westmere::utf8_validation::generic_validate_utf8(buf, len);
298
20.3k
}
299
#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
300
301
#if SIMDUTF_FEATURE_UTF8
302
simdutf_warn_unused result implementation::validate_utf8_with_errors(
303
5.13k
    const char *buf, size_t len) const noexcept {
304
5.13k
  return westmere::utf8_validation::generic_validate_utf8_with_errors(buf, len);
305
5.13k
}
306
#endif // SIMDUTF_FEATURE_UTF8
307
308
#if SIMDUTF_FEATURE_ASCII
309
simdutf_warn_unused bool
310
0
implementation::validate_ascii(const char *buf, size_t len) const noexcept {
311
0
  return westmere::ascii_validation::generic_validate_ascii(buf, len);
312
0
}
313
314
simdutf_warn_unused result implementation::validate_ascii_with_errors(
315
0
    const char *buf, size_t len) const noexcept {
316
0
  return westmere::ascii_validation::generic_validate_ascii_with_errors(buf,
317
0
                                                                        len);
318
0
}
319
#endif // SIMDUTF_FEATURE_ASCII
320
321
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
322
simdutf_warn_unused bool
323
implementation::validate_utf16le_as_ascii(const char16_t *buf,
324
0
                                          size_t len) const noexcept {
325
0
  return westmere::utf16::validate_utf16_as_ascii_with_errors<
326
0
             endianness::LITTLE>(buf, len)
327
0
             .error == SUCCESS;
328
0
}
329
330
simdutf_warn_unused bool
331
implementation::validate_utf16be_as_ascii(const char16_t *buf,
332
0
                                          size_t len) const noexcept {
333
0
  return westmere::utf16::validate_utf16_as_ascii_with_errors<endianness::BIG>(
334
0
             buf, len)
335
0
             .error == SUCCESS;
336
0
}
337
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
338
339
#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
340
simdutf_warn_unused bool
341
implementation::validate_utf16le(const char16_t *buf,
342
15.0k
                                 size_t len) const noexcept {
343
15.0k
  if (simdutf_unlikely(len == 0)) {
344
    // empty input is valid UTF-16. protect the implementation from
345
    // handling nullptr
346
13.2k
    return true;
347
13.2k
  }
348
1.76k
  const auto res =
349
1.76k
      westmere::utf16::validate_utf16_with_errors<endianness::LITTLE>(buf, len);
350
1.76k
  if (res.is_err()) {
351
51
    return false;
352
51
  }
353
354
1.71k
  if (res.count == len)
355
0
    return true;
356
357
1.71k
  return scalar::utf16::validate<endianness::LITTLE>(buf + res.count,
358
1.71k
                                                     len - res.count);
359
1.71k
}
360
#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
361
362
#if SIMDUTF_FEATURE_UTF16
363
simdutf_warn_unused bool
364
implementation::validate_utf16be(const char16_t *buf,
365
15.0k
                                 size_t len) const noexcept {
366
15.0k
  if (simdutf_unlikely(len == 0)) {
367
    // empty input is valid UTF-16. protect the implementation from
368
    // handling nullptr
369
13.2k
    return true;
370
13.2k
  }
371
1.75k
  const auto res =
372
1.75k
      westmere::utf16::validate_utf16_with_errors<endianness::BIG>(buf, len);
373
1.75k
  if (res.is_err()) {
374
57
    return false;
375
57
  }
376
377
1.69k
  if (res.count == len)
378
0
    return true;
379
380
1.69k
  return scalar::utf16::validate<endianness::BIG>(buf + res.count,
381
1.69k
                                                  len - res.count);
382
1.69k
}
383
384
simdutf_warn_unused result implementation::validate_utf16le_with_errors(
385
5.13k
    const char16_t *buf, size_t len) const noexcept {
386
5.13k
  const result res =
387
5.13k
      westmere::utf16::validate_utf16_with_errors<endianness::LITTLE>(buf, len);
388
5.13k
  if (res.count != len) {
389
326
    const result scalar_res =
390
326
        scalar::utf16::validate_with_errors<endianness::LITTLE>(
391
326
            buf + res.count, len - res.count);
392
326
    return result(scalar_res.error, res.count + scalar_res.count);
393
4.80k
  } else {
394
4.80k
    return res;
395
4.80k
  }
396
5.13k
}
397
398
simdutf_warn_unused result implementation::validate_utf16be_with_errors(
399
5.13k
    const char16_t *buf, size_t len) const noexcept {
400
5.13k
  const result res =
401
5.13k
      westmere::utf16::validate_utf16_with_errors<endianness::BIG>(buf, len);
402
5.13k
  if (res.count != len) {
403
317
    result scalar_res = scalar::utf16::validate_with_errors<endianness::BIG>(
404
317
        buf + res.count, len - res.count);
405
317
    return result(scalar_res.error, res.count + scalar_res.count);
406
4.81k
  } else {
407
4.81k
    return res;
408
4.81k
  }
409
5.13k
}
410
411
void implementation::to_well_formed_utf16le(const char16_t *input, size_t len,
412
0
                                            char16_t *output) const noexcept {
413
0
  return utf16fix_sse<endianness::LITTLE>(input, len, output);
414
0
}
415
416
void implementation::to_well_formed_utf16be(const char16_t *input, size_t len,
417
0
                                            char16_t *output) const noexcept {
418
0
  return utf16fix_sse<endianness::BIG>(input, len, output);
419
0
}
420
#endif // SIMDUTF_FEATURE_UTF16
421
422
#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
423
simdutf_warn_unused bool
424
9.89k
implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
425
9.89k
  return utf32::validate(buf, len);
426
9.89k
}
427
#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
428
429
#if SIMDUTF_FEATURE_UTF32
430
simdutf_warn_unused result implementation::validate_utf32_with_errors(
431
0
    const char32_t *buf, size_t len) const noexcept {
432
0
  return utf32::validate_with_errors(buf, len);
433
0
}
434
#endif // SIMDUTF_FEATURE_UTF32
435
436
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
437
simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(
438
5.42k
    const char *buf, size_t len, char *utf8_output) const noexcept {
439
440
5.42k
  std::pair<const char *, char *> ret =
441
5.42k
      sse_convert_latin1_to_utf8(buf, len, utf8_output);
442
5.42k
  size_t converted_chars = ret.second - utf8_output;
443
444
5.42k
  if (ret.first != buf + len) {
445
804
    const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert(
446
804
        ret.first, len - (ret.first - buf), ret.second);
447
804
    converted_chars += scalar_converted_chars;
448
804
  }
449
450
5.42k
  return converted_chars;
451
5.42k
}
452
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
453
454
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
455
simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(
456
5.13k
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
457
5.13k
  std::pair<const char *, char16_t *> ret =
458
5.13k
      sse_convert_latin1_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
459
5.13k
  if (ret.first == nullptr) {
460
0
    return 0;
461
0
  }
462
5.13k
  size_t converted_chars = ret.second - utf16_output;
463
5.13k
  if (ret.first != buf + len) {
464
438
    const size_t scalar_converted_chars =
465
438
        scalar::latin1_to_utf16::convert<endianness::LITTLE>(
466
438
            ret.first, len - (ret.first - buf), ret.second);
467
438
    if (scalar_converted_chars == 0) {
468
0
      return 0;
469
0
    }
470
438
    converted_chars += scalar_converted_chars;
471
438
  }
472
5.13k
  return converted_chars;
473
5.13k
}
474
475
simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(
476
5.13k
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
477
5.13k
  std::pair<const char *, char16_t *> ret =
478
5.13k
      sse_convert_latin1_to_utf16<endianness::BIG>(buf, len, utf16_output);
479
5.13k
  if (ret.first == nullptr) {
480
0
    return 0;
481
0
  }
482
5.13k
  size_t converted_chars = ret.second - utf16_output;
483
5.13k
  if (ret.first != buf + len) {
484
438
    const size_t scalar_converted_chars =
485
438
        scalar::latin1_to_utf16::convert<endianness::BIG>(
486
438
            ret.first, len - (ret.first - buf), ret.second);
487
438
    if (scalar_converted_chars == 0) {
488
0
      return 0;
489
0
    }
490
438
    converted_chars += scalar_converted_chars;
491
438
  }
492
5.13k
  return converted_chars;
493
5.13k
}
494
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
495
496
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
497
simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(
498
5.13k
    const char *buf, size_t len, char32_t *utf32_output) const noexcept {
499
5.13k
  std::pair<const char *, char32_t *> ret =
500
5.13k
      sse_convert_latin1_to_utf32(buf, len, utf32_output);
501
5.13k
  if (ret.first == nullptr) {
502
0
    return 0;
503
0
  }
504
5.13k
  size_t converted_chars = ret.second - utf32_output;
505
5.13k
  if (ret.first != buf + len) {
506
438
    const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert(
507
438
        ret.first, len - (ret.first - buf), ret.second);
508
438
    if (scalar_converted_chars == 0) {
509
0
      return 0;
510
0
    }
511
438
    converted_chars += scalar_converted_chars;
512
438
  }
513
5.13k
  return converted_chars;
514
5.13k
}
515
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
516
517
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
518
simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(
519
10.2k
    const char *buf, size_t len, char *latin1_output) const noexcept {
520
10.2k
  utf8_to_latin1::validating_transcoder converter;
521
10.2k
  return converter.convert(buf, len, latin1_output);
522
10.2k
}
523
524
simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(
525
0
    const char *buf, size_t len, char *latin1_output) const noexcept {
526
0
  utf8_to_latin1::validating_transcoder converter;
527
0
  return converter.convert_with_errors(buf, len, latin1_output);
528
0
}
529
530
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(
531
0
    const char *buf, size_t len, char *latin1_output) const noexcept {
532
0
  return westmere::utf8_to_latin1::convert_valid(buf, len, latin1_output);
533
0
}
534
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
535
536
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
537
simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(
538
10.1k
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
539
10.1k
  utf8_to_utf16::validating_transcoder converter;
540
10.1k
  return converter.convert<endianness::LITTLE>(buf, len, utf16_output);
541
10.1k
}
542
543
simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(
544
10.1k
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
545
10.1k
  utf8_to_utf16::validating_transcoder converter;
546
10.1k
  return converter.convert<endianness::BIG>(buf, len, utf16_output);
547
10.1k
}
548
549
simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(
550
0
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
551
0
  utf8_to_utf16::validating_transcoder converter;
552
0
  return converter.convert_with_errors<endianness::LITTLE>(buf, len,
553
0
                                                           utf16_output);
554
0
}
555
556
simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(
557
0
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
558
0
  utf8_to_utf16::validating_transcoder converter;
559
0
  return converter.convert_with_errors<endianness::BIG>(buf, len, utf16_output);
560
0
}
561
562
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(
563
0
    const char *input, size_t size, char16_t *utf16_output) const noexcept {
564
0
  return utf8_to_utf16::convert_valid<endianness::LITTLE>(input, size,
565
0
                                                          utf16_output);
566
0
}
567
568
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(
569
0
    const char *input, size_t size, char16_t *utf16_output) const noexcept {
570
0
  return utf8_to_utf16::convert_valid<endianness::BIG>(input, size,
571
0
                                                       utf16_output);
572
0
}
573
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
574
575
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
576
simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(
577
5.13k
    const char *buf, size_t len, char32_t *utf32_output) const noexcept {
578
5.13k
  utf8_to_utf32::validating_transcoder converter;
579
5.13k
  return converter.convert(buf, len, utf32_output);
580
5.13k
}
581
582
simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(
583
0
    const char *buf, size_t len, char32_t *utf32_output) const noexcept {
584
0
  utf8_to_utf32::validating_transcoder converter;
585
0
  return converter.convert_with_errors(buf, len, utf32_output);
586
0
}
587
588
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(
589
0
    const char *input, size_t size, char32_t *utf32_output) const noexcept {
590
0
  return utf8_to_utf32::convert_valid(input, size, utf32_output);
591
0
}
592
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
593
594
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
595
simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(
596
5.13k
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
597
5.13k
  std::pair<const char16_t *, char *> ret =
598
5.13k
      sse_convert_utf16_to_latin1<endianness::LITTLE>(buf, len, latin1_output);
599
5.13k
  if (ret.first == nullptr) {
600
0
    return 0;
601
0
  }
602
5.13k
  size_t saved_bytes = ret.second - latin1_output;
603
604
5.13k
  if (ret.first != buf + len) {
605
410
    const size_t scalar_saved_bytes =
606
410
        scalar::utf16_to_latin1::convert<endianness::LITTLE>(
607
410
            ret.first, len - (ret.first - buf), ret.second);
608
410
    if (scalar_saved_bytes == 0) {
609
0
      return 0;
610
0
    }
611
410
    saved_bytes += scalar_saved_bytes;
612
410
  }
613
5.13k
  return saved_bytes;
614
5.13k
}
615
616
simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(
617
5.13k
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
618
5.13k
  std::pair<const char16_t *, char *> ret =
619
5.13k
      sse_convert_utf16_to_latin1<endianness::BIG>(buf, len, latin1_output);
620
5.13k
  if (ret.first == nullptr) {
621
0
    return 0;
622
0
  }
623
5.13k
  size_t saved_bytes = ret.second - latin1_output;
624
625
5.13k
  if (ret.first != buf + len) {
626
410
    const size_t scalar_saved_bytes =
627
410
        scalar::utf16_to_latin1::convert<endianness::BIG>(
628
410
            ret.first, len - (ret.first - buf), ret.second);
629
410
    if (scalar_saved_bytes == 0) {
630
0
      return 0;
631
0
    }
632
410
    saved_bytes += scalar_saved_bytes;
633
410
  }
634
5.13k
  return saved_bytes;
635
5.13k
}
636
637
simdutf_warn_unused result
638
implementation::convert_utf16le_to_latin1_with_errors(
639
0
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
640
0
  std::pair<result, char *> ret =
641
0
      sse_convert_utf16_to_latin1_with_errors<endianness::LITTLE>(
642
0
          buf, len, latin1_output);
643
0
  if (ret.first.error) {
644
0
    return ret.first;
645
0
  } // Can return directly since scalar fallback already found correct
646
    // ret.first.count
647
0
  if (ret.first.count != len) { // All good so far, but not finished
648
0
    result scalar_res =
649
0
        scalar::utf16_to_latin1::convert_with_errors<endianness::LITTLE>(
650
0
            buf + ret.first.count, len - ret.first.count, ret.second);
651
0
    if (scalar_res.error) {
652
0
      scalar_res.count += ret.first.count;
653
0
      return scalar_res;
654
0
    } else {
655
0
      ret.second += scalar_res.count;
656
0
    }
657
0
  }
658
0
  ret.first.count =
659
0
      ret.second -
660
0
      latin1_output; // Set count to the number of 8-bit code units written
661
0
  return ret.first;
662
0
}
663
664
simdutf_warn_unused result
665
implementation::convert_utf16be_to_latin1_with_errors(
666
0
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
667
0
  std::pair<result, char *> ret =
668
0
      sse_convert_utf16_to_latin1_with_errors<endianness::BIG>(buf, len,
669
0
                                                               latin1_output);
670
0
  if (ret.first.error) {
671
0
    return ret.first;
672
0
  } // Can return directly since scalar fallback already found correct
673
    // ret.first.count
674
0
  if (ret.first.count != len) { // All good so far, but not finished
675
0
    result scalar_res =
676
0
        scalar::utf16_to_latin1::convert_with_errors<endianness::BIG>(
677
0
            buf + ret.first.count, len - ret.first.count, ret.second);
678
0
    if (scalar_res.error) {
679
0
      scalar_res.count += ret.first.count;
680
0
      return scalar_res;
681
0
    } else {
682
0
      ret.second += scalar_res.count;
683
0
    }
684
0
  }
685
0
  ret.first.count =
686
0
      ret.second -
687
0
      latin1_output; // Set count to the number of 8-bit code units written
688
0
  return ret.first;
689
0
}
690
691
simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(
692
0
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
693
  // optimization opportunity: we could provide an optimized function.
694
0
  return convert_utf16be_to_latin1(buf, len, latin1_output);
695
0
}
696
697
simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(
698
0
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
699
  // optimization opportunity: we could provide an optimized function.
700
0
  return convert_utf16le_to_latin1(buf, len, latin1_output);
701
0
}
702
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
703
704
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
705
simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(
706
9.89k
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
707
9.89k
  std::pair<const char16_t *, char *> ret =
708
9.89k
      sse_convert_utf16_to_utf8<endianness::LITTLE>(buf, len, utf8_output);
709
9.89k
  if (ret.first == nullptr) {
710
52
    return 0;
711
52
  }
712
9.84k
  size_t saved_bytes = ret.second - utf8_output;
713
9.84k
  if (ret.first != buf + len) {
714
984
    const size_t scalar_saved_bytes =
715
984
        scalar::utf16_to_utf8::convert<endianness::LITTLE>(
716
984
            ret.first, len - (ret.first - buf), ret.second);
717
984
    if (scalar_saved_bytes == 0) {
718
41
      return 0;
719
41
    }
720
943
    saved_bytes += scalar_saved_bytes;
721
943
  }
722
9.80k
  return saved_bytes;
723
9.84k
}
724
725
simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(
726
9.89k
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
727
9.89k
  std::pair<const char16_t *, char *> ret =
728
9.89k
      sse_convert_utf16_to_utf8<endianness::BIG>(buf, len, utf8_output);
729
9.89k
  if (ret.first == nullptr) {
730
62
    return 0;
731
62
  }
732
9.83k
  size_t saved_bytes = ret.second - utf8_output;
733
9.83k
  if (ret.first != buf + len) {
734
965
    const size_t scalar_saved_bytes =
735
965
        scalar::utf16_to_utf8::convert<endianness::BIG>(
736
965
            ret.first, len - (ret.first - buf), ret.second);
737
965
    if (scalar_saved_bytes == 0) {
738
36
      return 0;
739
36
    }
740
929
    saved_bytes += scalar_saved_bytes;
741
929
  }
742
9.79k
  return saved_bytes;
743
9.83k
}
744
745
simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(
746
0
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
747
  // ret.first.count is always the position in the buffer, not the number of
748
  // code units written even if finished
749
0
  std::pair<result, char *> ret =
750
0
      westmere::sse_convert_utf16_to_utf8_with_errors<endianness::LITTLE>(
751
0
          buf, len, utf8_output);
752
0
  if (ret.first.error) {
753
0
    return ret.first;
754
0
  } // Can return directly since scalar fallback already found correct
755
    // ret.first.count
756
0
  if (ret.first.count != len) { // All good so far, but not finished
757
0
    result scalar_res =
758
0
        scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(
759
0
            buf + ret.first.count, len - ret.first.count, ret.second);
760
0
    if (scalar_res.error) {
761
0
      scalar_res.count += ret.first.count;
762
0
      return scalar_res;
763
0
    } else {
764
0
      ret.second += scalar_res.count;
765
0
    }
766
0
  }
767
0
  ret.first.count =
768
0
      ret.second -
769
0
      utf8_output; // Set count to the number of 8-bit code units written
770
0
  return ret.first;
771
0
}
772
773
simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(
774
0
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
775
  // ret.first.count is always the position in the buffer, not the number of
776
  // code units written even if finished
777
0
  std::pair<result, char *> ret =
778
0
      westmere::sse_convert_utf16_to_utf8_with_errors<endianness::BIG>(
779
0
          buf, len, utf8_output);
780
0
  if (ret.first.error) {
781
0
    return ret.first;
782
0
  } // Can return directly since scalar fallback already found correct
783
    // ret.first.count
784
0
  if (ret.first.count != len) { // All good so far, but not finished
785
0
    result scalar_res =
786
0
        scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(
787
0
            buf + ret.first.count, len - ret.first.count, ret.second);
788
0
    if (scalar_res.error) {
789
0
      scalar_res.count += ret.first.count;
790
0
      return scalar_res;
791
0
    } else {
792
0
      ret.second += scalar_res.count;
793
0
    }
794
0
  }
795
0
  ret.first.count =
796
0
      ret.second -
797
0
      utf8_output; // Set count to the number of 8-bit code units written
798
0
  return ret.first;
799
0
}
800
801
simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(
802
0
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
803
0
  return convert_utf16le_to_utf8(buf, len, utf8_output);
804
0
}
805
806
simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(
807
0
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
808
0
  return convert_utf16be_to_utf8(buf, len, utf8_output);
809
0
}
810
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
811
812
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
813
simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(
814
5.13k
    const char32_t *buf, size_t len, char *latin1_output) const noexcept {
815
5.13k
  std::pair<const char32_t *, char *> ret =
816
5.13k
      sse_convert_utf32_to_latin1(buf, len, latin1_output);
817
5.13k
  if (ret.first == nullptr) {
818
0
    return 0;
819
0
  }
820
5.13k
  size_t saved_bytes = ret.second - latin1_output;
821
  // if (ret.first != buf + len) {
822
5.13k
  if (ret.first < buf + len) {
823
438
    const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert(
824
438
        ret.first, len - (ret.first - buf), ret.second);
825
438
    if (scalar_saved_bytes == 0) {
826
0
      return 0;
827
0
    }
828
438
    saved_bytes += scalar_saved_bytes;
829
438
  }
830
5.13k
  return saved_bytes;
831
5.13k
}
832
833
simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(
834
0
    const char32_t *buf, size_t len, char *latin1_output) const noexcept {
835
  // ret.first.count is always the position in the buffer, not the number of
836
  // code units written even if finished
837
0
  std::pair<result, char *> ret =
838
0
      westmere::sse_convert_utf32_to_latin1_with_errors(buf, len,
839
0
                                                        latin1_output);
840
0
  if (ret.first.count != len) {
841
0
    result scalar_res = scalar::utf32_to_latin1::convert_with_errors(
842
0
        buf + ret.first.count, len - ret.first.count, ret.second);
843
0
    if (scalar_res.error) {
844
0
      scalar_res.count += ret.first.count;
845
0
      return scalar_res;
846
0
    } else {
847
0
      ret.second += scalar_res.count;
848
0
    }
849
0
  }
850
0
  ret.first.count =
851
0
      ret.second -
852
0
      latin1_output; // Set count to the number of 8-bit code units written
853
0
  return ret.first;
854
0
}
855
856
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(
857
0
    const char32_t *buf, size_t len, char *latin1_output) const noexcept {
858
  // optimization opportunity: we could provide an optimized function.
859
0
  return convert_utf32_to_latin1(buf, len, latin1_output);
860
0
}
861
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
862
863
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
864
simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(
865
4.76k
    const char32_t *buf, size_t len, char *utf8_output) const noexcept {
866
4.76k
  std::pair<const char32_t *, char *> ret =
867
4.76k
      sse_convert_utf32_to_utf8(buf, len, utf8_output);
868
4.76k
  if (ret.first == nullptr) {
869
0
    return 0;
870
0
  }
871
4.76k
  size_t saved_bytes = ret.second - utf8_output;
872
4.76k
  if (ret.first != buf + len) {
873
710
    const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert(
874
710
        ret.first, len - (ret.first - buf), ret.second);
875
710
    if (scalar_saved_bytes == 0) {
876
0
      return 0;
877
0
    }
878
710
    saved_bytes += scalar_saved_bytes;
879
710
  }
880
4.76k
  return saved_bytes;
881
4.76k
}
882
883
simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(
884
0
    const char32_t *buf, size_t len, char *utf8_output) const noexcept {
885
  // ret.first.count is always the position in the buffer, not the number of
886
  // code units written even if finished
887
0
  std::pair<result, char *> ret =
888
0
      westmere::sse_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
889
0
  if (ret.first.count != len) {
890
0
    result scalar_res = scalar::utf32_to_utf8::convert_with_errors(
891
0
        buf + ret.first.count, len - ret.first.count, ret.second);
892
0
    if (scalar_res.error) {
893
0
      scalar_res.count += ret.first.count;
894
0
      return scalar_res;
895
0
    } else {
896
0
      ret.second += scalar_res.count;
897
0
    }
898
0
  }
899
0
  ret.first.count =
900
0
      ret.second -
901
0
      utf8_output; // Set count to the number of 8-bit code units written
902
0
  return ret.first;
903
0
}
904
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
905
906
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
907
simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(
908
0
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
909
0
  std::pair<const char16_t *, char32_t *> ret =
910
0
      sse_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
911
0
  if (ret.first == nullptr) {
912
0
    return 0;
913
0
  }
914
0
  size_t saved_bytes = ret.second - utf32_output;
915
0
  if (ret.first != buf + len) {
916
0
    const size_t scalar_saved_bytes =
917
0
        scalar::utf16_to_utf32::convert<endianness::LITTLE>(
918
0
            ret.first, len - (ret.first - buf), ret.second);
919
0
    if (scalar_saved_bytes == 0) {
920
0
      return 0;
921
0
    }
922
0
    saved_bytes += scalar_saved_bytes;
923
0
  }
924
0
  return saved_bytes;
925
0
}
926
927
simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(
928
0
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
929
0
  std::pair<const char16_t *, char32_t *> ret =
930
0
      sse_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
931
0
  if (ret.first == nullptr) {
932
0
    return 0;
933
0
  }
934
0
  size_t saved_bytes = ret.second - utf32_output;
935
0
  if (ret.first != buf + len) {
936
0
    const size_t scalar_saved_bytes =
937
0
        scalar::utf16_to_utf32::convert<endianness::BIG>(
938
0
            ret.first, len - (ret.first - buf), ret.second);
939
0
    if (scalar_saved_bytes == 0) {
940
0
      return 0;
941
0
    }
942
0
    saved_bytes += scalar_saved_bytes;
943
0
  }
944
0
  return saved_bytes;
945
0
}
946
947
simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(
948
0
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
949
  // ret.first.count is always the position in the buffer, not the number of
950
  // code units written even if finished
951
0
  std::pair<result, char32_t *> ret =
952
0
      westmere::sse_convert_utf16_to_utf32_with_errors<endianness::LITTLE>(
953
0
          buf, len, utf32_output);
954
0
  if (ret.first.error) {
955
0
    return ret.first;
956
0
  } // Can return directly since scalar fallback already found correct
957
    // ret.first.count
958
0
  if (ret.first.count != len) { // All good so far, but not finished
959
0
    result scalar_res =
960
0
        scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(
961
0
            buf + ret.first.count, len - ret.first.count, ret.second);
962
0
    if (scalar_res.error) {
963
0
      scalar_res.count += ret.first.count;
964
0
      return scalar_res;
965
0
    } else {
966
0
      ret.second += scalar_res.count;
967
0
    }
968
0
  }
969
0
  ret.first.count =
970
0
      ret.second -
971
0
      utf32_output; // Set count to the number of 8-bit code units written
972
0
  return ret.first;
973
0
}
974
975
simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(
976
0
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
977
  // ret.first.count is always the position in the buffer, not the number of
978
  // code units written even if finished
979
0
  std::pair<result, char32_t *> ret =
980
0
      westmere::sse_convert_utf16_to_utf32_with_errors<endianness::BIG>(
981
0
          buf, len, utf32_output);
982
0
  if (ret.first.error) {
983
0
    return ret.first;
984
0
  } // Can return directly since scalar fallback already found correct
985
    // ret.first.count
986
0
  if (ret.first.count != len) { // All good so far, but not finished
987
0
    result scalar_res =
988
0
        scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(
989
0
            buf + ret.first.count, len - ret.first.count, ret.second);
990
0
    if (scalar_res.error) {
991
0
      scalar_res.count += ret.first.count;
992
0
      return scalar_res;
993
0
    } else {
994
0
      ret.second += scalar_res.count;
995
0
    }
996
0
  }
997
0
  ret.first.count =
998
0
      ret.second -
999
0
      utf32_output; // Set count to the number of 8-bit code units written
1000
0
  return ret.first;
1001
0
}
1002
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1003
1004
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1005
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(
1006
0
    const char32_t *buf, size_t len, char *utf8_output) const noexcept {
1007
0
  return convert_utf32_to_utf8(buf, len, utf8_output);
1008
0
}
1009
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1010
1011
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1012
simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(
1013
0
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1014
0
  std::pair<const char32_t *, char16_t *> ret =
1015
0
      sse_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
1016
0
  if (ret.first == nullptr) {
1017
0
    return 0;
1018
0
  }
1019
0
  size_t saved_bytes = ret.second - utf16_output;
1020
0
  if (ret.first != buf + len) {
1021
0
    const size_t scalar_saved_bytes =
1022
0
        scalar::utf32_to_utf16::convert<endianness::LITTLE>(
1023
0
            ret.first, len - (ret.first - buf), ret.second);
1024
0
    if (scalar_saved_bytes == 0) {
1025
0
      return 0;
1026
0
    }
1027
0
    saved_bytes += scalar_saved_bytes;
1028
0
  }
1029
0
  return saved_bytes;
1030
0
}
1031
1032
simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(
1033
0
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1034
0
  std::pair<const char32_t *, char16_t *> ret =
1035
0
      sse_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
1036
0
  if (ret.first == nullptr) {
1037
0
    return 0;
1038
0
  }
1039
0
  size_t saved_bytes = ret.second - utf16_output;
1040
0
  if (ret.first != buf + len) {
1041
0
    const size_t scalar_saved_bytes =
1042
0
        scalar::utf32_to_utf16::convert<endianness::BIG>(
1043
0
            ret.first, len - (ret.first - buf), ret.second);
1044
0
    if (scalar_saved_bytes == 0) {
1045
0
      return 0;
1046
0
    }
1047
0
    saved_bytes += scalar_saved_bytes;
1048
0
  }
1049
0
  return saved_bytes;
1050
0
}
1051
1052
simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(
1053
0
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1054
  // ret.first.count is always the position in the buffer, not the number of
1055
  // code units written even if finished
1056
0
  std::pair<result, char16_t *> ret =
1057
0
      westmere::sse_convert_utf32_to_utf16_with_errors<endianness::LITTLE>(
1058
0
          buf, len, utf16_output);
1059
0
  if (ret.first.count != len) {
1060
0
    result scalar_res =
1061
0
        scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>(
1062
0
            buf + ret.first.count, len - ret.first.count, ret.second);
1063
0
    if (scalar_res.error) {
1064
0
      scalar_res.count += ret.first.count;
1065
0
      return scalar_res;
1066
0
    } else {
1067
0
      ret.second += scalar_res.count;
1068
0
    }
1069
0
  }
1070
0
  ret.first.count =
1071
0
      ret.second -
1072
0
      utf16_output; // Set count to the number of 8-bit code units written
1073
0
  return ret.first;
1074
0
}
1075
1076
simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(
1077
0
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1078
  // ret.first.count is always the position in the buffer, not the number of
1079
  // code units written even if finished
1080
0
  std::pair<result, char16_t *> ret =
1081
0
      westmere::sse_convert_utf32_to_utf16_with_errors<endianness::BIG>(
1082
0
          buf, len, utf16_output);
1083
0
  if (ret.first.count != len) {
1084
0
    result scalar_res =
1085
0
        scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>(
1086
0
            buf + ret.first.count, len - ret.first.count, ret.second);
1087
0
    if (scalar_res.error) {
1088
0
      scalar_res.count += ret.first.count;
1089
0
      return scalar_res;
1090
0
    } else {
1091
0
      ret.second += scalar_res.count;
1092
0
    }
1093
0
  }
1094
0
  ret.first.count =
1095
0
      ret.second -
1096
0
      utf16_output; // Set count to the number of 8-bit code units written
1097
0
  return ret.first;
1098
0
}
1099
1100
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(
1101
0
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1102
0
  return convert_utf32_to_utf16le(buf, len, utf16_output);
1103
0
}
1104
1105
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(
1106
0
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1107
0
  return convert_utf32_to_utf16be(buf, len, utf16_output);
1108
0
}
1109
1110
simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(
1111
0
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
1112
0
  return convert_utf16le_to_utf32(buf, len, utf32_output);
1113
0
}
1114
1115
simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(
1116
0
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
1117
0
  return convert_utf16be_to_utf32(buf, len, utf32_output);
1118
0
}
1119
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1120
1121
#if SIMDUTF_FEATURE_UTF16
1122
void implementation::change_endianness_utf16(const char16_t *input,
1123
                                             size_t length,
1124
0
                                             char16_t *output) const noexcept {
1125
0
  utf16::change_endianness_utf16(input, length, output);
1126
0
}
1127
1128
simdutf_warn_unused size_t implementation::count_utf16le(
1129
0
    const char16_t *input, size_t length) const noexcept {
1130
0
  return utf16::count_code_points<endianness::LITTLE>(input, length);
1131
0
}
1132
1133
simdutf_warn_unused size_t implementation::count_utf16be(
1134
0
    const char16_t *input, size_t length) const noexcept {
1135
0
  return utf16::count_code_points<endianness::BIG>(input, length);
1136
0
}
1137
#endif // SIMDUTF_FEATURE_UTF16
1138
1139
#if SIMDUTF_FEATURE_UTF8
1140
simdutf_warn_unused size_t
1141
10.2k
implementation::count_utf8(const char *input, size_t length) const noexcept {
1142
10.2k
  return utf8::count_code_points_bytemask(input, length);
1143
10.2k
}
1144
#endif // SIMDUTF_FEATURE_UTF8
1145
1146
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1147
simdutf_warn_unused size_t implementation::latin1_length_from_utf8(
1148
10.2k
    const char *buf, size_t len) const noexcept {
1149
10.2k
  return count_utf8(buf, len);
1150
10.2k
}
1151
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1152
1153
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1154
simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(
1155
9.89k
    const char16_t *input, size_t length) const noexcept {
1156
9.89k
  return utf16::utf8_length_from_utf16_bytemask<endianness::LITTLE>(input,
1157
9.89k
                                                                    length);
1158
9.89k
}
1159
1160
simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(
1161
9.89k
    const char16_t *input, size_t length) const noexcept {
1162
9.89k
  return utf16::utf8_length_from_utf16_bytemask<endianness::BIG>(input, length);
1163
9.89k
}
1164
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1165
1166
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1167
simdutf_warn_unused size_t implementation::utf8_length_from_latin1(
1168
5.42k
    const char *input, size_t len) const noexcept {
1169
5.42k
  const uint8_t *str = reinterpret_cast<const uint8_t *>(input);
1170
5.42k
  size_t answer = len / sizeof(__m128i) * sizeof(__m128i);
1171
5.42k
  size_t i = 0;
1172
5.42k
  if (answer >= 2048) { // long strings optimization
1173
0
    __m128i two_64bits = _mm_setzero_si128();
1174
0
    while (i + sizeof(__m128i) <= len) {
1175
0
      __m128i runner = _mm_setzero_si128();
1176
0
      size_t iterations = (len - i) / sizeof(__m128i);
1177
0
      if (iterations > 255) {
1178
0
        iterations = 255;
1179
0
      }
1180
0
      size_t max_i = i + iterations * sizeof(__m128i) - sizeof(__m128i);
1181
0
      for (; i + 4 * sizeof(__m128i) <= max_i; i += 4 * sizeof(__m128i)) {
1182
0
        __m128i input1 = _mm_loadu_si128((const __m128i *)(str + i));
1183
0
        __m128i input2 =
1184
0
            _mm_loadu_si128((const __m128i *)(str + i + sizeof(__m128i)));
1185
0
        __m128i input3 =
1186
0
            _mm_loadu_si128((const __m128i *)(str + i + 2 * sizeof(__m128i)));
1187
0
        __m128i input4 =
1188
0
            _mm_loadu_si128((const __m128i *)(str + i + 3 * sizeof(__m128i)));
1189
0
        __m128i input12 =
1190
0
            _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input1),
1191
0
                         _mm_cmpgt_epi8(_mm_setzero_si128(), input2));
1192
0
        __m128i input34 =
1193
0
            _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input3),
1194
0
                         _mm_cmpgt_epi8(_mm_setzero_si128(), input4));
1195
0
        __m128i input1234 = _mm_add_epi8(input12, input34);
1196
0
        runner = _mm_sub_epi8(runner, input1234);
1197
0
      }
1198
0
      for (; i <= max_i; i += sizeof(__m128i)) {
1199
0
        __m128i more_input = _mm_loadu_si128((const __m128i *)(str + i));
1200
0
        runner = _mm_sub_epi8(runner,
1201
0
                              _mm_cmpgt_epi8(_mm_setzero_si128(), more_input));
1202
0
      }
1203
0
      two_64bits =
1204
0
          _mm_add_epi64(two_64bits, _mm_sad_epu8(runner, _mm_setzero_si128()));
1205
0
    }
1206
0
    answer +=
1207
0
        _mm_extract_epi64(two_64bits, 0) + _mm_extract_epi64(two_64bits, 1);
1208
5.42k
  } else if (answer > 0) { // short string optimization
1209
6.61k
    for (; i + 2 * sizeof(__m128i) <= len; i += 2 * sizeof(__m128i)) {
1210
5.85k
      __m128i latin = _mm_loadu_si128((const __m128i *)(input + i));
1211
5.85k
      uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin);
1212
5.85k
      answer += count_ones(non_ascii);
1213
5.85k
      latin = _mm_loadu_si128((const __m128i *)(input + i) + 1);
1214
5.85k
      non_ascii = (uint16_t)_mm_movemask_epi8(latin);
1215
5.85k
      answer += count_ones(non_ascii);
1216
5.85k
    }
1217
983
    for (; i + sizeof(__m128i) <= len; i += sizeof(__m128i)) {
1218
226
      __m128i latin = _mm_loadu_si128((const __m128i *)(input + i));
1219
226
      uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin);
1220
226
      answer += count_ones(non_ascii);
1221
226
    }
1222
757
  }
1223
5.42k
  return answer + scalar::latin1::utf8_length_from_latin1(
1224
5.42k
                      reinterpret_cast<const char *>(str + i), len - i);
1225
5.42k
}
1226
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1227
1228
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1229
simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(
1230
0
    const char16_t *input, size_t length) const noexcept {
1231
0
  return utf16::utf32_length_from_utf16<endianness::LITTLE>(input, length);
1232
0
}
1233
1234
simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(
1235
0
    const char16_t *input, size_t length) const noexcept {
1236
0
  return utf16::utf32_length_from_utf16<endianness::BIG>(input, length);
1237
0
}
1238
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1239
1240
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1241
simdutf_warn_unused size_t implementation::utf16_length_from_utf8(
1242
20.3k
    const char *input, size_t length) const noexcept {
1243
20.3k
  return utf8::utf16_length_from_utf8_bytemask(input, length);
1244
20.3k
}
1245
simdutf_warn_unused result
1246
implementation::utf8_length_from_utf16le_with_replacement(
1247
0
    const char16_t *input, size_t length) const noexcept {
1248
0
  return utf16::utf8_length_from_utf16_with_replacement<endianness::LITTLE>(
1249
0
      input, length);
1250
0
}
1251
1252
simdutf_warn_unused result
1253
implementation::utf8_length_from_utf16be_with_replacement(
1254
0
    const char16_t *input, size_t length) const noexcept {
1255
0
  return utf16::utf8_length_from_utf16_with_replacement<endianness::BIG>(
1256
0
      input, length);
1257
0
}
1258
1259
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1260
1261
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1262
simdutf_warn_unused size_t implementation::utf8_length_from_utf32(
1263
4.76k
    const char32_t *input, size_t length) const noexcept {
1264
4.76k
  return utf32::utf8_length_from_utf32(input, length);
1265
4.76k
}
1266
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1267
1268
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1269
simdutf_warn_unused size_t implementation::utf16_length_from_utf32(
1270
0
    const char32_t *input, size_t length) const noexcept {
1271
0
  const __m128i v_00000000 = _mm_setzero_si128();
1272
0
  const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000);
1273
0
  size_t pos = 0;
1274
0
  size_t count = 0;
1275
0
  for (; pos + 4 <= length; pos += 4) {
1276
0
    __m128i in = _mm_loadu_si128((__m128i *)(input + pos));
1277
0
    const __m128i surrogate_bytemask =
1278
0
        _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000);
1279
0
    const uint16_t surrogate_bitmask =
1280
0
        static_cast<uint16_t>(_mm_movemask_epi8(surrogate_bytemask));
1281
0
    size_t surrogate_count = (16 - count_ones(surrogate_bitmask)) / 4;
1282
0
    count += 4 + surrogate_count;
1283
0
  }
1284
0
  return count +
1285
0
         scalar::utf32::utf16_length_from_utf32(input + pos, length - pos);
1286
0
}
1287
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1288
1289
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1290
simdutf_warn_unused size_t implementation::utf32_length_from_utf8(
1291
5.13k
    const char *input, size_t length) const noexcept {
1292
5.13k
  return utf8::count_code_points(input, length);
1293
5.13k
}
1294
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1295
1296
#if SIMDUTF_FEATURE_BASE64
1297
simdutf_warn_unused result implementation::base64_to_binary(
1298
    const char *input, size_t length, char *output, base64_options options,
1299
14.9k
    last_chunk_handling_options last_chunk_options) const noexcept {
1300
14.9k
  if (options & base64_default_or_url) {
1301
0
    if (options == base64_options::base64_default_or_url_accept_garbage) {
1302
0
      return base64::compress_decode_base64<false, true, true>(
1303
0
          output, input, length, options, last_chunk_options);
1304
0
    } else {
1305
0
      return base64::compress_decode_base64<false, false, true>(
1306
0
          output, input, length, options, last_chunk_options);
1307
0
    }
1308
14.9k
  } else if (options & base64_url) {
1309
0
    if (options == base64_options::base64_url_accept_garbage) {
1310
0
      return base64::compress_decode_base64<true, true, false>(
1311
0
          output, input, length, options, last_chunk_options);
1312
0
    } else {
1313
0
      return base64::compress_decode_base64<true, false, false>(
1314
0
          output, input, length, options, last_chunk_options);
1315
0
    }
1316
14.9k
  } else {
1317
14.9k
    if (options == base64_options::base64_default_accept_garbage) {
1318
0
      return base64::compress_decode_base64<false, true, false>(
1319
0
          output, input, length, options, last_chunk_options);
1320
14.9k
    } else {
1321
14.9k
      return base64::compress_decode_base64<false, false, false>(
1322
14.9k
          output, input, length, options, last_chunk_options);
1323
14.9k
    }
1324
14.9k
  }
1325
14.9k
}
1326
1327
simdutf_warn_unused full_result implementation::base64_to_binary_details(
1328
    const char *input, size_t length, char *output, base64_options options,
1329
0
    last_chunk_handling_options last_chunk_options) const noexcept {
1330
0
  if (options & base64_default_or_url) {
1331
0
    if (options == base64_options::base64_default_or_url_accept_garbage) {
1332
0
      return base64::compress_decode_base64<false, true, true>(
1333
0
          output, input, length, options, last_chunk_options);
1334
0
    } else {
1335
0
      return base64::compress_decode_base64<false, false, true>(
1336
0
          output, input, length, options, last_chunk_options);
1337
0
    }
1338
0
  } else if (options & base64_url) {
1339
0
    if (options == base64_options::base64_url_accept_garbage) {
1340
0
      return base64::compress_decode_base64<true, true, false>(
1341
0
          output, input, length, options, last_chunk_options);
1342
0
    } else {
1343
0
      return base64::compress_decode_base64<true, false, false>(
1344
0
          output, input, length, options, last_chunk_options);
1345
0
    }
1346
0
  } else {
1347
0
    if (options == base64_options::base64_default_accept_garbage) {
1348
0
      return base64::compress_decode_base64<false, true, false>(
1349
0
          output, input, length, options, last_chunk_options);
1350
0
    } else {
1351
0
      return base64::compress_decode_base64<false, false, false>(
1352
0
          output, input, length, options, last_chunk_options);
1353
0
    }
1354
0
  }
1355
0
}
1356
1357
simdutf_warn_unused result implementation::base64_to_binary(
1358
    const char16_t *input, size_t length, char *output, base64_options options,
1359
0
    last_chunk_handling_options last_chunk_options) const noexcept {
1360
0
  if (options & base64_default_or_url) {
1361
0
    if (options == base64_options::base64_default_or_url_accept_garbage) {
1362
0
      return base64::compress_decode_base64<false, true, true>(
1363
0
          output, input, length, options, last_chunk_options);
1364
0
    } else {
1365
0
      return base64::compress_decode_base64<false, false, true>(
1366
0
          output, input, length, options, last_chunk_options);
1367
0
    }
1368
0
  } else if (options & base64_url) {
1369
0
    if (options == base64_options::base64_url_accept_garbage) {
1370
0
      return base64::compress_decode_base64<true, true, false>(
1371
0
          output, input, length, options, last_chunk_options);
1372
0
    } else {
1373
0
      return base64::compress_decode_base64<true, false, false>(
1374
0
          output, input, length, options, last_chunk_options);
1375
0
    }
1376
0
  } else {
1377
0
    if (options == base64_options::base64_default_accept_garbage) {
1378
0
      return base64::compress_decode_base64<false, true, false>(
1379
0
          output, input, length, options, last_chunk_options);
1380
0
    } else {
1381
0
      return base64::compress_decode_base64<false, false, false>(
1382
0
          output, input, length, options, last_chunk_options);
1383
0
    }
1384
0
  }
1385
0
}
1386
1387
simdutf_warn_unused full_result implementation::base64_to_binary_details(
1388
    const char16_t *input, size_t length, char *output, base64_options options,
1389
0
    last_chunk_handling_options last_chunk_options) const noexcept {
1390
0
  if (options & base64_default_or_url) {
1391
0
    if (options == base64_options::base64_default_or_url_accept_garbage) {
1392
0
      return base64::compress_decode_base64<false, true, true>(
1393
0
          output, input, length, options, last_chunk_options);
1394
0
    } else {
1395
0
      return base64::compress_decode_base64<false, false, true>(
1396
0
          output, input, length, options, last_chunk_options);
1397
0
    }
1398
0
  } else if (options & base64_url) {
1399
0
    if (options == base64_options::base64_url_accept_garbage) {
1400
0
      return base64::compress_decode_base64<true, true, false>(
1401
0
          output, input, length, options, last_chunk_options);
1402
0
    } else {
1403
0
      return base64::compress_decode_base64<true, false, false>(
1404
0
          output, input, length, options, last_chunk_options);
1405
0
    }
1406
0
  } else {
1407
0
    if (options == base64_options::base64_default_accept_garbage) {
1408
0
      return base64::compress_decode_base64<false, true, false>(
1409
0
          output, input, length, options, last_chunk_options);
1410
0
    } else {
1411
0
      return base64::compress_decode_base64<false, false, false>(
1412
0
          output, input, length, options, last_chunk_options);
1413
0
    }
1414
0
  }
1415
0
}
1416
1417
size_t implementation::binary_to_base64(const char *input, size_t length,
1418
                                        char *output,
1419
14.4k
                                        base64_options options) const noexcept {
1420
14.4k
  if (options & base64_url) {
1421
0
    return encode_base64<true>(output, input, length, options);
1422
14.4k
  } else {
1423
14.4k
    return encode_base64<false>(output, input, length, options);
1424
14.4k
  }
1425
14.4k
}
1426
1427
size_t implementation::binary_to_base64_with_lines(
1428
    const char *input, size_t length, char *output, size_t line_length,
1429
0
    base64_options options) const noexcept {
1430
0
  if (options & base64_url) {
1431
0
    return encode_base64_impl<true, true>(output, input, length, options,
1432
0
                                          line_length);
1433
1434
0
  } else {
1435
0
    return encode_base64_impl<false, true>(output, input, length, options,
1436
0
                                           line_length);
1437
0
  }
1438
0
}
1439
1440
const char *implementation::find(const char *start, const char *end,
1441
0
                                 char character) const noexcept {
1442
0
  return util::find(start, end, character);
1443
0
}
1444
1445
const char16_t *implementation::find(const char16_t *start, const char16_t *end,
1446
0
                                     char16_t character) const noexcept {
1447
0
  return util::find(start, end, character);
1448
0
}
1449
#endif // SIMDUTF_FEATURE_BASE64
1450
1451
} // namespace SIMDUTF_IMPLEMENTATION
1452
} // namespace simdutf
1453
1454
#include "simdutf/westmere/end.h"