Coverage Report

Created: 2026-02-26 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/simdutf/src/westmere/implementation.cpp
Line
Count
Source
1
#include "simdutf/westmere/begin.h"
2
3
namespace simdutf {
4
namespace SIMDUTF_IMPLEMENTATION {
5
namespace {
6
#ifndef SIMDUTF_WESTMERE_H
7
  #error "westmere.h must be included"
8
#endif
9
using namespace simd;
10
11
#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING ||                \
12
    SIMDUTF_FEATURE_UTF8
13
4.77M
simdutf_really_inline bool is_ascii(const simd8x64<uint8_t> &input) {
14
4.77M
  return input.reduce_or().is_ascii();
15
4.77M
}
16
#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING ||
17
       // SIMDUTF_FEATURE_UTF8
18
19
#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
20
simdutf_really_inline simd8<bool>
21
must_be_2_3_continuation(const simd8<uint8_t> prev2,
22
6.55M
                         const simd8<uint8_t> prev3) {
23
6.55M
  simd8<uint8_t> is_third_byte =
24
6.55M
      prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be >= 0x80
25
6.55M
  simd8<uint8_t> is_fourth_byte =
26
6.55M
      prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80
27
6.55M
  return simd8<bool>(is_third_byte | is_fourth_byte);
28
6.55M
}
29
#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
30
31
#if SIMDUTF_FEATURE_UTF8
32
  #include "westmere/internal/loader.cpp"
33
#endif // SIMDUTF_FEATURE_UTF8
34
35
#if SIMDUTF_FEATURE_UTF16
36
  #include "westmere/sse_utf16fix.cpp"
37
#endif // SIMDUTF_FEATURE_UTF16
38
#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
39
  #include "westmere/sse_validate_utf16.cpp"
40
#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
41
42
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
43
  #include "westmere/sse_convert_latin1_to_utf8.cpp"
44
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
45
46
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
47
  #include "westmere/sse_convert_latin1_to_utf16.cpp"
48
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
49
50
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
51
  #include "westmere/sse_convert_latin1_to_utf32.cpp"
52
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
53
54
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
55
  #include "westmere/sse_convert_utf8_to_utf16.cpp"
56
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
57
58
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
59
  #include "westmere/sse_convert_utf8_to_utf32.cpp"
60
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
61
62
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
63
  #include "westmere/sse_convert_utf8_to_latin1.cpp"
64
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
65
66
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
67
  #include "westmere/sse_convert_utf16_to_latin1.cpp"
68
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
69
70
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
71
  #include "westmere/sse_convert_utf16_to_utf8.cpp"
72
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
73
74
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
75
  #include "westmere/sse_convert_utf16_to_utf32.cpp"
76
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
77
78
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
79
  #include "westmere/sse_convert_utf32_to_latin1.cpp"
80
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
81
82
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
83
  #include "westmere/sse_convert_utf32_to_utf8.cpp"
84
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
85
86
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
87
  #include "westmere/sse_convert_utf32_to_utf16.cpp"
88
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
89
90
#if SIMDUTF_FEATURE_BASE64
91
  #include "westmere/sse_base64.cpp"
92
#endif // SIMDUTF_FEATURE_BASE64
93
94
} // unnamed namespace
95
} // namespace SIMDUTF_IMPLEMENTATION
96
} // namespace simdutf
97
98
#include "generic/buf_block_reader.h"
99
#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
100
  #include "generic/utf8_validation/utf8_lookup4_algorithm.h"
101
  #include "generic/utf8_validation/utf8_validator.h"
102
#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
103
#if SIMDUTF_FEATURE_ASCII
104
  #include "generic/ascii_validation.h"
105
#endif // SIMDUTF_FEATURE_ASCII
106
107
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
108
  // transcoding from UTF-8 to UTF-16
109
  #include "generic/utf8_to_utf16/valid_utf8_to_utf16.h"
110
  #include "generic/utf8_to_utf16/utf8_to_utf16.h"
111
  #include "generic/utf8/utf16_length_from_utf8_bytemask.h"
112
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
113
114
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
115
  #include "generic/utf8_to_utf32/valid_utf8_to_utf32.h"
116
  #include "generic/utf8_to_utf32/utf8_to_utf32.h"
117
  #include "generic/utf32.h"
118
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
119
120
#if SIMDUTF_FEATURE_UTF8
121
  #include "generic/utf8.h"
122
#endif // SIMDUTF_FEATURE_UTF8
123
#if SIMDUTF_FEATURE_UTF16
124
  #include "generic/utf16.h"
125
  #include "generic/utf16/utf8_length_from_utf16_bytemask.h"
126
#endif // SIMDUTF_FEATURE_UTF16
127
#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
128
  #include "generic/validate_utf16.h"
129
#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
130
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
131
  #include "generic/utf8_to_latin1/utf8_to_latin1.h"
132
  #include "generic/utf8_to_latin1/valid_utf8_to_latin1.h"
133
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
134
135
#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
136
  #include "generic/validate_utf32.h"
137
#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
138
139
#if SIMDUTF_FEATURE_BASE64
140
  #include "generic/base64.h"
141
  #include "generic/find.h"
142
#endif // SIMDUTF_FEATURE_BASE64
143
144
//
145
// Implementation-specific overrides
146
//
147
148
namespace simdutf {
149
namespace SIMDUTF_IMPLEMENTATION {
150
151
#if SIMDUTF_FEATURE_DETECT_ENCODING
152
simdutf_warn_unused int
153
implementation::detect_encodings(const char *input,
154
0
                                 size_t length) const noexcept {
155
  // If there is a BOM, then we trust it.
156
0
  auto bom_encoding = simdutf::BOM::check_bom(input, length);
157
0
  if (bom_encoding != encoding_type::unspecified) {
158
0
    return bom_encoding;
159
0
  }
160
161
0
  int out = 0;
162
0
  uint32_t utf16_err = (length % 2);
163
0
  uint32_t utf32_err = (length % 4);
164
0
  uint32_t ends_with_high = 0;
165
0
  const auto v_d8 = simd8<uint8_t>::splat(0xd8);
166
0
  const auto v_f8 = simd8<uint8_t>::splat(0xf8);
167
0
  const auto v_fc = simd8<uint8_t>::splat(0xfc);
168
0
  const auto v_dc = simd8<uint8_t>::splat(0xdc);
169
0
  const __m128i standardmax = _mm_set1_epi32(0x10ffff);
170
0
  const __m128i offset = _mm_set1_epi32(0xffff2000);
171
0
  const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff);
172
0
  __m128i currentmax = _mm_setzero_si128();
173
0
  __m128i currentoffsetmax = _mm_setzero_si128();
174
175
0
  utf8_checker c{};
176
0
  buf_block_reader<64> reader(reinterpret_cast<const uint8_t *>(input), length);
177
0
  while (reader.has_full_block()) {
178
0
    simd::simd8x64<uint8_t> in(reader.full_block());
179
    // utf8 checks
180
0
    c.check_next_input(in);
181
182
    // utf16le checks
183
0
    auto in0 = simd16<uint16_t>(in.chunks[0]);
184
0
    auto in1 = simd16<uint16_t>(in.chunks[1]);
185
0
    const auto t0 = in0.shr<8>();
186
0
    const auto t1 = in1.shr<8>();
187
0
    const auto packed1 = simd16<uint16_t>::pack(t0, t1);
188
0
    auto in2 = simd16<uint16_t>(in.chunks[2]);
189
0
    auto in3 = simd16<uint16_t>(in.chunks[3]);
190
0
    const auto t2 = in2.shr<8>();
191
0
    const auto t3 = in3.shr<8>();
192
0
    const auto packed2 = simd16<uint16_t>::pack(t2, t3);
193
194
0
    const auto surrogates_wordmask_lo = (packed1 & v_f8) == v_d8;
195
0
    const auto surrogates_wordmask_hi = (packed2 & v_f8) == v_d8;
196
0
    const uint32_t surrogates_bitmask =
197
0
        (surrogates_wordmask_hi.to_bitmask() << 16) |
198
0
        surrogates_wordmask_lo.to_bitmask();
199
0
    const auto vL_lo = (packed1 & v_fc) == v_dc;
200
0
    const auto vL_hi = (packed2 & v_fc) == v_dc;
201
0
    const uint32_t L = (vL_hi.to_bitmask() << 16) | vL_lo.to_bitmask();
202
0
    const uint32_t H = L ^ surrogates_bitmask;
203
0
    utf16_err |= (((H << 1) | ends_with_high) != L);
204
0
    ends_with_high = (H & 0x80000000) != 0;
205
206
    // utf32le checks
207
0
    currentmax = _mm_max_epu32(in.chunks[0], currentmax);
208
0
    currentoffsetmax =
209
0
        _mm_max_epu32(_mm_add_epi32(in.chunks[0], offset), currentoffsetmax);
210
0
    currentmax = _mm_max_epu32(in.chunks[1], currentmax);
211
0
    currentoffsetmax =
212
0
        _mm_max_epu32(_mm_add_epi32(in.chunks[1], offset), currentoffsetmax);
213
0
    currentmax = _mm_max_epu32(in.chunks[2], currentmax);
214
0
    currentoffsetmax =
215
0
        _mm_max_epu32(_mm_add_epi32(in.chunks[2], offset), currentoffsetmax);
216
0
    currentmax = _mm_max_epu32(in.chunks[3], currentmax);
217
0
    currentoffsetmax =
218
0
        _mm_max_epu32(_mm_add_epi32(in.chunks[3], offset), currentoffsetmax);
219
220
0
    reader.advance();
221
0
  }
222
223
0
  uint8_t block[64]{};
224
0
  size_t idx = reader.block_index();
225
0
  std::memcpy(block, &input[idx], length - idx);
226
0
  simd::simd8x64<uint8_t> in(block);
227
0
  c.check_next_input(in);
228
229
  // utf16le last block check
230
0
  auto in0 = simd16<uint16_t>(in.chunks[0]);
231
0
  auto in1 = simd16<uint16_t>(in.chunks[1]);
232
0
  const auto t0 = in0.shr<8>();
233
0
  const auto t1 = in1.shr<8>();
234
0
  const auto packed1 = simd16<uint16_t>::pack(t0, t1);
235
0
  auto in2 = simd16<uint16_t>(in.chunks[2]);
236
0
  auto in3 = simd16<uint16_t>(in.chunks[3]);
237
0
  const auto t2 = in2.shr<8>();
238
0
  const auto t3 = in3.shr<8>();
239
0
  const auto packed2 = simd16<uint16_t>::pack(t2, t3);
240
241
0
  const auto surrogates_wordmask_lo = (packed1 & v_f8) == v_d8;
242
0
  const auto surrogates_wordmask_hi = (packed2 & v_f8) == v_d8;
243
0
  const uint32_t surrogates_bitmask =
244
0
      (surrogates_wordmask_hi.to_bitmask() << 16) |
245
0
      surrogates_wordmask_lo.to_bitmask();
246
0
  const auto vL_lo = (packed1 & v_fc) == v_dc;
247
0
  const auto vL_hi = (packed2 & v_fc) == v_dc;
248
0
  const uint32_t L = (vL_hi.to_bitmask() << 16) | vL_lo.to_bitmask();
249
0
  const uint32_t H = L ^ surrogates_bitmask;
250
0
  utf16_err |= (((H << 1) | ends_with_high) != L);
251
  // this is required to check for last byte ending in high and end of input
252
  // is reached
253
0
  ends_with_high = (H & 0x80000000) != 0;
254
0
  utf16_err |= ends_with_high;
255
256
  // utf32le last block check
257
0
  currentmax = _mm_max_epu32(in.chunks[0], currentmax);
258
0
  currentoffsetmax =
259
0
      _mm_max_epu32(_mm_add_epi32(in.chunks[0], offset), currentoffsetmax);
260
0
  currentmax = _mm_max_epu32(in.chunks[1], currentmax);
261
0
  currentoffsetmax =
262
0
      _mm_max_epu32(_mm_add_epi32(in.chunks[1], offset), currentoffsetmax);
263
0
  currentmax = _mm_max_epu32(in.chunks[2], currentmax);
264
0
  currentoffsetmax =
265
0
      _mm_max_epu32(_mm_add_epi32(in.chunks[2], offset), currentoffsetmax);
266
0
  currentmax = _mm_max_epu32(in.chunks[3], currentmax);
267
0
  currentoffsetmax =
268
0
      _mm_max_epu32(_mm_add_epi32(in.chunks[3], offset), currentoffsetmax);
269
270
0
  reader.advance();
271
272
0
  c.check_eof();
273
0
  bool is_valid_utf8 = !c.errors();
274
0
  __m128i is_zero =
275
0
      _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax);
276
0
  utf32_err |= (_mm_test_all_zeros(is_zero, is_zero) == 0);
277
278
0
  is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax),
279
0
                          standardoffsetmax);
280
0
  utf32_err |= (_mm_test_all_zeros(is_zero, is_zero) == 0);
281
0
  if (is_valid_utf8) {
282
0
    out |= encoding_type::UTF8;
283
0
  }
284
0
  if (utf16_err == 0) {
285
0
    out |= encoding_type::UTF16_LE;
286
0
  }
287
0
  if (utf32_err == 0) {
288
0
    out |= encoding_type::UTF32_LE;
289
0
  }
290
0
  return out;
291
0
}
292
#endif // SIMDUTF_FEATURE_DETECT_ENCODING
293
294
#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
295
simdutf_warn_unused bool
296
3.47k
implementation::validate_utf8(const char *buf, size_t len) const noexcept {
297
3.47k
  return westmere::utf8_validation::generic_validate_utf8(buf, len);
298
3.47k
}
299
#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
300
301
#if SIMDUTF_FEATURE_UTF8
302
simdutf_warn_unused result implementation::validate_utf8_with_errors(
303
3.47k
    const char *buf, size_t len) const noexcept {
304
3.47k
  return westmere::utf8_validation::generic_validate_utf8_with_errors(buf, len);
305
3.47k
}
306
#endif // SIMDUTF_FEATURE_UTF8
307
308
#if SIMDUTF_FEATURE_ASCII
309
simdutf_warn_unused bool
310
0
implementation::validate_ascii(const char *buf, size_t len) const noexcept {
311
0
  return westmere::ascii_validation::generic_validate_ascii(buf, len);
312
0
}
313
314
simdutf_warn_unused result implementation::validate_ascii_with_errors(
315
0
    const char *buf, size_t len) const noexcept {
316
0
  return westmere::ascii_validation::generic_validate_ascii_with_errors(buf,
317
0
                                                                        len);
318
0
}
319
#endif // SIMDUTF_FEATURE_ASCII
320
321
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
322
simdutf_warn_unused bool
323
implementation::validate_utf16le_as_ascii(const char16_t *buf,
324
0
                                          size_t len) const noexcept {
325
0
  return westmere::utf16::validate_utf16_as_ascii_with_errors<
326
0
             endianness::LITTLE>(buf, len)
327
0
             .error == SUCCESS;
328
0
}
329
330
simdutf_warn_unused bool
331
implementation::validate_utf16be_as_ascii(const char16_t *buf,
332
0
                                          size_t len) const noexcept {
333
0
  return westmere::utf16::validate_utf16_as_ascii_with_errors<endianness::BIG>(
334
0
             buf, len)
335
0
             .error == SUCCESS;
336
0
}
337
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
338
339
#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
340
simdutf_warn_unused bool
341
implementation::validate_utf16le(const char16_t *buf,
342
1.38k
                                 size_t len) const noexcept {
343
1.38k
  if (simdutf_unlikely(len == 0)) {
344
    // empty input is valid UTF-16. protect the implementation from
345
    // handling nullptr
346
17
    return true;
347
17
  }
348
1.36k
  const auto res =
349
1.36k
      westmere::utf16::validate_utf16_with_errors<endianness::LITTLE>(buf, len);
350
1.36k
  if (res.is_err()) {
351
165
    return false;
352
165
  }
353
354
1.20k
  if (res.count == len)
355
0
    return true;
356
357
1.20k
  return scalar::utf16::validate<endianness::LITTLE>(buf + res.count,
358
1.20k
                                                     len - res.count);
359
1.20k
}
360
#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
361
362
#if SIMDUTF_FEATURE_UTF16
363
simdutf_warn_unused bool
364
implementation::validate_utf16be(const char16_t *buf,
365
1.32k
                                 size_t len) const noexcept {
366
1.32k
  if (simdutf_unlikely(len == 0)) {
367
    // empty input is valid UTF-16. protect the implementation from
368
    // handling nullptr
369
16
    return true;
370
16
  }
371
1.30k
  const auto res =
372
1.30k
      westmere::utf16::validate_utf16_with_errors<endianness::BIG>(buf, len);
373
1.30k
  if (res.is_err()) {
374
183
    return false;
375
183
  }
376
377
1.12k
  if (res.count == len)
378
0
    return true;
379
380
1.12k
  return scalar::utf16::validate<endianness::BIG>(buf + res.count,
381
1.12k
                                                  len - res.count);
382
1.12k
}
383
384
simdutf_warn_unused result implementation::validate_utf16le_with_errors(
385
1.38k
    const char16_t *buf, size_t len) const noexcept {
386
1.38k
  const result res =
387
1.38k
      westmere::utf16::validate_utf16_with_errors<endianness::LITTLE>(buf, len);
388
1.38k
  if (res.count != len) {
389
1.36k
    const result scalar_res =
390
1.36k
        scalar::utf16::validate_with_errors<endianness::LITTLE>(
391
1.36k
            buf + res.count, len - res.count);
392
1.36k
    return result(scalar_res.error, res.count + scalar_res.count);
393
1.36k
  } else {
394
17
    return res;
395
17
  }
396
1.38k
}
397
398
simdutf_warn_unused result implementation::validate_utf16be_with_errors(
399
1.32k
    const char16_t *buf, size_t len) const noexcept {
400
1.32k
  const result res =
401
1.32k
      westmere::utf16::validate_utf16_with_errors<endianness::BIG>(buf, len);
402
1.32k
  if (res.count != len) {
403
1.30k
    result scalar_res = scalar::utf16::validate_with_errors<endianness::BIG>(
404
1.30k
        buf + res.count, len - res.count);
405
1.30k
    return result(scalar_res.error, res.count + scalar_res.count);
406
1.30k
  } else {
407
16
    return res;
408
16
  }
409
1.32k
}
410
411
void implementation::to_well_formed_utf16le(const char16_t *input, size_t len,
412
0
                                            char16_t *output) const noexcept {
413
0
  return utf16fix_sse<endianness::LITTLE>(input, len, output);
414
0
}
415
416
void implementation::to_well_formed_utf16be(const char16_t *input, size_t len,
417
0
                                            char16_t *output) const noexcept {
418
0
  return utf16fix_sse<endianness::BIG>(input, len, output);
419
0
}
420
#endif // SIMDUTF_FEATURE_UTF16
421
422
#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
423
simdutf_warn_unused bool
424
2.58k
implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
425
2.58k
  return utf32::validate(buf, len);
426
2.58k
}
427
#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
428
429
#if SIMDUTF_FEATURE_UTF32
430
simdutf_warn_unused result implementation::validate_utf32_with_errors(
431
2.58k
    const char32_t *buf, size_t len) const noexcept {
432
2.58k
  return utf32::validate_with_errors(buf, len);
433
2.58k
}
434
#endif // SIMDUTF_FEATURE_UTF32
435
436
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
437
simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(
438
604
    const char *buf, size_t len, char *utf8_output) const noexcept {
439
440
604
  std::pair<const char *, char *> ret =
441
604
      sse_convert_latin1_to_utf8(buf, len, utf8_output);
442
604
  size_t converted_chars = ret.second - utf8_output;
443
444
604
  if (ret.first != buf + len) {
445
474
    const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert(
446
474
        ret.first, len - (ret.first - buf), ret.second);
447
474
    converted_chars += scalar_converted_chars;
448
474
  }
449
450
604
  return converted_chars;
451
604
}
452
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
453
454
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
455
simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(
456
72
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
457
72
  std::pair<const char *, char16_t *> ret =
458
72
      sse_convert_latin1_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
459
72
  if (ret.first == nullptr) {
460
4
    return 0;
461
4
  }
462
68
  size_t converted_chars = ret.second - utf16_output;
463
68
  if (ret.first != buf + len) {
464
54
    const size_t scalar_converted_chars =
465
54
        scalar::latin1_to_utf16::convert<endianness::LITTLE>(
466
54
            ret.first, len - (ret.first - buf), ret.second);
467
54
    if (scalar_converted_chars == 0) {
468
0
      return 0;
469
0
    }
470
54
    converted_chars += scalar_converted_chars;
471
54
  }
472
68
  return converted_chars;
473
68
}
474
475
simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(
476
106
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
477
106
  std::pair<const char *, char16_t *> ret =
478
106
      sse_convert_latin1_to_utf16<endianness::BIG>(buf, len, utf16_output);
479
106
  if (ret.first == nullptr) {
480
4
    return 0;
481
4
  }
482
102
  size_t converted_chars = ret.second - utf16_output;
483
102
  if (ret.first != buf + len) {
484
88
    const size_t scalar_converted_chars =
485
88
        scalar::latin1_to_utf16::convert<endianness::BIG>(
486
88
            ret.first, len - (ret.first - buf), ret.second);
487
88
    if (scalar_converted_chars == 0) {
488
0
      return 0;
489
0
    }
490
88
    converted_chars += scalar_converted_chars;
491
88
  }
492
102
  return converted_chars;
493
102
}
494
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
495
496
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
497
simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(
498
100
    const char *buf, size_t len, char32_t *utf32_output) const noexcept {
499
100
  std::pair<const char *, char32_t *> ret =
500
100
      sse_convert_latin1_to_utf32(buf, len, utf32_output);
501
100
  if (ret.first == nullptr) {
502
4
    return 0;
503
4
  }
504
96
  size_t converted_chars = ret.second - utf32_output;
505
96
  if (ret.first != buf + len) {
506
82
    const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert(
507
82
        ret.first, len - (ret.first - buf), ret.second);
508
82
    if (scalar_converted_chars == 0) {
509
0
      return 0;
510
0
    }
511
82
    converted_chars += scalar_converted_chars;
512
82
  }
513
96
  return converted_chars;
514
96
}
515
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
516
517
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
518
simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(
519
572
    const char *buf, size_t len, char *latin1_output) const noexcept {
520
572
  utf8_to_latin1::validating_transcoder converter;
521
572
  return converter.convert(buf, len, latin1_output);
522
572
}
523
524
simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(
525
530
    const char *buf, size_t len, char *latin1_output) const noexcept {
526
530
  utf8_to_latin1::validating_transcoder converter;
527
530
  return converter.convert_with_errors(buf, len, latin1_output);
528
530
}
529
530
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(
531
0
    const char *buf, size_t len, char *latin1_output) const noexcept {
532
0
  return westmere::utf8_to_latin1::convert_valid(buf, len, latin1_output);
533
0
}
534
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
535
536
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
537
simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(
538
788
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
539
788
  utf8_to_utf16::validating_transcoder converter;
540
788
  return converter.convert<endianness::LITTLE>(buf, len, utf16_output);
541
788
}
542
543
simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(
544
790
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
545
790
  utf8_to_utf16::validating_transcoder converter;
546
790
  return converter.convert<endianness::BIG>(buf, len, utf16_output);
547
790
}
548
549
simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(
550
644
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
551
644
  utf8_to_utf16::validating_transcoder converter;
552
644
  return converter.convert_with_errors<endianness::LITTLE>(buf, len,
553
644
                                                           utf16_output);
554
644
}
555
556
simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(
557
784
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
558
784
  utf8_to_utf16::validating_transcoder converter;
559
784
  return converter.convert_with_errors<endianness::BIG>(buf, len, utf16_output);
560
784
}
561
562
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(
563
420
    const char *input, size_t size, char16_t *utf16_output) const noexcept {
564
420
  return utf8_to_utf16::convert_valid<endianness::LITTLE>(input, size,
565
420
                                                          utf16_output);
566
420
}
567
568
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(
569
470
    const char *input, size_t size, char16_t *utf16_output) const noexcept {
570
470
  return utf8_to_utf16::convert_valid<endianness::BIG>(input, size,
571
470
                                                       utf16_output);
572
470
}
573
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
574
575
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
576
simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(
577
704
    const char *buf, size_t len, char32_t *utf32_output) const noexcept {
578
704
  utf8_to_utf32::validating_transcoder converter;
579
704
  return converter.convert(buf, len, utf32_output);
580
704
}
581
582
simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(
583
728
    const char *buf, size_t len, char32_t *utf32_output) const noexcept {
584
728
  utf8_to_utf32::validating_transcoder converter;
585
728
  return converter.convert_with_errors(buf, len, utf32_output);
586
728
}
587
588
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(
589
446
    const char *input, size_t size, char32_t *utf32_output) const noexcept {
590
446
  return utf8_to_utf32::convert_valid(input, size, utf32_output);
591
446
}
592
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
593
594
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
595
simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(
596
152
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
597
152
  std::pair<const char16_t *, char *> ret =
598
152
      sse_convert_utf16_to_latin1<endianness::LITTLE>(buf, len, latin1_output);
599
152
  if (ret.first == nullptr) {
600
54
    return 0;
601
54
  }
602
98
  size_t saved_bytes = ret.second - latin1_output;
603
604
98
  if (ret.first != buf + len) {
605
80
    const size_t scalar_saved_bytes =
606
80
        scalar::utf16_to_latin1::convert<endianness::LITTLE>(
607
80
            ret.first, len - (ret.first - buf), ret.second);
608
80
    if (scalar_saved_bytes == 0) {
609
28
      return 0;
610
28
    }
611
52
    saved_bytes += scalar_saved_bytes;
612
52
  }
613
70
  return saved_bytes;
614
98
}
615
616
simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(
617
120
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
618
120
  std::pair<const char16_t *, char *> ret =
619
120
      sse_convert_utf16_to_latin1<endianness::BIG>(buf, len, latin1_output);
620
120
  if (ret.first == nullptr) {
621
44
    return 0;
622
44
  }
623
76
  size_t saved_bytes = ret.second - latin1_output;
624
625
76
  if (ret.first != buf + len) {
626
58
    const size_t scalar_saved_bytes =
627
58
        scalar::utf16_to_latin1::convert<endianness::BIG>(
628
58
            ret.first, len - (ret.first - buf), ret.second);
629
58
    if (scalar_saved_bytes == 0) {
630
30
      return 0;
631
30
    }
632
28
    saved_bytes += scalar_saved_bytes;
633
28
  }
634
46
  return saved_bytes;
635
76
}
636
637
simdutf_warn_unused result
638
implementation::convert_utf16le_to_latin1_with_errors(
639
302
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
640
302
  std::pair<result, char *> ret =
641
302
      sse_convert_utf16_to_latin1_with_errors<endianness::LITTLE>(
642
302
          buf, len, latin1_output);
643
302
  if (ret.first.error) {
644
168
    return ret.first;
645
168
  } // Can return directly since scalar fallback already found correct
646
    // ret.first.count
647
134
  if (ret.first.count != len) { // All good so far, but not finished
648
98
    result scalar_res =
649
98
        scalar::utf16_to_latin1::convert_with_errors<endianness::LITTLE>(
650
98
            buf + ret.first.count, len - ret.first.count, ret.second);
651
98
    if (scalar_res.error) {
652
38
      scalar_res.count += ret.first.count;
653
38
      return scalar_res;
654
60
    } else {
655
60
      ret.second += scalar_res.count;
656
60
    }
657
98
  }
658
96
  ret.first.count =
659
96
      ret.second -
660
96
      latin1_output; // Set count to the number of 8-bit code units written
661
96
  return ret.first;
662
134
}
663
664
simdutf_warn_unused result
665
implementation::convert_utf16be_to_latin1_with_errors(
666
276
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
667
276
  std::pair<result, char *> ret =
668
276
      sse_convert_utf16_to_latin1_with_errors<endianness::BIG>(buf, len,
669
276
                                                               latin1_output);
670
276
  if (ret.first.error) {
671
148
    return ret.first;
672
148
  } // Can return directly since scalar fallback already found correct
673
    // ret.first.count
674
128
  if (ret.first.count != len) { // All good so far, but not finished
675
82
    result scalar_res =
676
82
        scalar::utf16_to_latin1::convert_with_errors<endianness::BIG>(
677
82
            buf + ret.first.count, len - ret.first.count, ret.second);
678
82
    if (scalar_res.error) {
679
28
      scalar_res.count += ret.first.count;
680
28
      return scalar_res;
681
54
    } else {
682
54
      ret.second += scalar_res.count;
683
54
    }
684
82
  }
685
100
  ret.first.count =
686
100
      ret.second -
687
100
      latin1_output; // Set count to the number of 8-bit code units written
688
100
  return ret.first;
689
128
}
690
691
simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(
692
0
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
693
  // optimization opportunity: we could provide an optimized function.
694
0
  return convert_utf16be_to_latin1(buf, len, latin1_output);
695
0
}
696
697
simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(
698
0
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
699
  // optimization opportunity: we could provide an optimized function.
700
0
  return convert_utf16le_to_latin1(buf, len, latin1_output);
701
0
}
702
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
703
704
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
705
simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(
706
852
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
707
852
  std::pair<const char16_t *, char *> ret =
708
852
      sse_convert_utf16_to_utf8<endianness::LITTLE>(buf, len, utf8_output);
709
852
  if (ret.first == nullptr) {
710
104
    return 0;
711
104
  }
712
748
  size_t saved_bytes = ret.second - utf8_output;
713
748
  if (ret.first != buf + len) {
714
746
    const size_t scalar_saved_bytes =
715
746
        scalar::utf16_to_utf8::convert<endianness::LITTLE>(
716
746
            ret.first, len - (ret.first - buf), ret.second);
717
746
    if (scalar_saved_bytes == 0) {
718
64
      return 0;
719
64
    }
720
682
    saved_bytes += scalar_saved_bytes;
721
682
  }
722
684
  return saved_bytes;
723
748
}
724
725
simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(
726
682
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
727
682
  std::pair<const char16_t *, char *> ret =
728
682
      sse_convert_utf16_to_utf8<endianness::BIG>(buf, len, utf8_output);
729
682
  if (ret.first == nullptr) {
730
70
    return 0;
731
70
  }
732
612
  size_t saved_bytes = ret.second - utf8_output;
733
612
  if (ret.first != buf + len) {
734
610
    const size_t scalar_saved_bytes =
735
610
        scalar::utf16_to_utf8::convert<endianness::BIG>(
736
610
            ret.first, len - (ret.first - buf), ret.second);
737
610
    if (scalar_saved_bytes == 0) {
738
62
      return 0;
739
62
    }
740
548
    saved_bytes += scalar_saved_bytes;
741
548
  }
742
550
  return saved_bytes;
743
612
}
744
745
simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(
746
664
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
747
  // ret.first.count is always the position in the buffer, not the number of
748
  // code units written even if finished
749
664
  std::pair<result, char *> ret =
750
664
      westmere::sse_convert_utf16_to_utf8_with_errors<endianness::LITTLE>(
751
664
          buf, len, utf8_output);
752
664
  if (ret.first.error) {
753
138
    return ret.first;
754
138
  } // Can return directly since scalar fallback already found correct
755
    // ret.first.count
756
526
  if (ret.first.count != len) { // All good so far, but not finished
757
522
    result scalar_res =
758
522
        scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(
759
522
            buf + ret.first.count, len - ret.first.count, ret.second);
760
522
    if (scalar_res.error) {
761
120
      scalar_res.count += ret.first.count;
762
120
      return scalar_res;
763
402
    } else {
764
402
      ret.second += scalar_res.count;
765
402
    }
766
522
  }
767
406
  ret.first.count =
768
406
      ret.second -
769
406
      utf8_output; // Set count to the number of 8-bit code units written
770
406
  return ret.first;
771
526
}
772
773
simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(
774
720
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
775
  // ret.first.count is always the position in the buffer, not the number of
776
  // code units written even if finished
777
720
  std::pair<result, char *> ret =
778
720
      westmere::sse_convert_utf16_to_utf8_with_errors<endianness::BIG>(
779
720
          buf, len, utf8_output);
780
720
  if (ret.first.error) {
781
200
    return ret.first;
782
200
  } // Can return directly since scalar fallback already found correct
783
    // ret.first.count
784
520
  if (ret.first.count != len) { // All good so far, but not finished
785
516
    result scalar_res =
786
516
        scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(
787
516
            buf + ret.first.count, len - ret.first.count, ret.second);
788
516
    if (scalar_res.error) {
789
136
      scalar_res.count += ret.first.count;
790
136
      return scalar_res;
791
380
    } else {
792
380
      ret.second += scalar_res.count;
793
380
    }
794
516
  }
795
384
  ret.first.count =
796
384
      ret.second -
797
384
      utf8_output; // Set count to the number of 8-bit code units written
798
384
  return ret.first;
799
520
}
800
801
simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(
802
436
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
803
436
  return convert_utf16le_to_utf8(buf, len, utf8_output);
804
436
}
805
806
simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(
807
436
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
808
436
  return convert_utf16be_to_utf8(buf, len, utf8_output);
809
436
}
810
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
811
812
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
813
simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(
814
174
    const char32_t *buf, size_t len, char *latin1_output) const noexcept {
815
174
  std::pair<const char32_t *, char *> ret =
816
174
      sse_convert_utf32_to_latin1(buf, len, latin1_output);
817
174
  if (ret.first == nullptr) {
818
34
    return 0;
819
34
  }
820
140
  size_t saved_bytes = ret.second - latin1_output;
821
  // if (ret.first != buf + len) {
822
140
  if (ret.first < buf + len) {
823
126
    const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert(
824
126
        ret.first, len - (ret.first - buf), ret.second);
825
126
    if (scalar_saved_bytes == 0) {
826
92
      return 0;
827
92
    }
828
34
    saved_bytes += scalar_saved_bytes;
829
34
  }
830
48
  return saved_bytes;
831
140
}
832
833
simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(
834
444
    const char32_t *buf, size_t len, char *latin1_output) const noexcept {
835
  // ret.first.count is always the position in the buffer, not the number of
836
  // code units written even if finished
837
444
  std::pair<result, char *> ret =
838
444
      westmere::sse_convert_utf32_to_latin1_with_errors(buf, len,
839
444
                                                        latin1_output);
840
444
  if (ret.first.count != len) {
841
424
    result scalar_res = scalar::utf32_to_latin1::convert_with_errors(
842
424
        buf + ret.first.count, len - ret.first.count, ret.second);
843
424
    if (scalar_res.error) {
844
392
      scalar_res.count += ret.first.count;
845
392
      return scalar_res;
846
392
    } else {
847
32
      ret.second += scalar_res.count;
848
32
    }
849
424
  }
850
52
  ret.first.count =
851
52
      ret.second -
852
52
      latin1_output; // Set count to the number of 8-bit code units written
853
52
  return ret.first;
854
444
}
855
856
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(
857
0
    const char32_t *buf, size_t len, char *latin1_output) const noexcept {
858
  // optimization opportunity: we could provide an optimized function.
859
0
  return convert_utf32_to_latin1(buf, len, latin1_output);
860
0
}
861
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
862
863
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
864
simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(
865
992
    const char32_t *buf, size_t len, char *utf8_output) const noexcept {
866
992
  std::pair<const char32_t *, char *> ret =
867
992
      sse_convert_utf32_to_utf8(buf, len, utf8_output);
868
992
  if (ret.first == nullptr) {
869
352
    return 0;
870
352
  }
871
640
  size_t saved_bytes = ret.second - utf8_output;
872
640
  if (ret.first != buf + len) {
873
638
    const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert(
874
638
        ret.first, len - (ret.first - buf), ret.second);
875
638
    if (scalar_saved_bytes == 0) {
876
300
      return 0;
877
300
    }
878
338
    saved_bytes += scalar_saved_bytes;
879
338
  }
880
340
  return saved_bytes;
881
640
}
882
883
simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(
884
728
    const char32_t *buf, size_t len, char *utf8_output) const noexcept {
885
  // ret.first.count is always the position in the buffer, not the number of
886
  // code units written even if finished
887
728
  std::pair<result, char *> ret =
888
728
      westmere::sse_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
889
728
  if (ret.first.count != len) {
890
724
    result scalar_res = scalar::utf32_to_utf8::convert_with_errors(
891
724
        buf + ret.first.count, len - ret.first.count, ret.second);
892
724
    if (scalar_res.error) {
893
544
      scalar_res.count += ret.first.count;
894
544
      return scalar_res;
895
544
    } else {
896
180
      ret.second += scalar_res.count;
897
180
    }
898
724
  }
899
184
  ret.first.count =
900
184
      ret.second -
901
184
      utf8_output; // Set count to the number of 8-bit code units written
902
184
  return ret.first;
903
728
}
904
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
905
906
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
907
simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(
908
432
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
909
432
  std::pair<const char16_t *, char32_t *> ret =
910
432
      sse_convert_utf16_to_utf32<endianness::LITTLE>(buf, len, utf32_output);
911
432
  if (ret.first == nullptr) {
912
126
    return 0;
913
126
  }
914
306
  size_t saved_bytes = ret.second - utf32_output;
915
306
  if (ret.first != buf + len) {
916
240
    const size_t scalar_saved_bytes =
917
240
        scalar::utf16_to_utf32::convert<endianness::LITTLE>(
918
240
            ret.first, len - (ret.first - buf), ret.second);
919
240
    if (scalar_saved_bytes == 0) {
920
66
      return 0;
921
66
    }
922
174
    saved_bytes += scalar_saved_bytes;
923
174
  }
924
240
  return saved_bytes;
925
306
}
926
927
simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(
928
418
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
929
418
  std::pair<const char16_t *, char32_t *> ret =
930
418
      sse_convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
931
418
  if (ret.first == nullptr) {
932
94
    return 0;
933
94
  }
934
324
  size_t saved_bytes = ret.second - utf32_output;
935
324
  if (ret.first != buf + len) {
936
244
    const size_t scalar_saved_bytes =
937
244
        scalar::utf16_to_utf32::convert<endianness::BIG>(
938
244
            ret.first, len - (ret.first - buf), ret.second);
939
244
    if (scalar_saved_bytes == 0) {
940
58
      return 0;
941
58
    }
942
186
    saved_bytes += scalar_saved_bytes;
943
186
  }
944
266
  return saved_bytes;
945
324
}
946
947
simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(
948
312
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
949
  // ret.first.count is always the position in the buffer, not the number of
950
  // code units written even if finished
951
312
  std::pair<result, char32_t *> ret =
952
312
      westmere::sse_convert_utf16_to_utf32_with_errors<endianness::LITTLE>(
953
312
          buf, len, utf32_output);
954
312
  if (ret.first.error) {
955
110
    return ret.first;
956
110
  } // Can return directly since scalar fallback already found correct
957
    // ret.first.count
958
202
  if (ret.first.count != len) { // All good so far, but not finished
959
168
    result scalar_res =
960
168
        scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(
961
168
            buf + ret.first.count, len - ret.first.count, ret.second);
962
168
    if (scalar_res.error) {
963
64
      scalar_res.count += ret.first.count;
964
64
      return scalar_res;
965
104
    } else {
966
104
      ret.second += scalar_res.count;
967
104
    }
968
168
  }
969
138
  ret.first.count =
970
138
      ret.second -
971
138
      utf32_output; // Set count to the number of 8-bit code units written
972
138
  return ret.first;
973
202
}
974
975
simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(
976
364
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
977
  // ret.first.count is always the position in the buffer, not the number of
978
  // code units written even if finished
979
364
  std::pair<result, char32_t *> ret =
980
364
      westmere::sse_convert_utf16_to_utf32_with_errors<endianness::BIG>(
981
364
          buf, len, utf32_output);
982
364
  if (ret.first.error) {
983
126
    return ret.first;
984
126
  } // Can return directly since scalar fallback already found correct
985
    // ret.first.count
986
238
  if (ret.first.count != len) { // All good so far, but not finished
987
190
    result scalar_res =
988
190
        scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(
989
190
            buf + ret.first.count, len - ret.first.count, ret.second);
990
190
    if (scalar_res.error) {
991
60
      scalar_res.count += ret.first.count;
992
60
      return scalar_res;
993
130
    } else {
994
130
      ret.second += scalar_res.count;
995
130
    }
996
190
  }
997
178
  ret.first.count =
998
178
      ret.second -
999
178
      utf32_output; // Set count to the number of 8-bit code units written
1000
178
  return ret.first;
1001
238
}
1002
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1003
1004
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1005
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(
1006
204
    const char32_t *buf, size_t len, char *utf8_output) const noexcept {
1007
204
  return convert_utf32_to_utf8(buf, len, utf8_output);
1008
204
}
1009
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1010
1011
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1012
simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(
1013
792
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1014
792
  std::pair<const char32_t *, char16_t *> ret =
1015
792
      sse_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
1016
792
  if (ret.first == nullptr) {
1017
72
    return 0;
1018
72
  }
1019
720
  size_t saved_bytes = ret.second - utf16_output;
1020
720
  if (ret.first != buf + len) {
1021
716
    const size_t scalar_saved_bytes =
1022
716
        scalar::utf32_to_utf16::convert<endianness::LITTLE>(
1023
716
            ret.first, len - (ret.first - buf), ret.second);
1024
716
    if (scalar_saved_bytes == 0) {
1025
298
      return 0;
1026
298
    }
1027
418
    saved_bytes += scalar_saved_bytes;
1028
418
  }
1029
422
  return saved_bytes;
1030
720
}
1031
1032
simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(
1033
616
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1034
616
  std::pair<const char32_t *, char16_t *> ret =
1035
616
      sse_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
1036
616
  if (ret.first == nullptr) {
1037
60
    return 0;
1038
60
  }
1039
556
  size_t saved_bytes = ret.second - utf16_output;
1040
556
  if (ret.first != buf + len) {
1041
554
    const size_t scalar_saved_bytes =
1042
554
        scalar::utf32_to_utf16::convert<endianness::BIG>(
1043
554
            ret.first, len - (ret.first - buf), ret.second);
1044
554
    if (scalar_saved_bytes == 0) {
1045
300
      return 0;
1046
300
    }
1047
254
    saved_bytes += scalar_saved_bytes;
1048
254
  }
1049
256
  return saved_bytes;
1050
556
}
1051
1052
simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(
1053
724
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1054
  // ret.first.count is always the position in the buffer, not the number of
1055
  // code units written even if finished
1056
724
  std::pair<result, char16_t *> ret =
1057
724
      westmere::sse_convert_utf32_to_utf16_with_errors<endianness::LITTLE>(
1058
724
          buf, len, utf16_output);
1059
724
  if (ret.first.count != len) {
1060
700
    result scalar_res =
1061
700
        scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>(
1062
700
            buf + ret.first.count, len - ret.first.count, ret.second);
1063
700
    if (scalar_res.error) {
1064
550
      scalar_res.count += ret.first.count;
1065
550
      return scalar_res;
1066
550
    } else {
1067
150
      ret.second += scalar_res.count;
1068
150
    }
1069
700
  }
1070
174
  ret.first.count =
1071
174
      ret.second -
1072
174
      utf16_output; // Set count to the number of 8-bit code units written
1073
174
  return ret.first;
1074
724
}
1075
1076
simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(
1077
652
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1078
  // ret.first.count is always the position in the buffer, not the number of
1079
  // code units written even if finished
1080
652
  std::pair<result, char16_t *> ret =
1081
652
      westmere::sse_convert_utf32_to_utf16_with_errors<endianness::BIG>(
1082
652
          buf, len, utf16_output);
1083
652
  if (ret.first.count != len) {
1084
630
    result scalar_res =
1085
630
        scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>(
1086
630
            buf + ret.first.count, len - ret.first.count, ret.second);
1087
630
    if (scalar_res.error) {
1088
486
      scalar_res.count += ret.first.count;
1089
486
      return scalar_res;
1090
486
    } else {
1091
144
      ret.second += scalar_res.count;
1092
144
    }
1093
630
  }
1094
166
  ret.first.count =
1095
166
      ret.second -
1096
166
      utf16_output; // Set count to the number of 8-bit code units written
1097
166
  return ret.first;
1098
652
}
1099
1100
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(
1101
284
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1102
284
  return convert_utf32_to_utf16le(buf, len, utf16_output);
1103
284
}
1104
1105
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(
1106
128
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1107
128
  return convert_utf32_to_utf16be(buf, len, utf16_output);
1108
128
}
1109
1110
simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(
1111
128
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
1112
128
  return convert_utf16le_to_utf32(buf, len, utf32_output);
1113
128
}
1114
1115
simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(
1116
182
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
1117
182
  return convert_utf16be_to_utf32(buf, len, utf32_output);
1118
182
}
1119
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1120
1121
#if SIMDUTF_FEATURE_UTF16
1122
void implementation::change_endianness_utf16(const char16_t *input,
1123
                                             size_t length,
1124
0
                                             char16_t *output) const noexcept {
1125
0
  utf16::change_endianness_utf16(input, length, output);
1126
0
}
1127
1128
simdutf_warn_unused size_t implementation::count_utf16le(
1129
1.38k
    const char16_t *input, size_t length) const noexcept {
1130
1.38k
  return utf16::count_code_points<endianness::LITTLE>(input, length);
1131
1.38k
}
1132
1133
simdutf_warn_unused size_t implementation::count_utf16be(
1134
1.32k
    const char16_t *input, size_t length) const noexcept {
1135
1.32k
  return utf16::count_code_points<endianness::BIG>(input, length);
1136
1.32k
}
1137
#endif // SIMDUTF_FEATURE_UTF16
1138
1139
#if SIMDUTF_FEATURE_UTF8
1140
simdutf_warn_unused size_t
1141
4.02k
implementation::count_utf8(const char *input, size_t length) const noexcept {
1142
4.02k
  return utf8::count_code_points_bytemask(input, length);
1143
4.02k
}
1144
#endif // SIMDUTF_FEATURE_UTF8
1145
1146
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1147
simdutf_warn_unused size_t implementation::latin1_length_from_utf8(
1148
551
    const char *buf, size_t len) const noexcept {
1149
551
  return count_utf8(buf, len);
1150
551
}
1151
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1152
1153
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1154
simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(
1155
778
    const char16_t *input, size_t length) const noexcept {
1156
778
  return utf16::utf8_length_from_utf16_bytemask<endianness::LITTLE>(input,
1157
778
                                                                    length);
1158
778
}
1159
1160
simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(
1161
715
    const char16_t *input, size_t length) const noexcept {
1162
715
  return utf16::utf8_length_from_utf16_bytemask<endianness::BIG>(input, length);
1163
715
}
1164
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1165
1166
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1167
simdutf_warn_unused size_t implementation::utf8_length_from_latin1(
1168
302
    const char *input, size_t len) const noexcept {
1169
302
  const uint8_t *str = reinterpret_cast<const uint8_t *>(input);
1170
302
  size_t answer = len / sizeof(__m128i) * sizeof(__m128i);
1171
302
  size_t i = 0;
1172
302
  if (answer >= 2048) { // long strings optimization
1173
153
    __m128i two_64bits = _mm_setzero_si128();
1174
6.46k
    while (i + sizeof(__m128i) <= len) {
1175
6.31k
      __m128i runner = _mm_setzero_si128();
1176
6.31k
      size_t iterations = (len - i) / sizeof(__m128i);
1177
6.31k
      if (iterations > 255) {
1178
6.16k
        iterations = 255;
1179
6.16k
      }
1180
6.31k
      size_t max_i = i + iterations * sizeof(__m128i) - sizeof(__m128i);
1181
397k
      for (; i + 4 * sizeof(__m128i) <= max_i; i += 4 * sizeof(__m128i)) {
1182
391k
        __m128i input1 = _mm_loadu_si128((const __m128i *)(str + i));
1183
391k
        __m128i input2 =
1184
391k
            _mm_loadu_si128((const __m128i *)(str + i + sizeof(__m128i)));
1185
391k
        __m128i input3 =
1186
391k
            _mm_loadu_si128((const __m128i *)(str + i + 2 * sizeof(__m128i)));
1187
391k
        __m128i input4 =
1188
391k
            _mm_loadu_si128((const __m128i *)(str + i + 3 * sizeof(__m128i)));
1189
391k
        __m128i input12 =
1190
391k
            _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input1),
1191
391k
                         _mm_cmpgt_epi8(_mm_setzero_si128(), input2));
1192
391k
        __m128i input34 =
1193
391k
            _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input3),
1194
391k
                         _mm_cmpgt_epi8(_mm_setzero_si128(), input4));
1195
391k
        __m128i input1234 = _mm_add_epi8(input12, input34);
1196
391k
        runner = _mm_sub_epi8(runner, input1234);
1197
391k
      }
1198
25.1k
      for (; i <= max_i; i += sizeof(__m128i)) {
1199
18.8k
        __m128i more_input = _mm_loadu_si128((const __m128i *)(str + i));
1200
18.8k
        runner = _mm_sub_epi8(runner,
1201
18.8k
                              _mm_cmpgt_epi8(_mm_setzero_si128(), more_input));
1202
18.8k
      }
1203
6.31k
      two_64bits =
1204
6.31k
          _mm_add_epi64(two_64bits, _mm_sad_epu8(runner, _mm_setzero_si128()));
1205
6.31k
    }
1206
153
    answer +=
1207
153
        _mm_extract_epi64(two_64bits, 0) + _mm_extract_epi64(two_64bits, 1);
1208
153
  } else if (answer > 0) { // short string optimization
1209
1.25k
    for (; i + 2 * sizeof(__m128i) <= len; i += 2 * sizeof(__m128i)) {
1210
1.15k
      __m128i latin = _mm_loadu_si128((const __m128i *)(input + i));
1211
1.15k
      uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin);
1212
1.15k
      answer += count_ones(non_ascii);
1213
1.15k
      latin = _mm_loadu_si128((const __m128i *)(input + i) + 1);
1214
1.15k
      non_ascii = (uint16_t)_mm_movemask_epi8(latin);
1215
1.15k
      answer += count_ones(non_ascii);
1216
1.15k
    }
1217
140
    for (; i + sizeof(__m128i) <= len; i += sizeof(__m128i)) {
1218
44
      __m128i latin = _mm_loadu_si128((const __m128i *)(input + i));
1219
44
      uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin);
1220
44
      answer += count_ones(non_ascii);
1221
44
    }
1222
96
  }
1223
302
  return answer + scalar::latin1::utf8_length_from_latin1(
1224
302
                      reinterpret_cast<const char *>(str + i), len - i);
1225
302
}
1226
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1227
1228
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1229
simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(
1230
380
    const char16_t *input, size_t length) const noexcept {
1231
380
  return utf16::utf32_length_from_utf16<endianness::LITTLE>(input, length);
1232
380
}
1233
1234
simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(
1235
407
    const char16_t *input, size_t length) const noexcept {
1236
407
  return utf16::utf32_length_from_utf16<endianness::BIG>(input, length);
1237
407
}
1238
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1239
1240
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1241
simdutf_warn_unused size_t implementation::utf16_length_from_utf8(
1242
1.96k
    const char *input, size_t length) const noexcept {
1243
1.96k
  return utf8::utf16_length_from_utf8_bytemask(input, length);
1244
1.96k
}
1245
simdutf_warn_unused result
1246
implementation::utf8_length_from_utf16le_with_replacement(
1247
0
    const char16_t *input, size_t length) const noexcept {
1248
0
  return utf16::utf8_length_from_utf16_with_replacement<endianness::LITTLE>(
1249
0
      input, length);
1250
0
}
1251
1252
simdutf_warn_unused result
1253
implementation::utf8_length_from_utf16be_with_replacement(
1254
0
    const char16_t *input, size_t length) const noexcept {
1255
0
  return utf16::utf8_length_from_utf16_with_replacement<endianness::BIG>(
1256
0
      input, length);
1257
0
}
1258
1259
simdutf_warn_unused size_t
1260
implementation::convert_utf16le_to_utf8_with_replacement(
1261
0
    const char16_t *input, size_t length, char *utf8_buffer) const noexcept {
1262
0
  return scalar::utf16_to_utf8::convert_with_replacement<endianness::LITTLE>(
1263
0
      input, length, utf8_buffer);
1264
0
}
1265
1266
simdutf_warn_unused size_t
1267
implementation::convert_utf16be_to_utf8_with_replacement(
1268
0
    const char16_t *input, size_t length, char *utf8_buffer) const noexcept {
1269
0
  return scalar::utf16_to_utf8::convert_with_replacement<endianness::BIG>(
1270
0
      input, length, utf8_buffer);
1271
0
}
1272
1273
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1274
1275
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1276
simdutf_warn_unused size_t implementation::utf8_length_from_utf32(
1277
869
    const char32_t *input, size_t length) const noexcept {
1278
869
  return utf32::utf8_length_from_utf32(input, length);
1279
869
}
1280
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1281
1282
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1283
simdutf_warn_unused size_t implementation::utf16_length_from_utf32(
1284
1.40k
    const char32_t *input, size_t length) const noexcept {
1285
1.40k
  const __m128i v_00000000 = _mm_setzero_si128();
1286
1.40k
  const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000);
1287
1.40k
  size_t pos = 0;
1288
1.40k
  size_t count = 0;
1289
1.06M
  for (; pos + 4 <= length; pos += 4) {
1290
1.06M
    __m128i in = _mm_loadu_si128((__m128i *)(input + pos));
1291
1.06M
    const __m128i surrogate_bytemask =
1292
1.06M
        _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000);
1293
1.06M
    const uint16_t surrogate_bitmask =
1294
1.06M
        static_cast<uint16_t>(_mm_movemask_epi8(surrogate_bytemask));
1295
1.06M
    size_t surrogate_count = (16 - count_ones(surrogate_bitmask)) / 4;
1296
1.06M
    count += 4 + surrogate_count;
1297
1.06M
  }
1298
1.40k
  return count +
1299
1.40k
         scalar::utf32::utf16_length_from_utf32(input + pos, length - pos);
1300
1.40k
}
1301
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1302
1303
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1304
simdutf_warn_unused size_t implementation::utf32_length_from_utf8(
1305
958
    const char *input, size_t length) const noexcept {
1306
958
  return utf8::count_code_points(input, length);
1307
958
}
1308
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1309
1310
#if SIMDUTF_FEATURE_BASE64
1311
simdutf_warn_unused result implementation::base64_to_binary(
1312
    const char *input, size_t length, char *output, base64_options options,
1313
0
    last_chunk_handling_options last_chunk_options) const noexcept {
1314
0
  if (options & base64_default_or_url) {
1315
0
    if (options == base64_options::base64_default_or_url_accept_garbage) {
1316
0
      return base64::compress_decode_base64<false, true, true>(
1317
0
          output, input, length, options, last_chunk_options);
1318
0
    } else {
1319
0
      return base64::compress_decode_base64<false, false, true>(
1320
0
          output, input, length, options, last_chunk_options);
1321
0
    }
1322
0
  } else if (options & base64_url) {
1323
0
    if (options == base64_options::base64_url_accept_garbage) {
1324
0
      return base64::compress_decode_base64<true, true, false>(
1325
0
          output, input, length, options, last_chunk_options);
1326
0
    } else {
1327
0
      return base64::compress_decode_base64<true, false, false>(
1328
0
          output, input, length, options, last_chunk_options);
1329
0
    }
1330
0
  } else {
1331
0
    if (options == base64_options::base64_default_accept_garbage) {
1332
0
      return base64::compress_decode_base64<false, true, false>(
1333
0
          output, input, length, options, last_chunk_options);
1334
0
    } else {
1335
0
      return base64::compress_decode_base64<false, false, false>(
1336
0
          output, input, length, options, last_chunk_options);
1337
0
    }
1338
0
  }
1339
0
}
1340
1341
simdutf_warn_unused full_result implementation::base64_to_binary_details(
1342
    const char *input, size_t length, char *output, base64_options options,
1343
0
    last_chunk_handling_options last_chunk_options) const noexcept {
1344
0
  if (options & base64_default_or_url) {
1345
0
    if (options == base64_options::base64_default_or_url_accept_garbage) {
1346
0
      return base64::compress_decode_base64<false, true, true>(
1347
0
          output, input, length, options, last_chunk_options);
1348
0
    } else {
1349
0
      return base64::compress_decode_base64<false, false, true>(
1350
0
          output, input, length, options, last_chunk_options);
1351
0
    }
1352
0
  } else if (options & base64_url) {
1353
0
    if (options == base64_options::base64_url_accept_garbage) {
1354
0
      return base64::compress_decode_base64<true, true, false>(
1355
0
          output, input, length, options, last_chunk_options);
1356
0
    } else {
1357
0
      return base64::compress_decode_base64<true, false, false>(
1358
0
          output, input, length, options, last_chunk_options);
1359
0
    }
1360
0
  } else {
1361
0
    if (options == base64_options::base64_default_accept_garbage) {
1362
0
      return base64::compress_decode_base64<false, true, false>(
1363
0
          output, input, length, options, last_chunk_options);
1364
0
    } else {
1365
0
      return base64::compress_decode_base64<false, false, false>(
1366
0
          output, input, length, options, last_chunk_options);
1367
0
    }
1368
0
  }
1369
0
}
1370
1371
simdutf_warn_unused result implementation::base64_to_binary(
1372
    const char16_t *input, size_t length, char *output, base64_options options,
1373
0
    last_chunk_handling_options last_chunk_options) const noexcept {
1374
0
  if (options & base64_default_or_url) {
1375
0
    if (options == base64_options::base64_default_or_url_accept_garbage) {
1376
0
      return base64::compress_decode_base64<false, true, true>(
1377
0
          output, input, length, options, last_chunk_options);
1378
0
    } else {
1379
0
      return base64::compress_decode_base64<false, false, true>(
1380
0
          output, input, length, options, last_chunk_options);
1381
0
    }
1382
0
  } else if (options & base64_url) {
1383
0
    if (options == base64_options::base64_url_accept_garbage) {
1384
0
      return base64::compress_decode_base64<true, true, false>(
1385
0
          output, input, length, options, last_chunk_options);
1386
0
    } else {
1387
0
      return base64::compress_decode_base64<true, false, false>(
1388
0
          output, input, length, options, last_chunk_options);
1389
0
    }
1390
0
  } else {
1391
0
    if (options == base64_options::base64_default_accept_garbage) {
1392
0
      return base64::compress_decode_base64<false, true, false>(
1393
0
          output, input, length, options, last_chunk_options);
1394
0
    } else {
1395
0
      return base64::compress_decode_base64<false, false, false>(
1396
0
          output, input, length, options, last_chunk_options);
1397
0
    }
1398
0
  }
1399
0
}
1400
1401
simdutf_warn_unused full_result implementation::base64_to_binary_details(
1402
    const char16_t *input, size_t length, char *output, base64_options options,
1403
0
    last_chunk_handling_options last_chunk_options) const noexcept {
1404
0
  if (options & base64_default_or_url) {
1405
0
    if (options == base64_options::base64_default_or_url_accept_garbage) {
1406
0
      return base64::compress_decode_base64<false, true, true>(
1407
0
          output, input, length, options, last_chunk_options);
1408
0
    } else {
1409
0
      return base64::compress_decode_base64<false, false, true>(
1410
0
          output, input, length, options, last_chunk_options);
1411
0
    }
1412
0
  } else if (options & base64_url) {
1413
0
    if (options == base64_options::base64_url_accept_garbage) {
1414
0
      return base64::compress_decode_base64<true, true, false>(
1415
0
          output, input, length, options, last_chunk_options);
1416
0
    } else {
1417
0
      return base64::compress_decode_base64<true, false, false>(
1418
0
          output, input, length, options, last_chunk_options);
1419
0
    }
1420
0
  } else {
1421
0
    if (options == base64_options::base64_default_accept_garbage) {
1422
0
      return base64::compress_decode_base64<false, true, false>(
1423
0
          output, input, length, options, last_chunk_options);
1424
0
    } else {
1425
0
      return base64::compress_decode_base64<false, false, false>(
1426
0
          output, input, length, options, last_chunk_options);
1427
0
    }
1428
0
  }
1429
0
}
1430
1431
size_t implementation::binary_to_base64(const char *input, size_t length,
1432
                                        char *output,
1433
0
                                        base64_options options) const noexcept {
1434
0
  if (options & base64_url) {
1435
0
    return encode_base64<true>(output, input, length, options);
1436
0
  } else {
1437
0
    return encode_base64<false>(output, input, length, options);
1438
0
  }
1439
0
}
1440
1441
size_t implementation::binary_to_base64_with_lines(
1442
    const char *input, size_t length, char *output, size_t line_length,
1443
0
    base64_options options) const noexcept {
1444
0
  if (options & base64_url) {
1445
0
    return encode_base64_impl<true, true>(output, input, length, options,
1446
0
                                          line_length);
1447
1448
0
  } else {
1449
0
    return encode_base64_impl<false, true>(output, input, length, options,
1450
0
                                           line_length);
1451
0
  }
1452
0
}
1453
1454
const char *implementation::find(const char *start, const char *end,
1455
0
                                 char character) const noexcept {
1456
0
  return util::find(start, end, character);
1457
0
}
1458
1459
const char16_t *implementation::find(const char16_t *start, const char16_t *end,
1460
0
                                     char16_t character) const noexcept {
1461
0
  return util::find(start, end, character);
1462
0
}
1463
#endif // SIMDUTF_FEATURE_BASE64
1464
1465
} // namespace SIMDUTF_IMPLEMENTATION
1466
} // namespace simdutf
1467
1468
#include "simdutf/westmere/end.h"