Coverage Report

Created: 2025-11-11 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/simdutf/src/generic/validate_utf16.h
Line
Count
Source
1
namespace simdutf {
2
namespace SIMDUTF_IMPLEMENTATION {
3
namespace {
4
namespace utf16 {
5
/*
6
    UTF-16 validation
7
    --------------------------------------------------
8
9
    In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning.
10
11
    In a vectorized algorithm we want to examine the most significant
12
    nibble in order to select a fast path. If none of highest nibbles
13
    are 0xD (13), then we are sure that the UTF-16 chunk in a vector
14
    register is valid.
15
16
    Let us analyze what we need to check if the nibble is 0xD. The
17
    value of the preceding nibble determines what we have:
18
19
    0xd000 .. 0xd7ff - a valid word
20
    0xd800 .. 0xdbff - low surrogate
21
    0xdc00 .. 0xdfff - high surrogate
22
23
    Other constraints we have to consider:
24
    - there must not be two consecutive low surrogates (0xd800 .. 0xdbff)
25
    - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff)
26
    - there must not be sole low surrogate nor high surrogate
27
28
    We are going to build three bitmasks based on the 3rd nibble:
29
    - V = valid word,
30
    - L = low surrogate (0xd800 .. 0xdbff)
31
    - H = high surrogate (0xdc00 .. 0xdfff)
32
33
      0   1   2   3   4   5   6   7    <--- word index
34
    [ V | L | H | L | H | V | V | L ]
35
      1   0   0   0   0   1   1   0     - V = valid masks
36
      0   1   0   1   0   0   0   1     - L = low surrogate
37
      0   0   1   0   1   0   0   0     - H high surrogate
38
39
40
      1   0   0   0   0   1   1   0   V = valid masks
41
      0   1   0   1   0   0   0   0   a = L & (H >> 1)
42
      0   0   1   0   1   0   0   0   b = a << 1
43
      1   1   1   1   1   1   1   0   c = V | a | b
44
                                  ^
45
                                  the last bit can be zero, we just consume 7
46
   code units and recheck this word in the next iteration
47
*/
48
template <endianness big_endian>
49
30.2k
const result validate_utf16_with_errors(const char16_t *input, size_t size) {
50
30.2k
  if (simdutf_unlikely(size == 0)) {
51
20.5k
    return result(error_code::SUCCESS, 0);
52
20.5k
  }
53
54
9.72k
  const char16_t *start = input;
55
9.72k
  const char16_t *end = input + size;
56
57
9.72k
  const auto v_d8 = simd8<uint8_t>::splat(0xd8);
58
9.72k
  const auto v_f8 = simd8<uint8_t>::splat(0xf8);
59
9.72k
  const auto v_fc = simd8<uint8_t>::splat(0xfc);
60
9.72k
  const auto v_dc = simd8<uint8_t>::splat(0xdc);
61
62
68.0k
  while (input + simd16<uint16_t>::SIZE * 2 < end) {
63
    // 0. Load data: since the validation takes into account only higher
64
    //    byte of each word, we compress the two vectors into one which
65
    //    consists only the higher bytes.
66
58.7k
    auto in0 = simd16<uint16_t>(input);
67
58.7k
    auto in1 =
68
58.7k
        simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
69
70
    // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16
71
    // and yields a single vector having only higher bytes of characters.
72
58.7k
    const auto in = utf16_gather_high_bytes<big_endian>(in0, in1);
73
74
    // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy).
75
58.7k
    const auto surrogates_wordmask = (in & v_f8) == v_d8;
76
58.7k
    const uint16_t surrogates_bitmask =
77
58.7k
        static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
78
58.7k
    if (surrogates_bitmask == 0x0000) {
79
53.9k
      input += 16;
80
53.9k
    } else {
81
      // 2. We have some surrogates that have to be distinguished:
82
      //    - low  surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
83
      //    - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
84
      //
85
      //    Fact: high surrogate has 11th bit set (3rd bit in the higher byte)
86
87
      // V - non-surrogate code units
88
      //     V = not surrogates_wordmask
89
4.89k
      const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
90
91
      // H - word-mask for high surrogates: the six highest bits are 0b1101'11
92
4.89k
      const auto vH = (in & v_fc) == v_dc;
93
4.89k
      const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
94
95
      // L - word mask for low surrogates
96
      //     L = not H and surrogates_wordmask
97
4.89k
      const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
98
99
4.89k
      const uint16_t a = static_cast<uint16_t>(
100
4.89k
          L & (H >> 1)); // A low surrogate must be followed by high one.
101
                         // (A low surrogate placed in the 7th register's word
102
                         // is an exception we handle.)
103
4.89k
      const uint16_t b = static_cast<uint16_t>(
104
4.89k
          a << 1); // Just mark that the opposite fact holds,
105
                   // thanks to that we have only two masks for valid case.
106
4.89k
      const uint16_t c = static_cast<uint16_t>(
107
4.89k
          V | a | b); // Combine all the masks into the final one.
108
109
4.89k
      if (c == 0xffff) {
110
        // The whole input register contains valid UTF-16, i.e.,
111
        // either single code units or proper surrogate pairs.
112
3.05k
        input += 16;
113
3.05k
      } else if (c == 0x7fff) {
114
        // The 15 lower code units of the input register contains valid UTF-16.
115
        // The 15th word may be either a low or high surrogate. In the next
116
        // iteration we 1) check if the low surrogate is followed by a high
117
        // one, 2) reject sole high surrogate.
118
1.41k
        input += 15;
119
1.41k
      } else {
120
428
        return result(error_code::SURROGATE, input - start);
121
428
      }
122
4.89k
    }
123
58.7k
  }
124
125
9.30k
  return result(error_code::SUCCESS, input - start);
126
9.72k
}
simdutf.cpp:simdutf::result const simdutf::haswell::(anonymous namespace)::utf16::validate_utf16_with_errors<(simdutf::endianness)0>(char16_t const*, unsigned long)
Line
Count
Source
49
7.76k
const result validate_utf16_with_errors(const char16_t *input, size_t size) {
50
7.76k
  if (simdutf_unlikely(size == 0)) {
51
5.11k
    return result(error_code::SUCCESS, 0);
52
5.11k
  }
53
54
2.65k
  const char16_t *start = input;
55
2.65k
  const char16_t *end = input + size;
56
57
2.65k
  const auto v_d8 = simd8<uint8_t>::splat(0xd8);
58
2.65k
  const auto v_f8 = simd8<uint8_t>::splat(0xf8);
59
2.65k
  const auto v_fc = simd8<uint8_t>::splat(0xfc);
60
2.65k
  const auto v_dc = simd8<uint8_t>::splat(0xdc);
61
62
14.5k
  while (input + simd16<uint16_t>::SIZE * 2 < end) {
63
    // 0. Load data: since the validation takes into account only higher
64
    //    byte of each word, we compress the two vectors into one which
65
    //    consists only the higher bytes.
66
12.0k
    auto in0 = simd16<uint16_t>(input);
67
12.0k
    auto in1 =
68
12.0k
        simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
69
70
    // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16
71
    // and yields a single vector having only higher bytes of characters.
72
12.0k
    const auto in = utf16_gather_high_bytes<big_endian>(in0, in1);
73
74
    // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy).
75
12.0k
    const auto surrogates_wordmask = (in & v_f8) == v_d8;
76
12.0k
    const uint16_t surrogates_bitmask =
77
12.0k
        static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
78
12.0k
    if (surrogates_bitmask == 0x0000) {
79
11.0k
      input += 16;
80
11.0k
    } else {
81
      // 2. We have some surrogates that have to be distinguished:
82
      //    - low  surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
83
      //    - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
84
      //
85
      //    Fact: high surrogate has 11th bit set (3rd bit in the higher byte)
86
87
      // V - non-surrogate code units
88
      //     V = not surrogates_wordmask
89
912
      const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
90
91
      // H - word-mask for high surrogates: the six highest bits are 0b1101'11
92
912
      const auto vH = (in & v_fc) == v_dc;
93
912
      const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
94
95
      // L - word mask for low surrogates
96
      //     L = not H and surrogates_wordmask
97
912
      const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
98
99
912
      const uint16_t a = static_cast<uint16_t>(
100
912
          L & (H >> 1)); // A low surrogate must be followed by high one.
101
                         // (A low surrogate placed in the 7th register's word
102
                         // is an exception we handle.)
103
912
      const uint16_t b = static_cast<uint16_t>(
104
912
          a << 1); // Just mark that the opposite fact holds,
105
                   // thanks to that we have only two masks for valid case.
106
912
      const uint16_t c = static_cast<uint16_t>(
107
912
          V | a | b); // Combine all the masks into the final one.
108
109
912
      if (c == 0xffff) {
110
        // The whole input register contains valid UTF-16, i.e.,
111
        // either single code units or proper surrogate pairs.
112
560
        input += 16;
113
560
      } else if (c == 0x7fff) {
114
        // The 15 lower code units of the input register contains valid UTF-16.
115
        // The 15th word may be either a low or high surrogate. In the next
116
        // iteration we 1) check if the low surrogate is followed by a high
117
        // one, 2) reject sole high surrogate.
118
288
        input += 15;
119
288
      } else {
120
64
        return result(error_code::SURROGATE, input - start);
121
64
      }
122
912
    }
123
12.0k
  }
124
125
2.59k
  return result(error_code::SUCCESS, input - start);
126
2.65k
}
simdutf.cpp:simdutf::result const simdutf::haswell::(anonymous namespace)::utf16::validate_utf16_with_errors<(simdutf::endianness)1>(char16_t const*, unsigned long)
Line
Count
Source
49
7.75k
const result validate_utf16_with_errors(const char16_t *input, size_t size) {
50
7.75k
  if (simdutf_unlikely(size == 0)) {
51
5.12k
    return result(error_code::SUCCESS, 0);
52
5.12k
  }
53
54
2.63k
  const char16_t *start = input;
55
2.63k
  const char16_t *end = input + size;
56
57
2.63k
  const auto v_d8 = simd8<uint8_t>::splat(0xd8);
58
2.63k
  const auto v_f8 = simd8<uint8_t>::splat(0xf8);
59
2.63k
  const auto v_fc = simd8<uint8_t>::splat(0xfc);
60
2.63k
  const auto v_dc = simd8<uint8_t>::splat(0xdc);
61
62
14.5k
  while (input + simd16<uint16_t>::SIZE * 2 < end) {
63
    // 0. Load data: since the validation takes into account only higher
64
    //    byte of each word, we compress the two vectors into one which
65
    //    consists only the higher bytes.
66
12.0k
    auto in0 = simd16<uint16_t>(input);
67
12.0k
    auto in1 =
68
12.0k
        simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
69
70
    // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16
71
    // and yields a single vector having only higher bytes of characters.
72
12.0k
    const auto in = utf16_gather_high_bytes<big_endian>(in0, in1);
73
74
    // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy).
75
12.0k
    const auto surrogates_wordmask = (in & v_f8) == v_d8;
76
12.0k
    const uint16_t surrogates_bitmask =
77
12.0k
        static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
78
12.0k
    if (surrogates_bitmask == 0x0000) {
79
10.9k
      input += 16;
80
10.9k
    } else {
81
      // 2. We have some surrogates that have to be distinguished:
82
      //    - low  surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
83
      //    - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
84
      //
85
      //    Fact: high surrogate has 11th bit set (3rd bit in the higher byte)
86
87
      // V - non-surrogate code units
88
      //     V = not surrogates_wordmask
89
1.09k
      const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
90
91
      // H - word-mask for high surrogates: the six highest bits are 0b1101'11
92
1.09k
      const auto vH = (in & v_fc) == v_dc;
93
1.09k
      const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
94
95
      // L - word mask for low surrogates
96
      //     L = not H and surrogates_wordmask
97
1.09k
      const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
98
99
1.09k
      const uint16_t a = static_cast<uint16_t>(
100
1.09k
          L & (H >> 1)); // A low surrogate must be followed by high one.
101
                         // (A low surrogate placed in the 7th register's word
102
                         // is an exception we handle.)
103
1.09k
      const uint16_t b = static_cast<uint16_t>(
104
1.09k
          a << 1); // Just mark that the opposite fact holds,
105
                   // thanks to that we have only two masks for valid case.
106
1.09k
      const uint16_t c = static_cast<uint16_t>(
107
1.09k
          V | a | b); // Combine all the masks into the final one.
108
109
1.09k
      if (c == 0xffff) {
110
        // The whole input register contains valid UTF-16, i.e.,
111
        // either single code units or proper surrogate pairs.
112
688
        input += 16;
113
688
      } else if (c == 0x7fff) {
114
        // The 15 lower code units of the input register contains valid UTF-16.
115
        // The 15th word may be either a low or high surrogate. In the next
116
        // iteration we 1) check if the low surrogate is followed by a high
117
        // one, 2) reject sole high surrogate.
118
308
        input += 15;
119
308
      } else {
120
100
        return result(error_code::SURROGATE, input - start);
121
100
      }
122
1.09k
    }
123
12.0k
  }
124
125
2.53k
  return result(error_code::SUCCESS, input - start);
126
2.63k
}
simdutf.cpp:simdutf::result const simdutf::westmere::(anonymous namespace)::utf16::validate_utf16_with_errors<(simdutf::endianness)0>(char16_t const*, unsigned long)
Line
Count
Source
49
7.36k
const result validate_utf16_with_errors(const char16_t *input, size_t size) {
50
7.36k
  if (simdutf_unlikely(size == 0)) {
51
5.12k
    return result(error_code::SUCCESS, 0);
52
5.12k
  }
53
54
2.23k
  const char16_t *start = input;
55
2.23k
  const char16_t *end = input + size;
56
57
2.23k
  const auto v_d8 = simd8<uint8_t>::splat(0xd8);
58
2.23k
  const auto v_f8 = simd8<uint8_t>::splat(0xf8);
59
2.23k
  const auto v_fc = simd8<uint8_t>::splat(0xfc);
60
2.23k
  const auto v_dc = simd8<uint8_t>::splat(0xdc);
61
62
19.3k
  while (input + simd16<uint16_t>::SIZE * 2 < end) {
63
    // 0. Load data: since the validation takes into account only higher
64
    //    byte of each word, we compress the two vectors into one which
65
    //    consists only the higher bytes.
66
17.2k
    auto in0 = simd16<uint16_t>(input);
67
17.2k
    auto in1 =
68
17.2k
        simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
69
70
    // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16
71
    // and yields a single vector having only higher bytes of characters.
72
17.2k
    const auto in = utf16_gather_high_bytes<big_endian>(in0, in1);
73
74
    // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy).
75
17.2k
    const auto surrogates_wordmask = (in & v_f8) == v_d8;
76
17.2k
    const uint16_t surrogates_bitmask =
77
17.2k
        static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
78
17.2k
    if (surrogates_bitmask == 0x0000) {
79
15.7k
      input += 16;
80
15.7k
    } else {
81
      // 2. We have some surrogates that have to be distinguished:
82
      //    - low  surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
83
      //    - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
84
      //
85
      //    Fact: high surrogate has 11th bit set (3rd bit in the higher byte)
86
87
      // V - non-surrogate code units
88
      //     V = not surrogates_wordmask
89
1.43k
      const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
90
91
      // H - word-mask for high surrogates: the six highest bits are 0b1101'11
92
1.43k
      const auto vH = (in & v_fc) == v_dc;
93
1.43k
      const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
94
95
      // L - word mask for low surrogates
96
      //     L = not H and surrogates_wordmask
97
1.43k
      const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
98
99
1.43k
      const uint16_t a = static_cast<uint16_t>(
100
1.43k
          L & (H >> 1)); // A low surrogate must be followed by high one.
101
                         // (A low surrogate placed in the 7th register's word
102
                         // is an exception we handle.)
103
1.43k
      const uint16_t b = static_cast<uint16_t>(
104
1.43k
          a << 1); // Just mark that the opposite fact holds,
105
                   // thanks to that we have only two masks for valid case.
106
1.43k
      const uint16_t c = static_cast<uint16_t>(
107
1.43k
          V | a | b); // Combine all the masks into the final one.
108
109
1.43k
      if (c == 0xffff) {
110
        // The whole input register contains valid UTF-16, i.e.,
111
        // either single code units or proper surrogate pairs.
112
900
        input += 16;
113
900
      } else if (c == 0x7fff) {
114
        // The 15 lower code units of the input register contains valid UTF-16.
115
        // The 15th word may be either a low or high surrogate. In the next
116
        // iteration we 1) check if the low surrogate is followed by a high
117
        // one, 2) reject sole high surrogate.
118
405
        input += 15;
119
405
      } else {
120
128
        return result(error_code::SURROGATE, input - start);
121
128
      }
122
1.43k
    }
123
17.2k
  }
124
125
2.11k
  return result(error_code::SUCCESS, input - start);
126
2.23k
}
simdutf.cpp:simdutf::result const simdutf::westmere::(anonymous namespace)::utf16::validate_utf16_with_errors<(simdutf::endianness)1>(char16_t const*, unsigned long)
Line
Count
Source
49
7.34k
const result validate_utf16_with_errors(const char16_t *input, size_t size) {
50
7.34k
  if (simdutf_unlikely(size == 0)) {
51
5.14k
    return result(error_code::SUCCESS, 0);
52
5.14k
  }
53
54
2.20k
  const char16_t *start = input;
55
2.20k
  const char16_t *end = input + size;
56
57
2.20k
  const auto v_d8 = simd8<uint8_t>::splat(0xd8);
58
2.20k
  const auto v_f8 = simd8<uint8_t>::splat(0xf8);
59
2.20k
  const auto v_fc = simd8<uint8_t>::splat(0xfc);
60
2.20k
  const auto v_dc = simd8<uint8_t>::splat(0xdc);
61
62
19.5k
  while (input + simd16<uint16_t>::SIZE * 2 < end) {
63
    // 0. Load data: since the validation takes into account only higher
64
    //    byte of each word, we compress the two vectors into one which
65
    //    consists only the higher bytes.
66
17.5k
    auto in0 = simd16<uint16_t>(input);
67
17.5k
    auto in1 =
68
17.5k
        simd16<uint16_t>(input + simd16<uint16_t>::SIZE / sizeof(char16_t));
69
70
    // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16
71
    // and yields a single vector having only higher bytes of characters.
72
17.5k
    const auto in = utf16_gather_high_bytes<big_endian>(in0, in1);
73
74
    // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy).
75
17.5k
    const auto surrogates_wordmask = (in & v_f8) == v_d8;
76
17.5k
    const uint16_t surrogates_bitmask =
77
17.5k
        static_cast<uint16_t>(surrogates_wordmask.to_bitmask());
78
17.5k
    if (surrogates_bitmask == 0x0000) {
79
16.0k
      input += 16;
80
16.0k
    } else {
81
      // 2. We have some surrogates that have to be distinguished:
82
      //    - low  surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
83
      //    - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
84
      //
85
      //    Fact: high surrogate has 11th bit set (3rd bit in the higher byte)
86
87
      // V - non-surrogate code units
88
      //     V = not surrogates_wordmask
89
1.45k
      const uint16_t V = static_cast<uint16_t>(~surrogates_bitmask);
90
91
      // H - word-mask for high surrogates: the six highest bits are 0b1101'11
92
1.45k
      const auto vH = (in & v_fc) == v_dc;
93
1.45k
      const uint16_t H = static_cast<uint16_t>(vH.to_bitmask());
94
95
      // L - word mask for low surrogates
96
      //     L = not H and surrogates_wordmask
97
1.45k
      const uint16_t L = static_cast<uint16_t>(~H & surrogates_bitmask);
98
99
1.45k
      const uint16_t a = static_cast<uint16_t>(
100
1.45k
          L & (H >> 1)); // A low surrogate must be followed by high one.
101
                         // (A low surrogate placed in the 7th register's word
102
                         // is an exception we handle.)
103
1.45k
      const uint16_t b = static_cast<uint16_t>(
104
1.45k
          a << 1); // Just mark that the opposite fact holds,
105
                   // thanks to that we have only two masks for valid case.
106
1.45k
      const uint16_t c = static_cast<uint16_t>(
107
1.45k
          V | a | b); // Combine all the masks into the final one.
108
109
1.45k
      if (c == 0xffff) {
110
        // The whole input register contains valid UTF-16, i.e.,
111
        // either single code units or proper surrogate pairs.
112
908
        input += 16;
113
908
      } else if (c == 0x7fff) {
114
        // The 15 lower code units of the input register contains valid UTF-16.
115
        // The 15th word may be either a low or high surrogate. In the next
116
        // iteration we 1) check if the low surrogate is followed by a high
117
        // one, 2) reject sole high surrogate.
118
413
        input += 15;
119
413
      } else {
120
136
        return result(error_code::SURROGATE, input - start);
121
136
      }
122
1.45k
    }
123
17.5k
  }
124
125
2.06k
  return result(error_code::SUCCESS, input - start);
126
2.20k
}
127
128
template <endianness big_endian>
129
const result validate_utf16_as_ascii_with_errors(const char16_t *input,
130
0
                                                 size_t size) {
131
0
  if (simdutf_unlikely(size == 0)) {
132
0
    return result(error_code::SUCCESS, 0);
133
0
  }
134
0
  size_t pos = 0;
135
0
  for (; pos < size / 32 * 32; pos += 32) {
136
0
    simd16x32<uint16_t> input_vec(
137
0
        reinterpret_cast<const uint16_t *>(input + pos));
138
0
    if (!match_system(big_endian)) {
139
0
      input_vec.swap_bytes();
140
0
    }
141
0
    uint64_t matches = input_vec.lteq(uint16_t(0x7f));
142
0
    if (~matches) {
143
      // Found a code unit above 0x7F; report the first one
144
0
      int index = trailing_zeroes(~matches) / 2;
145
0
      return result(error_code::TOO_LARGE, pos + index);
146
0
    }
147
0
  }
148
149
  // Scalar tail
150
0
  while (pos < size) {
151
0
    char16_t v = big_endian ? scalar::u16_swap_bytes(input[pos]) : input[pos];
152
0
    if (v > 0x7F) {
153
0
      return result(error_code::TOO_LARGE, pos);
154
0
    }
155
0
    pos++;
156
0
  }
157
0
  return result(error_code::SUCCESS, size);
158
0
}
Unexecuted instantiation: simdutf.cpp:simdutf::result const simdutf::haswell::(anonymous namespace)::utf16::validate_utf16_as_ascii_with_errors<(simdutf::endianness)0>(char16_t const*, unsigned long)
Unexecuted instantiation: simdutf.cpp:simdutf::result const simdutf::haswell::(anonymous namespace)::utf16::validate_utf16_as_ascii_with_errors<(simdutf::endianness)1>(char16_t const*, unsigned long)
Unexecuted instantiation: simdutf.cpp:simdutf::result const simdutf::westmere::(anonymous namespace)::utf16::validate_utf16_as_ascii_with_errors<(simdutf::endianness)0>(char16_t const*, unsigned long)
Unexecuted instantiation: simdutf.cpp:simdutf::result const simdutf::westmere::(anonymous namespace)::utf16::validate_utf16_as_ascii_with_errors<(simdutf::endianness)1>(char16_t const*, unsigned long)
159
160
} // namespace utf16
161
} // unnamed namespace
162
} // namespace SIMDUTF_IMPLEMENTATION
163
} // namespace simdutf