Coverage Report

Created: 2025-10-10 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/simdutf/src/generic/validate_utf32.h
Line
Count
Source
1
namespace simdutf {
2
namespace SIMDUTF_IMPLEMENTATION {
3
namespace {
4
namespace utf32 {
5
6
4.88k
simdutf_really_inline bool validate(const char32_t *input, size_t size) {
7
4.88k
  if (simdutf_unlikely(size == 0)) {
8
    // empty input is valid UTF-32. protect the implementation from
9
    // handling nullptr
10
46
    return true;
11
46
  }
12
13
4.84k
  const char32_t *end = input + size;
14
15
4.84k
  using vector_u32 = simd32<uint32_t>;
16
17
4.84k
  const auto standardmax = vector_u32::splat(0x10ffff);
18
4.84k
  const auto offset = vector_u32::splat(0xffff2000);
19
4.84k
  const auto standardoffsetmax = vector_u32::splat(0xfffff7ff);
20
4.84k
  auto currentmax = vector_u32::zero();
21
4.84k
  auto currentoffsetmax = vector_u32::zero();
22
23
4.84k
  constexpr size_t N = vector_u32::ELEMENTS;
24
25
3.47M
  while (input + N < end) {
26
3.47M
    auto in = vector_u32(input);
27
3.47M
    if (!match_system(endianness::BIG)) {
28
3.47M
      in.swap_bytes();
29
3.47M
    }
30
31
3.47M
    currentmax = max(currentmax, in);
32
3.47M
    currentoffsetmax = max(currentoffsetmax, in + offset);
33
3.47M
    input += N;
34
3.47M
  }
35
36
4.84k
  const auto too_large = currentmax > standardmax;
37
4.84k
  if (too_large.any()) {
38
1.80k
    return false;
39
1.80k
  }
40
41
3.03k
  const auto surrogate = currentoffsetmax > standardoffsetmax;
42
3.03k
  if (surrogate.any()) {
43
43
    return false;
44
43
  }
45
46
2.98k
  return scalar::utf32::validate(input, end - input);
47
3.03k
}
simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf32::validate(char32_t const*, unsigned long)
Line
Count
Source
6
2.44k
simdutf_really_inline bool validate(const char32_t *input, size_t size) {
7
2.44k
  if (simdutf_unlikely(size == 0)) {
8
    // empty input is valid UTF-32. protect the implementation from
9
    // handling nullptr
10
23
    return true;
11
23
  }
12
13
2.42k
  const char32_t *end = input + size;
14
15
2.42k
  using vector_u32 = simd32<uint32_t>;
16
17
2.42k
  const auto standardmax = vector_u32::splat(0x10ffff);
18
2.42k
  const auto offset = vector_u32::splat(0xffff2000);
19
2.42k
  const auto standardoffsetmax = vector_u32::splat(0xfffff7ff);
20
2.42k
  auto currentmax = vector_u32::zero();
21
2.42k
  auto currentoffsetmax = vector_u32::zero();
22
23
2.42k
  constexpr size_t N = vector_u32::ELEMENTS;
24
25
1.15M
  while (input + N < end) {
26
1.15M
    auto in = vector_u32(input);
27
1.15M
    if (!match_system(endianness::BIG)) {
28
1.15M
      in.swap_bytes();
29
1.15M
    }
30
31
1.15M
    currentmax = max(currentmax, in);
32
1.15M
    currentoffsetmax = max(currentoffsetmax, in + offset);
33
1.15M
    input += N;
34
1.15M
  }
35
36
2.42k
  const auto too_large = currentmax > standardmax;
37
2.42k
  if (too_large.any()) {
38
838
    return false;
39
838
  }
40
41
1.58k
  const auto surrogate = currentoffsetmax > standardoffsetmax;
42
1.58k
  if (surrogate.any()) {
43
20
    return false;
44
20
  }
45
46
1.56k
  return scalar::utf32::validate(input, end - input);
47
1.58k
}
simdutf.cpp:simdutf::westmere::(anonymous namespace)::utf32::validate(char32_t const*, unsigned long)
Line
Count
Source
6
2.44k
simdutf_really_inline bool validate(const char32_t *input, size_t size) {
7
2.44k
  if (simdutf_unlikely(size == 0)) {
8
    // empty input is valid UTF-32. protect the implementation from
9
    // handling nullptr
10
23
    return true;
11
23
  }
12
13
2.42k
  const char32_t *end = input + size;
14
15
2.42k
  using vector_u32 = simd32<uint32_t>;
16
17
2.42k
  const auto standardmax = vector_u32::splat(0x10ffff);
18
2.42k
  const auto offset = vector_u32::splat(0xffff2000);
19
2.42k
  const auto standardoffsetmax = vector_u32::splat(0xfffff7ff);
20
2.42k
  auto currentmax = vector_u32::zero();
21
2.42k
  auto currentoffsetmax = vector_u32::zero();
22
23
2.42k
  constexpr size_t N = vector_u32::ELEMENTS;
24
25
2.31M
  while (input + N < end) {
26
2.31M
    auto in = vector_u32(input);
27
2.31M
    if (!match_system(endianness::BIG)) {
28
2.31M
      in.swap_bytes();
29
2.31M
    }
30
31
2.31M
    currentmax = max(currentmax, in);
32
2.31M
    currentoffsetmax = max(currentoffsetmax, in + offset);
33
2.31M
    input += N;
34
2.31M
  }
35
36
2.42k
  const auto too_large = currentmax > standardmax;
37
2.42k
  if (too_large.any()) {
38
971
    return false;
39
971
  }
40
41
1.44k
  const auto surrogate = currentoffsetmax > standardoffsetmax;
42
1.44k
  if (surrogate.any()) {
43
23
    return false;
44
23
  }
45
46
1.42k
  return scalar::utf32::validate(input, end - input);
47
1.44k
}
48
49
simdutf_really_inline result validate_with_errors(const char32_t *input,
50
4.88k
                                                  size_t size) {
51
4.88k
  if (simdutf_unlikely(size == 0)) {
52
    // empty input is valid UTF-32. protect the implementation from
53
    // handling nullptr
54
46
    return result(error_code::SUCCESS, 0);
55
46
  }
56
57
4.84k
  const char32_t *start = input;
58
4.84k
  const char32_t *end = input + size;
59
60
4.84k
  using vector_u32 = simd32<uint32_t>;
61
62
4.84k
  const auto standardmax = vector_u32::splat(0x10ffff + 1);
63
4.84k
  const auto surrogate_mask = vector_u32::splat(0xfffff800);
64
4.84k
  const auto surrogate_byte = vector_u32::splat(0x0000d800);
65
66
4.84k
  constexpr size_t N = vector_u32::ELEMENTS;
67
68
375k
  while (input + N < end) {
69
372k
    auto in = vector_u32(input);
70
372k
    if (!match_system(endianness::BIG)) {
71
372k
      in.swap_bytes();
72
372k
    }
73
74
372k
    const auto too_large = in >= standardmax;
75
372k
    const auto surrogate = (in & surrogate_mask) == surrogate_byte;
76
77
372k
    const auto combined = too_large | surrogate;
78
372k
    if (simdutf_unlikely(combined.any())) {
79
1.85k
      const size_t consumed = input - start;
80
1.85k
      auto sr = scalar::utf32::validate_with_errors(input, end - input);
81
1.85k
      sr.count += consumed;
82
83
1.85k
      return sr;
84
1.85k
    }
85
86
370k
    input += N;
87
370k
  }
88
89
2.98k
  const size_t consumed = input - start;
90
2.98k
  auto sr = scalar::utf32::validate_with_errors(input, end - input);
91
2.98k
  sr.count += consumed;
92
93
2.98k
  return sr;
94
4.84k
}
simdutf.cpp:simdutf::haswell::(anonymous namespace)::utf32::validate_with_errors(char32_t const*, unsigned long)
Line
Count
Source
50
2.44k
                                                  size_t size) {
51
2.44k
  if (simdutf_unlikely(size == 0)) {
52
    // empty input is valid UTF-32. protect the implementation from
53
    // handling nullptr
54
23
    return result(error_code::SUCCESS, 0);
55
23
  }
56
57
2.42k
  const char32_t *start = input;
58
2.42k
  const char32_t *end = input + size;
59
60
2.42k
  using vector_u32 = simd32<uint32_t>;
61
62
2.42k
  const auto standardmax = vector_u32::splat(0x10ffff + 1);
63
2.42k
  const auto surrogate_mask = vector_u32::splat(0xfffff800);
64
2.42k
  const auto surrogate_byte = vector_u32::splat(0x0000d800);
65
66
2.42k
  constexpr size_t N = vector_u32::ELEMENTS;
67
68
125k
  while (input + N < end) {
69
124k
    auto in = vector_u32(input);
70
124k
    if (!match_system(endianness::BIG)) {
71
124k
      in.swap_bytes();
72
124k
    }
73
74
124k
    const auto too_large = in >= standardmax;
75
124k
    const auto surrogate = (in & surrogate_mask) == surrogate_byte;
76
77
124k
    const auto combined = too_large | surrogate;
78
124k
    if (simdutf_unlikely(combined.any())) {
79
858
      const size_t consumed = input - start;
80
858
      auto sr = scalar::utf32::validate_with_errors(input, end - input);
81
858
      sr.count += consumed;
82
83
858
      return sr;
84
858
    }
85
86
123k
    input += N;
87
123k
  }
88
89
1.56k
  const size_t consumed = input - start;
90
1.56k
  auto sr = scalar::utf32::validate_with_errors(input, end - input);
91
1.56k
  sr.count += consumed;
92
93
1.56k
  return sr;
94
2.42k
}
simdutf.cpp:simdutf::westmere::(anonymous namespace)::utf32::validate_with_errors(char32_t const*, unsigned long)
Line
Count
Source
50
2.44k
                                                  size_t size) {
51
2.44k
  if (simdutf_unlikely(size == 0)) {
52
    // empty input is valid UTF-32. protect the implementation from
53
    // handling nullptr
54
23
    return result(error_code::SUCCESS, 0);
55
23
  }
56
57
2.42k
  const char32_t *start = input;
58
2.42k
  const char32_t *end = input + size;
59
60
2.42k
  using vector_u32 = simd32<uint32_t>;
61
62
2.42k
  const auto standardmax = vector_u32::splat(0x10ffff + 1);
63
2.42k
  const auto surrogate_mask = vector_u32::splat(0xfffff800);
64
2.42k
  const auto surrogate_byte = vector_u32::splat(0x0000d800);
65
66
2.42k
  constexpr size_t N = vector_u32::ELEMENTS;
67
68
249k
  while (input + N < end) {
69
248k
    auto in = vector_u32(input);
70
248k
    if (!match_system(endianness::BIG)) {
71
248k
      in.swap_bytes();
72
248k
    }
73
74
248k
    const auto too_large = in >= standardmax;
75
248k
    const auto surrogate = (in & surrogate_mask) == surrogate_byte;
76
77
248k
    const auto combined = too_large | surrogate;
78
248k
    if (simdutf_unlikely(combined.any())) {
79
994
      const size_t consumed = input - start;
80
994
      auto sr = scalar::utf32::validate_with_errors(input, end - input);
81
994
      sr.count += consumed;
82
83
994
      return sr;
84
994
    }
85
86
247k
    input += N;
87
247k
  }
88
89
1.42k
  const size_t consumed = input - start;
90
1.42k
  auto sr = scalar::utf32::validate_with_errors(input, end - input);
91
1.42k
  sr.count += consumed;
92
93
1.42k
  return sr;
94
2.42k
}
95
96
} // namespace utf32
97
} // unnamed namespace
98
} // namespace SIMDUTF_IMPLEMENTATION
99
} // namespace simdutf