Coverage Report

Created: 2025-10-12 07:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/simdutf/src/scalar/utf32.h
Line
Count
Source
1
#ifndef SIMDUTF_UTF32_H
2
#define SIMDUTF_UTF32_H
3
4
namespace simdutf {
5
namespace scalar {
6
namespace {
7
namespace utf32 {
8
9
inline simdutf_warn_unused bool validate(const char32_t *buf,
10
14.1k
                                         size_t len) noexcept {
11
14.1k
  const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
12
14.1k
  uint64_t pos = 0;
13
129k
  for (; pos < len; pos++) {
14
115k
    uint32_t word = data[pos];
15
115k
    if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) {
16
0
      return false;
17
0
    }
18
115k
  }
19
14.1k
  return true;
20
14.1k
}
21
22
inline simdutf_warn_unused result validate_with_errors(const char32_t *buf,
23
0
                                                       size_t len) noexcept {
24
0
  const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
25
0
  size_t pos = 0;
26
0
  for (; pos < len; pos++) {
27
0
    uint32_t word = data[pos];
28
0
    if (word > 0x10FFFF) {
29
0
      return result(error_code::TOO_LARGE, pos);
30
0
    }
31
0
    if (word >= 0xD800 && word <= 0xDFFF) {
32
0
      return result(error_code::SURROGATE, pos);
33
0
    }
34
0
  }
35
0
  return result(error_code::SUCCESS, pos);
36
0
}
37
38
15.1k
inline size_t utf8_length_from_utf32(const char32_t *buf, size_t len) {
39
  // We are not BOM aware.
40
15.1k
  const uint32_t *p = reinterpret_cast<const uint32_t *>(buf);
41
15.1k
  size_t counter{0};
42
58.9k
  for (size_t i = 0; i < len; i++) {
43
    // credit: @ttsugriy  for the vectorizable approach
44
43.8k
    counter++;                                     // ASCII
45
43.8k
    counter += static_cast<size_t>(p[i] > 0x7F);   // two-byte
46
43.8k
    counter += static_cast<size_t>(p[i] > 0x7FF);  // three-byte
47
43.8k
    counter += static_cast<size_t>(p[i] > 0xFFFF); // four-bytes
48
43.8k
  }
49
15.1k
  return counter;
50
15.1k
}
51
52
0
inline size_t utf16_length_from_utf32(const char32_t *buf, size_t len) {
53
  // We are not BOM aware.
54
0
  const uint32_t *p = reinterpret_cast<const uint32_t *>(buf);
55
0
  size_t counter{0};
56
0
  for (size_t i = 0; i < len; i++) {
57
0
    counter++;                                     // non-surrogate word
58
0
    counter += static_cast<size_t>(p[i] > 0xFFFF); // surrogate pair
59
0
  }
60
0
  return counter;
61
0
}
62
63
} // namespace utf32
64
} // unnamed namespace
65
} // namespace scalar
66
} // namespace simdutf
67
68
#endif