Coverage Report

Created: 2026-06-21 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/boost/boost/json/detail/utf8.hpp
Line
Count
Source
1
//
2
// Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
3
//
4
// Distributed under the Boost Software License, Version 1.0. (See accompanying
5
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
//
7
// Official repository: https://github.com/boostorg/json
8
//
9
10
#ifndef BOOST_JSON_DETAIL_UTF8_HPP
11
#define BOOST_JSON_DETAIL_UTF8_HPP
12
13
#include <boost/endian/conversion.hpp>
14
#include <boost/json/detail/config.hpp>
15
16
#include <cstddef>
17
#include <cstring>
18
#include <cstdint>
19
20
namespace boost {
21
namespace json {
22
namespace detail {
23
24
template<int N>
25
std::uint32_t
26
load_little_endian(void const* p)
27
646k
{
28
646k
    std::uint32_t v = 0;
29
646k
    std::memcpy(&v, p, N);
30
646k
    endian::little_to_native_inplace(v);
31
646k
    return v;
32
646k
}
unsigned int boost::json::detail::load_little_endian<2>(void const*)
Line
Count
Source
27
17.2k
{
28
17.2k
    std::uint32_t v = 0;
29
17.2k
    std::memcpy(&v, p, N);
30
17.2k
    endian::little_to_native_inplace(v);
31
17.2k
    return v;
32
17.2k
}
unsigned int boost::json::detail::load_little_endian<3>(void const*)
Line
Count
Source
27
32.3k
{
28
32.3k
    std::uint32_t v = 0;
29
32.3k
    std::memcpy(&v, p, N);
30
32.3k
    endian::little_to_native_inplace(v);
31
32.3k
    return v;
32
32.3k
}
unsigned int boost::json::detail::load_little_endian<4>(void const*)
Line
Count
Source
27
596k
{
28
596k
    std::uint32_t v = 0;
29
596k
    std::memcpy(&v, p, N);
30
596k
    endian::little_to_native_inplace(v);
31
596k
    return v;
32
596k
}
33
34
inline
35
uint16_t
36
classify_utf8(char c)
37
81.4k
{
38
    // 0x000 = invalid
39
    // 0x102 = 2 bytes, second byte [80, BF]
40
    // 0x203 = 3 bytes, second byte [A0, BF]
41
    // 0x303 = 3 bytes, second byte [80, BF]
42
    // 0x403 = 3 bytes, second byte [80, 9F]
43
    // 0x504 = 4 bytes, second byte [90, BF]
44
    // 0x604 = 4 bytes, second byte [80, BF]
45
    // 0x704 = 4 bytes, second byte [80, 8F]
46
81.4k
    static constexpr uint16_t first[128]
47
81.4k
    {
48
81.4k
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
49
81.4k
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
50
81.4k
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
51
81.4k
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
52
81.4k
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
53
81.4k
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
54
81.4k
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
55
81.4k
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
56
57
81.4k
       0x000, 0x000, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
58
81.4k
       0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
59
81.4k
       0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
60
81.4k
       0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
61
81.4k
       0x203, 0x303, 0x303, 0x303, 0x303, 0x303, 0x303, 0x303,
62
81.4k
       0x303, 0x303, 0x303, 0x303, 0x303, 0x403, 0x303, 0x303,
63
81.4k
       0x504, 0x604, 0x604, 0x604, 0x704, 0x000, 0x000, 0x000,
64
81.4k
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
65
81.4k
    };
66
81.4k
    return first[static_cast<unsigned char>(c & 0x7F)];
67
81.4k
}
68
69
inline
70
bool
71
is_valid_utf8(const char* p, uint16_t first)
72
80.6k
{
73
80.6k
    uint32_t v;
74
80.6k
    switch(first >> 8)
75
80.6k
    {
76
190
    default:
77
190
        return false;
78
79
    // 2 bytes, second byte [80, BF]
80
17.2k
    case 1:
81
17.2k
        v = load_little_endian<2>(p);
82
17.2k
        return (v & 0xC000) == 0x8000;
83
84
    // 3 bytes, second byte [A0, BF]
85
7.82k
    case 2:
86
7.82k
        v = load_little_endian<3>(p);
87
7.82k
        return (v & 0xC0E000) == 0x80A000;
88
89
    // 3 bytes, second byte [80, BF]
90
19.5k
    case 3:
91
19.5k
        v = load_little_endian<3>(p);
92
19.5k
        return (v & 0xC0C000) == 0x808000;
93
94
    // 3 bytes, second byte [80, 9F]
95
4.88k
    case 4:
96
4.88k
        v = load_little_endian<3>(p);
97
4.88k
        return (v & 0xC0E000) == 0x808000;
98
99
    // 4 bytes, second byte [90, BF]
100
3.71k
    case 5:
101
3.71k
        v = load_little_endian<4>(p);
102
3.71k
        return (v & 0xC0C0FF00) + 0x7F7F7000 <= 0x2F00;
103
104
    // 4 bytes, second byte [80, BF]
105
25.5k
    case 6:
106
25.5k
        v = load_little_endian<4>(p);
107
25.5k
        return (v & 0xC0C0C000) == 0x80808000;
108
109
    // 4 bytes, second byte [80, 8F]
110
1.61k
    case 7:
111
1.61k
        v = load_little_endian<4>(p);
112
1.61k
        return (v & 0xC0C0F000) == 0x80808000;
113
80.6k
    }
114
80.6k
}
115
116
class utf8_sequence
117
{
118
    char seq_[4];
119
    uint16_t first_;
120
    uint8_t size_;
121
122
public:
123
    void
124
    save(
125
        const char* p,
126
        std::size_t remain) noexcept
127
796
    {
128
796
        first_ = classify_utf8(*p );
129
796
        if(remain >= length())
130
734
            size_ = length();
131
62
        else
132
62
            size_ = static_cast<uint8_t>(remain);
133
796
        std::memcpy(seq_, p, size_);
134
796
    }
135
136
    uint8_t
137
    length() const noexcept
138
2.39k
    {
139
2.39k
        return first_ & 0xFF;
140
2.39k
    }
141
142
    bool
143
    complete() const noexcept
144
796
    {
145
796
        return size_ >= length();
146
796
    }
147
148
    // returns true if complete
149
    bool
150
    append(
151
        const char* p,
152
        std::size_t remain) noexcept
153
22
    {
154
22
        if(BOOST_JSON_UNLIKELY(needed() == 0))
155
0
            return true;
156
22
        if(BOOST_JSON_LIKELY(remain >= needed()))
157
0
        {
158
0
            std::memcpy(
159
0
                seq_ + size_, p, needed());
160
0
            size_ = length();
161
0
            return true;
162
0
        }
163
22
        if(BOOST_JSON_LIKELY(remain > 0))
164
0
        {
165
0
            std::memcpy(seq_ + size_, p, remain);
166
0
            size_ += static_cast<uint8_t>(remain);
167
0
        }
168
22
        return false;
169
22
    }
170
171
    const char*
172
    data() const noexcept
173
0
    {
174
0
        return seq_;
175
0
    }
176
177
    uint8_t
178
    needed() const noexcept
179
66
    {
180
66
        return length() - size_;
181
66
    }
182
183
    bool
184
    valid() const noexcept
185
0
    {
186
        BOOST_ASSERT(size_ >= length());
187
0
        return is_valid_utf8(seq_, first_);
188
0
    }
189
};
190
191
} // detail
192
} // namespace json
193
} // namespace boost
194
195
#endif