Coverage Report

Created: 2026-01-13 06:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/boost/boost/json/detail/utf8.hpp
Line
Count
Source
1
//
2
// Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
3
//
4
// Distributed under the Boost Software License, Version 1.0. (See accompanying
5
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
//
7
// Official repository: https://github.com/boostorg/json
8
//
9
10
#ifndef BOOST_JSON_DETAIL_UTF8_HPP
11
#define BOOST_JSON_DETAIL_UTF8_HPP
12
13
#include <boost/endian/conversion.hpp>
14
#include <boost/json/detail/config.hpp>
15
16
#include <cstddef>
17
#include <cstring>
18
#include <cstdint>
19
20
namespace boost {
21
namespace json {
22
namespace detail {
23
24
template<int N>
25
std::uint32_t
26
load_little_endian(void const* p)
27
785k
{
28
785k
    std::uint32_t v = 0;
29
785k
    std::memcpy(&v, p, N);
30
785k
    endian::little_to_native_inplace(v);
31
785k
    return v;
32
785k
}
unsigned int boost::json::detail::load_little_endian<2>(void const*)
Line
Count
Source
27
12.6k
{
28
12.6k
    std::uint32_t v = 0;
29
12.6k
    std::memcpy(&v, p, N);
30
12.6k
    endian::little_to_native_inplace(v);
31
12.6k
    return v;
32
12.6k
}
unsigned int boost::json::detail::load_little_endian<3>(void const*)
Line
Count
Source
27
20.1k
{
28
20.1k
    std::uint32_t v = 0;
29
20.1k
    std::memcpy(&v, p, N);
30
20.1k
    endian::little_to_native_inplace(v);
31
20.1k
    return v;
32
20.1k
}
unsigned int boost::json::detail::load_little_endian<4>(void const*)
Line
Count
Source
27
752k
{
28
752k
    std::uint32_t v = 0;
29
752k
    std::memcpy(&v, p, N);
30
752k
    endian::little_to_native_inplace(v);
31
752k
    return v;
32
752k
}
33
34
// Classifies the leading byte of a multi-byte UTF-8 sequence.
//
// The result packs two fields: the low byte is the total length of the
// sequence in bytes, and the high byte is a rule number telling which
// range the second byte must fall in:
//
//   0x000  invalid leading byte
//   0x102  2 bytes, second byte in [80, BF]
//   0x203  3 bytes, second byte in [A0, BF]
//   0x303  3 bytes, second byte in [80, BF]
//   0x403  3 bytes, second byte in [80, 9F]
//   0x504  4 bytes, second byte in [90, BF]
//   0x604  4 bytes, second byte in [80, BF]
//   0x704  4 bytes, second byte in [80, 8F]
//
// Bit 7 of `c` is ignored: the table is indexed by the low seven bits,
// i.e. as if `c` were in [0x80, 0xFF].
// NOTE(review): presumably callers only pass bytes with the high bit
// set; confirm against the call sites.
inline
uint16_t
classify_utf8(char c)
{
    static constexpr uint16_t lead_info[128]
    {
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // 80..87
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // 88..8F
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // 90..97
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // 98..9F
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // A0..A7
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // A8..AF
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // B0..B7
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // B8..BF

       0x000, 0x000, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, // C0..C7
       0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, // C8..CF
       0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, // D0..D7
       0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, // D8..DF
       0x203, 0x303, 0x303, 0x303, 0x303, 0x303, 0x303, 0x303, // E0..E7
       0x303, 0x303, 0x303, 0x303, 0x303, 0x403, 0x303, 0x303, // E8..EF
       0x504, 0x604, 0x604, 0x604, 0x704, 0x000, 0x000, 0x000, // F0..F7
       0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, // F8..FF
    };

    // drop the high bit so the index always lands inside the table
    unsigned char const slot = static_cast<unsigned char>(c & 0x7F);
    return lead_info[slot];
}
68
69
inline
70
bool
71
is_valid_utf8(const char* p, uint16_t first)
72
56.9k
{
73
56.9k
    uint32_t v;
74
56.9k
    switch(first >> 8)
75
56.9k
    {
76
304
    default:
77
304
        return false;
78
79
    // 2 bytes, second byte [80, BF]
80
12.6k
    case 1:
81
12.6k
        v = load_little_endian<2>(p);
82
12.6k
        return (v & 0xC000) == 0x8000;
83
84
    // 3 bytes, second byte [A0, BF]
85
8.76k
    case 2:
86
8.76k
        v = load_little_endian<3>(p);
87
8.76k
        return (v & 0xC0E000) == 0x80A000;
88
89
    // 3 bytes, second byte [80, BF]
90
7.59k
    case 3:
91
7.59k
        v = load_little_endian<3>(p);
92
7.59k
        return (v & 0xC0C000) == 0x808000;
93
94
    // 3 bytes, second byte [80, 9F]
95
3.82k
    case 4:
96
3.82k
        v = load_little_endian<3>(p);
97
3.82k
        return (v & 0xC0E000) == 0x808000;
98
99
    // 4 bytes, second byte [90, BF]
100
5.35k
    case 5:
101
5.35k
        v = load_little_endian<4>(p);
102
5.35k
        return (v & 0xC0C0FF00) + 0x7F7F7000 <= 0x2F00;
103
104
    // 4 bytes, second byte [80, BF]
105
16.7k
    case 6:
106
16.7k
        v = load_little_endian<4>(p);
107
16.7k
        return (v & 0xC0C0C000) == 0x80808000;
108
109
    // 4 bytes, second byte [80, 8F]
110
1.71k
    case 7:
111
1.71k
        v = load_little_endian<4>(p);
112
1.71k
        return (v & 0xC0C0F000) == 0x80808000;
113
56.9k
    }
114
56.9k
}
115
116
class utf8_sequence
117
{
118
    char seq_[4];
119
    uint16_t first_;
120
    uint8_t size_;
121
122
public:
123
    void
124
    save(
125
        const char* p,
126
        std::size_t remain) noexcept
127
1.14k
    {
128
1.14k
        first_ = classify_utf8(*p );
129
1.14k
        if(remain >= length())
130
1.07k
            size_ = length();
131
77
        else
132
77
            size_ = static_cast<uint8_t>(remain);
133
1.14k
        std::memcpy(seq_, p, size_);
134
1.14k
    }
135
136
    uint8_t
137
    length() const noexcept
138
3.42k
    {
139
3.42k
        return first_ & 0xFF;
140
3.42k
    }
141
142
    bool
143
    complete() const noexcept
144
1.14k
    {
145
1.14k
        return size_ >= length();
146
1.14k
    }
147
148
    // returns true if complete
149
    bool
150
    append(
151
        const char* p,
152
        std::size_t remain) noexcept
153
18
    {
154
18
        if(BOOST_JSON_UNLIKELY(needed() == 0))
155
0
            return true;
156
18
        if(BOOST_JSON_LIKELY(remain >= needed()))
157
0
        {
158
0
            std::memcpy(
159
0
                seq_ + size_, p, needed());
160
0
            size_ = length();
161
0
            return true;
162
0
        }
163
18
        if(BOOST_JSON_LIKELY(remain > 0))
164
0
        {
165
0
            std::memcpy(seq_ + size_, p, remain);
166
0
            size_ += static_cast<uint8_t>(remain);
167
0
        }
168
18
        return false;
169
18
    }
170
171
    const char*
172
    data() const noexcept
173
0
    {
174
0
        return seq_;
175
0
    }
176
177
    uint8_t
178
    needed() const noexcept
179
54
    {
180
54
        return length() - size_;
181
54
    }
182
183
    bool
184
    valid() const noexcept
185
0
    {
186
        BOOST_ASSERT(size_ >= length());
187
0
        return is_valid_utf8(seq_, first_);
188
0
    }
189
};
190
191
} // detail
192
} // namespace json
193
} // namespace boost
194
195
#endif