Coverage Report

Created: 2024-09-19 09:45

/proc/self/cwd/test/common/json/utf8.h
Line
Count
Source
1
#pragma once
2
3
#include <cstdint>
4
5
#include "absl/strings/string_view.h"
6
7
namespace Envoy {
8
namespace Json {
9
namespace Utf8 {
10
11
// Constants used for decoding UTF-8 sequences. These are primarily needed
12
// by decodeUtf8, but are also useful for writing tests that cover all
13
// possible UTF-8 encodings.
14
static constexpr uint32_t Mask1Byte = 0b10000000; // Top 1 bit; presumably tested as (byte & MaskN) == PatternN -- confirm in decode().
15
static constexpr uint32_t Mask2Byte = 0b11100000; // Top 3 bits.
16
static constexpr uint32_t Mask3Byte = 0b11110000; // Top 4 bits.
17
static constexpr uint32_t Mask4Byte = 0b11111000; // Top 5 bits.
18
19
static constexpr uint32_t Pattern1Byte = 0b00000000; // 0xxxxxxx: UTF-8 single-byte lead.
20
static constexpr uint32_t Pattern2Byte = 0b11000000; // 110xxxxx: UTF-8 2-byte lead.
21
static constexpr uint32_t Pattern3Byte = 0b11100000; // 1110xxxx: UTF-8 3-byte lead.
22
static constexpr uint32_t Pattern4Byte = 0b11110000; // 11110xxx: UTF-8 4-byte lead.
23
24
static constexpr uint32_t ContinueMask = 0b11000000; // Top 2 bits.
25
static constexpr uint32_t ContinuePattern = 0b10000000; // 10xxxxxx: UTF-8 continuation byte.
26
27
static constexpr uint32_t Shift = 6; // UTF-8 continuation bytes carry 6 payload bits; presumably the per-byte shift -- confirm in decode().
28
29
using UnicodeSizePair = std::pair<uint32_t, uint32_t>; // {.first = Unicode value, .second = bytes consumed}. NOTE(review): std::pair lives in <utility>, which this header does not include directly.
30
31
/**
32
 * Decodes a single UTF-8 encoded code point from the start of the input.
33
 * @param str A possibly UTF-8 encoded string (passed as bytes/size in the pointer overload).
34
 * @return the pair containing the first Unicode symbol from str, and the number of bytes
35
 *         consumed from str. If str does not start with a valid UTF-8 code sequence,
36
 *         then zero is returned for the size (UnicodeSizePair.second) and the returned
37
 *         Unicode value should be ignored.
38
 */
39
UnicodeSizePair decode(const uint8_t* bytes, uint32_t size);
40
1.30k
inline UnicodeSizePair decode(absl::string_view str) { // string_view convenience overload of decode(bytes, size).
41
1.30k
  return decode(reinterpret_cast<const uint8_t*>(str.data()), str.size()); // Forwards to the pointer/length overload. NOTE(review): str.size() is implicitly converted to uint32_t here.
42
1.30k
}
43
44
} // namespace Utf8
45
} // namespace Json
46
} // namespace Envoy