/proc/self/cwd/test/common/json/utf8.h
Line | Count | Source |
1 | | #pragma once |
2 | | |
3 | | #include <cstdint> |
4 | | |
5 | | #include "absl/strings/string_view.h" |
6 | | |
7 | | namespace Envoy { |
8 | | namespace Json { |
9 | | namespace Utf8 { |
10 | | |
11 | | // Constants used for decoding UTF-8 sequences, primarily by decodeUtf8; also useful for tests |
12 | | // that cover all possible UTF-8 encodings. (b & MaskNByte) == PatternNByte matches the lead byte |
13 | | // of an N-byte sequence; ContinueMask/ContinuePattern match 10xxxxxx bytes (6 low bits, cf. Shift). |
14 | | static constexpr uint32_t Mask1Byte = 0b10000000; |
15 | | static constexpr uint32_t Mask2Byte = 0b11100000; |
16 | | static constexpr uint32_t Mask3Byte = 0b11110000; |
17 | | static constexpr uint32_t Mask4Byte = 0b11111000; |
18 | | |
19 | | static constexpr uint32_t Pattern1Byte = 0b00000000; |
20 | | static constexpr uint32_t Pattern2Byte = 0b11000000; |
21 | | static constexpr uint32_t Pattern3Byte = 0b11100000; |
22 | | static constexpr uint32_t Pattern4Byte = 0b11110000; |
23 | | |
24 | | static constexpr uint32_t ContinueMask = 0b11000000; |
25 | | static constexpr uint32_t ContinuePattern = 0b10000000; |
26 | | |
27 | | static constexpr uint32_t Shift = 6; |
28 | | |
29 | | using UnicodeSizePair = std::pair<uint32_t, uint32_t>; |
30 | | |
31 | | /** |
32 | | * Decodes a single UTF-8 encoded code-point from the start of a byte sequence. |
33 | | * @param bytes pointer to the possibly UTF-8 encoded bytes to decode. |
34 | | * @param size the number of bytes available at bytes (the string_view overload passes str.size()). |
35 | | * @return the pair containing the first Unicode symbol from the input, and the number of bytes |
36 | | * consumed. If the input does not start with a valid UTF-8 code sequence, then zero is |
37 | | * returned for the size (UnicodeSizePair.second) and the Unicode value should be ignored. |
38 | | */ |
39 | | UnicodeSizePair decode(const uint8_t* bytes, uint32_t size); |
40 | 1.30k | inline UnicodeSizePair decode(absl::string_view str) { |
41 | 1.30k | return decode(reinterpret_cast<const uint8_t*>(str.data()), str.size()); |
42 | 1.30k | } |
43 | | |
44 | | } // namespace Utf8 |
45 | | } // namespace Json |
46 | | } // namespace Envoy |