/src/jsonnet/core/unicode.h
Line | Count | Source |
1 | | /* |
2 | | Copyright 2015 Google Inc. All rights reserved. |
3 | | |
4 | | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | you may not use this file except in compliance with the License. |
6 | | You may obtain a copy of the License at |
7 | | |
8 | | http://www.apache.org/licenses/LICENSE-2.0 |
9 | | |
10 | | Unless required by applicable law or agreed to in writing, software |
11 | | distributed under the License is distributed on an "AS IS" BASIS, |
12 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | See the License for the specific language governing permissions and |
14 | | limitations under the License. |
15 | | */ |
16 | | |
17 | | #ifndef JSONNET_UNICODE_H |
18 | | #define JSONNET_UNICODE_H |
19 | | |
20 | | /** Substituted when a unicode translation format encoding error is encountered. */ |
21 | 10.9M | #define JSONNET_CODEPOINT_ERROR 0xfffd |
22 | 1.50G | #define JSONNET_CODEPOINT_MAX 0x110000 |
23 | | |
24 | | namespace jsonnet::internal { |
25 | | |
26 | | /** Convert a unicode codepoint to UTF8. |
27 | | * |
28 | | * \param x The unicode codepoint. |
29 | | * \param s The UTF-8 string to append to. |
30 | | * \returns The number of characters appended. |
31 | | */ |
32 | | static inline int encode_utf8(char32_t x, std::string &s) |
33 | 1.50G | { |
34 | 1.50G | if (x >= JSONNET_CODEPOINT_MAX) |
35 | 3.08k | x = JSONNET_CODEPOINT_ERROR; |
36 | | |
37 | | // 00ZZZzzz 00zzYYYY 00Yyyyxx 00xxxxxx |
38 | 1.50G | long bytes = ((x & 0x1C0000) << 6) | ((x & 0x03F000) << 4) | ((x & 0x0FC0) << 2) | (x & 0x3F); |
39 | | |
40 | 1.50G | if (x < 0x80) { |
41 | 1.49G | s.push_back((char)x); |
42 | 1.49G | return 1; |
43 | 1.49G | } else if (x < 0x800) { // note that capital 'Y' bits must be 0 |
44 | 54.7k | bytes |= 0xC080; |
45 | 54.7k | s.push_back((bytes >> 8) & 0xFF); |
46 | 54.7k | s.push_back((bytes >> 0) & 0xFF); |
47 | 54.7k | return 2; |
48 | 3.28M | } else if (x < 0x10000) { // note that 'z' bits must be 0 |
49 | 3.25M | bytes |= 0xE08080; |
50 | 3.25M | s.push_back((bytes >> 16) & 0xFF); |
51 | 3.25M | s.push_back((bytes >> 8) & 0xFF); |
52 | 3.25M | s.push_back((bytes >> 0) & 0xFF); |
53 | 3.25M | return 3; |
54 | 3.25M | } else if (x < 0x110000) { // note that capital 'Z' bits must be 0 |
55 | 31.6k | bytes |= 0xF0808080; |
56 | 31.6k | s.push_back((bytes >> 24) & 0xFF); |
57 | 31.6k | s.push_back((bytes >> 16) & 0xFF); |
58 | 31.6k | s.push_back((bytes >> 8) & 0xFF); |
59 | 31.6k | s.push_back((bytes >> 0) & 0xFF); |
60 | 31.6k | return 4; |
61 | 31.6k | } else { |
62 | 0 | std::cerr << "Should never get here." << std::endl; |
63 | 0 | abort(); |
64 | 0 | } |
65 | 1.50G | } Unexecuted instantiation: libjsonnet.cpp:jsonnet::internal::encode_utf8(char32_t, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Unexecuted instantiation: parser.cpp:jsonnet::internal::encode_utf8(char32_t, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Unexecuted instantiation: pass.cpp:jsonnet::internal::encode_utf8(char32_t, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) static_analysis.cpp:jsonnet::internal::encode_utf8(char32_t, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 33 | 24.1M | { | 34 | 24.1M | if (x >= JSONNET_CODEPOINT_MAX) | 35 | 0 | x = JSONNET_CODEPOINT_ERROR; | 36 | | | 37 | | // 00ZZZzzz 00zzYYYY 00Yyyyxx 00xxxxxx | 38 | 24.1M | long bytes = ((x & 0x1C0000) << 6) | ((x & 0x03F000) << 4) | ((x & 0x0FC0) << 2) | (x & 0x3F); | 39 | | | 40 | 24.1M | if (x < 0x80) { | 41 | 24.1M | s.push_back((char)x); | 42 | 24.1M | return 1; | 43 | 24.1M | } else if (x < 0x800) { // note that capital 'Y' bits must be 0 | 44 | 0 | bytes |= 0xC080; | 45 | 0 | s.push_back((bytes >> 8) & 0xFF); | 46 | 0 | s.push_back((bytes >> 0) & 0xFF); | 47 | 0 | return 2; | 48 | 0 | } else if (x < 0x10000) { // note that 'z' bits must be 0 | 49 | 0 | bytes |= 0xE08080; | 50 | 0 | s.push_back((bytes >> 16) & 0xFF); | 51 | 0 | s.push_back((bytes >> 8) & 0xFF); | 52 | 0 | s.push_back((bytes >> 0) & 0xFF); | 53 | 0 | return 3; | 54 | 0 | } else if (x < 0x110000) { // note that capital 'Z' bits must be 0 | 55 | 0 | bytes |= 0xF0808080; | 56 | 0 | s.push_back((bytes >> 24) & 0xFF); | 57 | 0 | s.push_back((bytes >> 16) & 0xFF); | 58 | 0 | s.push_back((bytes >> 8) & 0xFF); | 59 | 0 | s.push_back((bytes >> 0) & 0xFF); | 60 | 0 | return 4; | 61 | 0 | } else { | 62 | 0 | std::cerr << "Should never get here." << std::endl; | 63 | 0 | abort(); | 64 | 0 | } | 65 | 24.1M | } |
string_utils.cpp:jsonnet::internal::encode_utf8(char32_t, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 33 | 358 | { | 34 | 358 | if (x >= JSONNET_CODEPOINT_MAX) | 35 | 5 | x = JSONNET_CODEPOINT_ERROR; | 36 | | | 37 | | // 00ZZZzzz 00zzYYYY 00Yyyyxx 00xxxxxx | 38 | 358 | long bytes = ((x & 0x1C0000) << 6) | ((x & 0x03F000) << 4) | ((x & 0x0FC0) << 2) | (x & 0x3F); | 39 | | | 40 | 358 | if (x < 0x80) { | 41 | 188 | s.push_back((char)x); | 42 | 188 | return 1; | 43 | 188 | } else if (x < 0x800) { // note that capital 'Y' bits must be 0 | 44 | 23 | bytes |= 0xC080; | 45 | 23 | s.push_back((bytes >> 8) & 0xFF); | 46 | 23 | s.push_back((bytes >> 0) & 0xFF); | 47 | 23 | return 2; | 48 | 147 | } else if (x < 0x10000) { // note that 'z' bits must be 0 | 49 | 117 | bytes |= 0xE08080; | 50 | 117 | s.push_back((bytes >> 16) & 0xFF); | 51 | 117 | s.push_back((bytes >> 8) & 0xFF); | 52 | 117 | s.push_back((bytes >> 0) & 0xFF); | 53 | 117 | return 3; | 54 | 117 | } else if (x < 0x110000) { // note that capital 'Z' bits must be 0 | 55 | 30 | bytes |= 0xF0808080; | 56 | 30 | s.push_back((bytes >> 24) & 0xFF); | 57 | 30 | s.push_back((bytes >> 16) & 0xFF); | 58 | 30 | s.push_back((bytes >> 8) & 0xFF); | 59 | 30 | s.push_back((bytes >> 0) & 0xFF); | 60 | 30 | return 4; | 61 | 30 | } else { | 62 | 0 | std::cerr << "Should never get here." << std::endl; | 63 | 0 | abort(); | 64 | 0 | } | 65 | 358 | } |
vm.cpp:jsonnet::internal::encode_utf8(char32_t, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 33 | 1.47G | { | 34 | 1.47G | if (x >= JSONNET_CODEPOINT_MAX) | 35 | 3.07k | x = JSONNET_CODEPOINT_ERROR; | 36 | | | 37 | | // 00ZZZzzz 00zzYYYY 00Yyyyxx 00xxxxxx | 38 | 1.47G | long bytes = ((x & 0x1C0000) << 6) | ((x & 0x03F000) << 4) | ((x & 0x0FC0) << 2) | (x & 0x3F); | 39 | | | 40 | 1.47G | if (x < 0x80) { | 41 | 1.46G | s.push_back((char)x); | 42 | 1.46G | return 1; | 43 | 1.46G | } else if (x < 0x800) { // note that capital 'Y' bits must be 0 | 44 | 54.7k | bytes |= 0xC080; | 45 | 54.7k | s.push_back((bytes >> 8) & 0xFF); | 46 | 54.7k | s.push_back((bytes >> 0) & 0xFF); | 47 | 54.7k | return 2; | 48 | 3.28M | } else if (x < 0x10000) { // note that 'z' bits must be 0 | 49 | 3.25M | bytes |= 0xE08080; | 50 | 3.25M | s.push_back((bytes >> 16) & 0xFF); | 51 | 3.25M | s.push_back((bytes >> 8) & 0xFF); | 52 | 3.25M | s.push_back((bytes >> 0) & 0xFF); | 53 | 3.25M | return 3; | 54 | 3.25M | } else if (x < 0x110000) { // note that capital 'Z' bits must be 0 | 55 | 31.5k | bytes |= 0xF0808080; | 56 | 31.5k | s.push_back((bytes >> 24) & 0xFF); | 57 | 31.5k | s.push_back((bytes >> 16) & 0xFF); | 58 | 31.5k | s.push_back((bytes >> 8) & 0xFF); | 59 | 31.5k | s.push_back((bytes >> 0) & 0xFF); | 60 | 31.5k | return 4; | 61 | 31.5k | } else { | 62 | 0 | std::cerr << "Should never get here." << std::endl; | 63 | 0 | abort(); | 64 | 0 | } | 65 | 1.47G | } |
desugarer.cpp:jsonnet::internal::encode_utf8(char32_t, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 33 | 5.90M | { | 34 | 5.90M | if (x >= JSONNET_CODEPOINT_MAX) | 35 | 0 | x = JSONNET_CODEPOINT_ERROR; | 36 | | | 37 | | // 00ZZZzzz 00zzYYYY 00Yyyyxx 00xxxxxx | 38 | 5.90M | long bytes = ((x & 0x1C0000) << 6) | ((x & 0x03F000) << 4) | ((x & 0x0FC0) << 2) | (x & 0x3F); | 39 | | | 40 | 5.90M | if (x < 0x80) { | 41 | 5.90M | s.push_back((char)x); | 42 | 5.90M | return 1; | 43 | 5.90M | } else if (x < 0x800) { // note that capital 'Y' bits must be 0 | 44 | 0 | bytes |= 0xC080; | 45 | 0 | s.push_back((bytes >> 8) & 0xFF); | 46 | 0 | s.push_back((bytes >> 0) & 0xFF); | 47 | 0 | return 2; | 48 | 0 | } else if (x < 0x10000) { // note that 'z' bits must be 0 | 49 | 0 | bytes |= 0xE08080; | 50 | 0 | s.push_back((bytes >> 16) & 0xFF); | 51 | 0 | s.push_back((bytes >> 8) & 0xFF); | 52 | 0 | s.push_back((bytes >> 0) & 0xFF); | 53 | 0 | return 3; | 54 | 0 | } else if (x < 0x110000) { // note that capital 'Z' bits must be 0 | 55 | 0 | bytes |= 0xF0808080; | 56 | 0 | s.push_back((bytes >> 24) & 0xFF); | 57 | 0 | s.push_back((bytes >> 16) & 0xFF); | 58 | 0 | s.push_back((bytes >> 8) & 0xFF); | 59 | 0 | s.push_back((bytes >> 0) & 0xFF); | 60 | 0 | return 4; | 61 | 0 | } else { | 62 | 0 | std::cerr << "Should never get here." << std::endl; | 63 | 0 | abort(); | 64 | 0 | } | 65 | 5.90M | } |
Unexecuted instantiation: formatter.cpp:jsonnet::internal::encode_utf8(char32_t, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Unexecuted instantiation: lexer.cpp:jsonnet::internal::encode_utf8(char32_t, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) |
66 | | |
67 | | /** Convert the UTF8 byte sequence in the given string to a unicode code point. |
68 | | * |
69 | | * \param str The string. |
70 | | * \param i The index of the string from which to start decoding and returns the index of the last |
71 | | * byte of the encoded codepoint. |
72 | | * \returns The decoded unicode codepoint. |
73 | | */ |
74 | | static inline char32_t decode_utf8(const std::string &str, size_t &i) |
75 | 795M | { |
76 | 795M | char c0 = str[i]; |
77 | 795M | if ((c0 & 0x80) == 0) { // 0xxxxxxx |
78 | 784M | return c0; |
79 | 784M | } else if ((c0 & 0xE0) == 0xC0) { // 110yyyxx 10xxxxxx |
80 | 702k | if (i + 1 >= str.length()) { |
81 | 12.0k | return JSONNET_CODEPOINT_ERROR; |
82 | 12.0k | } |
83 | 690k | char c1 = str[++i]; |
84 | 690k | if ((c1 & 0xC0) != 0x80) { |
85 | 574k | return JSONNET_CODEPOINT_ERROR; |
86 | 574k | } |
87 | 115k | return ((c0 & 0x1F) << 6ul) | (c1 & 0x3F); |
88 | 10.5M | } else if ((c0 & 0xF0) == 0xE0) { // 1110yyyy 10yyyyxx 10xxxxxx |
89 | 1.13M | if (i + 2 >= str.length()) { |
90 | 6.33k | return JSONNET_CODEPOINT_ERROR; |
91 | 6.33k | } |
92 | 1.12M | char c1 = str[++i]; |
93 | 1.12M | if ((c1 & 0xC0) != 0x80) { |
94 | 1.01M | return JSONNET_CODEPOINT_ERROR; |
95 | 1.01M | } |
96 | 114k | char c2 = str[++i]; |
97 | 114k | if ((c2 & 0xC0) != 0x80) { |
98 | 35.0k | return JSONNET_CODEPOINT_ERROR; |
99 | 35.0k | } |
100 | 79.6k | return ((c0 & 0xF) << 12ul) | ((c1 & 0x3F) << 6) | (c2 & 0x3F); |
101 | 9.37M | } else if ((c0 & 0xF8) == 0xF0) { // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx |
102 | 364k | if (i + 3 >= str.length()) { |
103 | 7.30k | return JSONNET_CODEPOINT_ERROR; |
104 | 7.30k | } |
105 | 356k | char c1 = str[++i]; |
106 | 356k | if ((c1 & 0xC0) != 0x80) { |
107 | 213k | return JSONNET_CODEPOINT_ERROR; |
108 | 213k | } |
109 | 143k | char c2 = str[++i]; |
110 | 143k | if ((c2 & 0xC0) != 0x80) { |
111 | 10.6k | return JSONNET_CODEPOINT_ERROR; |
112 | 10.6k | } |
113 | 132k | char c3 = str[++i]; |
114 | 132k | if ((c3 & 0xC0) != 0x80) { |
115 | 25.9k | return JSONNET_CODEPOINT_ERROR; |
116 | 25.9k | } |
117 | 106k | return ((c0 & 0x7) << 18ul) | ((c1 & 0x3F) << 12ul) | ((c2 & 0x3F) << 6) | (c3 & 0x3F); |
118 | 9.00M | } else { |
119 | 9.00M | return JSONNET_CODEPOINT_ERROR; |
120 | 9.00M | } |
121 | 795M | } Unexecuted instantiation: libjsonnet.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long&) parser.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long&) Line | Count | Source | 75 | 609M | { | 76 | 609M | char c0 = str[i]; | 77 | 609M | if ((c0 & 0x80) == 0) { // 0xxxxxxx | 78 | 598M | return c0; | 79 | 598M | } else if ((c0 & 0xE0) == 0xC0) { // 110yyyxx 10xxxxxx | 80 | 702k | if (i + 1 >= str.length()) { | 81 | 12.0k | return JSONNET_CODEPOINT_ERROR; | 82 | 12.0k | } | 83 | 690k | char c1 = str[++i]; | 84 | 690k | if ((c1 & 0xC0) != 0x80) { | 85 | 574k | return JSONNET_CODEPOINT_ERROR; | 86 | 574k | } | 87 | 115k | return ((c0 & 0x1F) << 6ul) | (c1 & 0x3F); | 88 | 10.5M | } else if ((c0 & 0xF0) == 0xE0) { // 1110yyyy 10yyyyxx 10xxxxxx | 89 | 1.13M | if (i + 2 >= str.length()) { | 90 | 6.33k | return JSONNET_CODEPOINT_ERROR; | 91 | 6.33k | } | 92 | 1.12M | char c1 = str[++i]; | 93 | 1.12M | if ((c1 & 0xC0) != 0x80) { | 94 | 1.01M | return JSONNET_CODEPOINT_ERROR; | 95 | 1.01M | } | 96 | 114k | char c2 = str[++i]; | 97 | 114k | if ((c2 & 0xC0) != 0x80) { | 98 | 35.0k | return JSONNET_CODEPOINT_ERROR; | 99 | 35.0k | } | 100 | 79.6k | return ((c0 & 0xF) << 12ul) | ((c1 & 0x3F) << 6) | (c2 & 0x3F); | 101 | 9.37M | } else if ((c0 & 0xF8) == 0xF0) { // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx | 102 | 364k | if (i + 3 >= str.length()) { | 103 | 7.30k | return JSONNET_CODEPOINT_ERROR; | 104 | 7.30k | } | 105 | 356k | char c1 = str[++i]; | 106 | 356k | if ((c1 & 0xC0) != 0x80) { | 107 | 213k | return JSONNET_CODEPOINT_ERROR; | 108 | 213k | } | 109 | 143k | char c2 = str[++i]; | 110 | 143k | if ((c2 & 0xC0) != 0x80) { | 111 | 10.6k | return JSONNET_CODEPOINT_ERROR; | 112 | 10.6k | } | 113 | 132k | char c3 = str[++i]; | 114 | 132k | if ((c3 & 0xC0) != 0x80) { | 115 | 25.9k | return JSONNET_CODEPOINT_ERROR; | 116 | 25.9k | } | 117 | 106k | return ((c0 & 0x7) << 18ul) | ((c1 & 0x3F) << 12ul) | ((c2 & 0x3F) << 6) | (c3 & 0x3F); | 118 | 9.00M | } else { | 119 | 9.00M | return JSONNET_CODEPOINT_ERROR; | 120 | 9.00M | } | 121 | 609M | } |
Unexecuted instantiation: pass.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long&) Unexecuted instantiation: static_analysis.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long&) string_utils.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long&) Line | Count | Source | 75 | 166M | { | 76 | 166M | char c0 = str[i]; | 77 | 166M | if ((c0 & 0x80) == 0) { // 0xxxxxxx | 78 | 166M | return c0; | 79 | 166M | } else if ((c0 & 0xE0) == 0xC0) { // 110yyyxx 10xxxxxx | 80 | 0 | if (i + 1 >= str.length()) { | 81 | 0 | return JSONNET_CODEPOINT_ERROR; | 82 | 0 | } | 83 | 0 | char c1 = str[++i]; | 84 | 0 | if ((c1 & 0xC0) != 0x80) { | 85 | 0 | return JSONNET_CODEPOINT_ERROR; | 86 | 0 | } | 87 | 0 | return ((c0 & 0x1F) << 6ul) | (c1 & 0x3F); | 88 | 0 | } else if ((c0 & 0xF0) == 0xE0) { // 1110yyyy 10yyyyxx 10xxxxxx | 89 | 0 | if (i + 2 >= str.length()) { | 90 | 0 | return JSONNET_CODEPOINT_ERROR; | 91 | 0 | } | 92 | 0 | char c1 = str[++i]; | 93 | 0 | if ((c1 & 0xC0) != 0x80) { | 94 | 0 | return JSONNET_CODEPOINT_ERROR; | 95 | 0 | } | 96 | 0 | char c2 = str[++i]; | 97 | 0 | if ((c2 & 0xC0) != 0x80) { | 98 | 0 | return JSONNET_CODEPOINT_ERROR; | 99 | 0 | } | 100 | 0 | return ((c0 & 0xF) << 12ul) | ((c1 & 0x3F) << 6) | (c2 & 0x3F); | 101 | 0 | } else if ((c0 & 0xF8) == 0xF0) { // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx | 102 | 0 | if (i + 3 >= str.length()) { | 103 | 0 | return JSONNET_CODEPOINT_ERROR; | 104 | 0 | } | 105 | 0 | char c1 = str[++i]; | 106 | 0 | if ((c1 & 0xC0) != 0x80) { | 107 | 0 | return JSONNET_CODEPOINT_ERROR; | 108 | 0 | } | 109 | 0 | char c2 = str[++i]; | 110 | 0 | if ((c2 & 0xC0) != 0x80) { | 111 | 0 | return JSONNET_CODEPOINT_ERROR; | 112 | 0 | } | 113 | 0 | char c3 = str[++i]; | 114 | 0 | if ((c3 & 0xC0) != 0x80) { | 115 | 0 | return JSONNET_CODEPOINT_ERROR; | 116 | 0 | } | 117 | 0 | return ((c0 & 0x7) << 18ul) | ((c1 & 0x3F) << 12ul) | ((c2 & 0x3F) << 6) | (c3 & 0x3F); | 118 | 0 | } else { | 119 | 0 | return JSONNET_CODEPOINT_ERROR; | 120 | 0 | } | 121 | 166M | } |
vm.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long&) Line | Count | Source | 75 | 18.8M | { | 76 | 18.8M | char c0 = str[i]; | 77 | 18.8M | if ((c0 & 0x80) == 0) { // 0xxxxxxx | 78 | 18.8M | return c0; | 79 | 18.8M | } else if ((c0 & 0xE0) == 0xC0) { // 110yyyxx 10xxxxxx | 80 | 0 | if (i + 1 >= str.length()) { | 81 | 0 | return JSONNET_CODEPOINT_ERROR; | 82 | 0 | } | 83 | 0 | char c1 = str[++i]; | 84 | 0 | if ((c1 & 0xC0) != 0x80) { | 85 | 0 | return JSONNET_CODEPOINT_ERROR; | 86 | 0 | } | 87 | 0 | return ((c0 & 0x1F) << 6ul) | (c1 & 0x3F); | 88 | 0 | } else if ((c0 & 0xF0) == 0xE0) { // 1110yyyy 10yyyyxx 10xxxxxx | 89 | 0 | if (i + 2 >= str.length()) { | 90 | 0 | return JSONNET_CODEPOINT_ERROR; | 91 | 0 | } | 92 | 0 | char c1 = str[++i]; | 93 | 0 | if ((c1 & 0xC0) != 0x80) { | 94 | 0 | return JSONNET_CODEPOINT_ERROR; | 95 | 0 | } | 96 | 0 | char c2 = str[++i]; | 97 | 0 | if ((c2 & 0xC0) != 0x80) { | 98 | 0 | return JSONNET_CODEPOINT_ERROR; | 99 | 0 | } | 100 | 0 | return ((c0 & 0xF) << 12ul) | ((c1 & 0x3F) << 6) | (c2 & 0x3F); | 101 | 0 | } else if ((c0 & 0xF8) == 0xF0) { // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx | 102 | 0 | if (i + 3 >= str.length()) { | 103 | 0 | return JSONNET_CODEPOINT_ERROR; | 104 | 0 | } | 105 | 0 | char c1 = str[++i]; | 106 | 0 | if ((c1 & 0xC0) != 0x80) { | 107 | 0 | return JSONNET_CODEPOINT_ERROR; | 108 | 0 | } | 109 | 0 | char c2 = str[++i]; | 110 | 0 | if ((c2 & 0xC0) != 0x80) { | 111 | 0 | return JSONNET_CODEPOINT_ERROR; | 112 | 0 | } | 113 | 0 | char c3 = str[++i]; | 114 | 0 | if ((c3 & 0xC0) != 0x80) { | 115 | 0 | return JSONNET_CODEPOINT_ERROR; | 116 | 0 | } | 117 | 0 | return ((c0 & 0x7) << 18ul) | ((c1 & 0x3F) << 12ul) | ((c2 & 0x3F) << 6) | (c3 & 0x3F); | 118 | 0 | } else { | 119 | 0 | return JSONNET_CODEPOINT_ERROR; | 120 | 0 | } | 121 | 18.8M | } |
desugarer.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long&) Line | Count | Source | 75 | 114k | { | 76 | 114k | char c0 = str[i]; | 77 | 114k | if ((c0 & 0x80) == 0) { // 0xxxxxxx | 78 | 114k | return c0; | 79 | 114k | } else if ((c0 & 0xE0) == 0xC0) { // 110yyyxx 10xxxxxx | 80 | 0 | if (i + 1 >= str.length()) { | 81 | 0 | return JSONNET_CODEPOINT_ERROR; | 82 | 0 | } | 83 | 0 | char c1 = str[++i]; | 84 | 0 | if ((c1 & 0xC0) != 0x80) { | 85 | 0 | return JSONNET_CODEPOINT_ERROR; | 86 | 0 | } | 87 | 0 | return ((c0 & 0x1F) << 6ul) | (c1 & 0x3F); | 88 | 0 | } else if ((c0 & 0xF0) == 0xE0) { // 1110yyyy 10yyyyxx 10xxxxxx | 89 | 0 | if (i + 2 >= str.length()) { | 90 | 0 | return JSONNET_CODEPOINT_ERROR; | 91 | 0 | } | 92 | 0 | char c1 = str[++i]; | 93 | 0 | if ((c1 & 0xC0) != 0x80) { | 94 | 0 | return JSONNET_CODEPOINT_ERROR; | 95 | 0 | } | 96 | 0 | char c2 = str[++i]; | 97 | 0 | if ((c2 & 0xC0) != 0x80) { | 98 | 0 | return JSONNET_CODEPOINT_ERROR; | 99 | 0 | } | 100 | 0 | return ((c0 & 0xF) << 12ul) | ((c1 & 0x3F) << 6) | (c2 & 0x3F); | 101 | 0 | } else if ((c0 & 0xF8) == 0xF0) { // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx | 102 | 0 | if (i + 3 >= str.length()) { | 103 | 0 | return JSONNET_CODEPOINT_ERROR; | 104 | 0 | } | 105 | 0 | char c1 = str[++i]; | 106 | 0 | if ((c1 & 0xC0) != 0x80) { | 107 | 0 | return JSONNET_CODEPOINT_ERROR; | 108 | 0 | } | 109 | 0 | char c2 = str[++i]; | 110 | 0 | if ((c2 & 0xC0) != 0x80) { | 111 | 0 | return JSONNET_CODEPOINT_ERROR; | 112 | 0 | } | 113 | 0 | char c3 = str[++i]; | 114 | 0 | if ((c3 & 0xC0) != 0x80) { | 115 | 0 | return JSONNET_CODEPOINT_ERROR; | 116 | 0 | } | 117 | 0 | return ((c0 & 0x7) << 18ul) | ((c1 & 0x3F) << 12ul) | ((c2 & 0x3F) << 6) | (c3 & 0x3F); | 118 | 0 | } else { | 119 | 0 | return JSONNET_CODEPOINT_ERROR; | 120 | 0 | } | 121 | 114k | } |
Unexecuted instantiation: formatter.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long&) Unexecuted instantiation: lexer.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long&) |
122 | | |
123 | | /** A string class capable of holding unicode codepoints. */ |
124 | | typedef std::basic_string<char32_t> UString; |
125 | | |
126 | | static inline void encode_utf8(const UString &s, std::string &r) |
127 | 3.14M | { |
128 | 3.14M | for (char32_t cp : s) |
129 | 1.50G | encode_utf8(cp, r); |
130 | 3.14M | } Unexecuted instantiation: libjsonnet.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Unexecuted instantiation: parser.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Unexecuted instantiation: pass.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) static_analysis.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 127 | 1.76k | { | 128 | 1.76k | for (char32_t cp : s) | 129 | 24.1M | encode_utf8(cp, r); | 130 | 1.76k | } |
Unexecuted instantiation: string_utils.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) vm.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 127 | 2.25M | { | 128 | 2.25M | for (char32_t cp : s) | 129 | 1.47G | encode_utf8(cp, r); | 130 | 2.25M | } |
desugarer.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Line | Count | Source | 127 | 882k | { | 128 | 882k | for (char32_t cp : s) | 129 | 5.90M | encode_utf8(cp, r); | 130 | 882k | } |
Unexecuted instantiation: formatter.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) Unexecuted instantiation: lexer.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&) |
131 | | |
132 | | static inline std::string encode_utf8(const UString &s) |
133 | 3.14M | { |
134 | 3.14M | std::string r; |
135 | 3.14M | encode_utf8(s, r); |
136 | 3.14M | return r; |
137 | 3.14M | } Unexecuted instantiation: libjsonnet.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&) Unexecuted instantiation: parser.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&) Unexecuted instantiation: pass.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&) static_analysis.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&) Line | Count | Source | 133 | 1.76k | { | 134 | 1.76k | std::string r; | 135 | 1.76k | encode_utf8(s, r); | 136 | 1.76k | return r; | 137 | 1.76k | } |
Unexecuted instantiation: string_utils.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&) vm.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&) Line | Count | Source | 133 | 2.25M | { | 134 | 2.25M | std::string r; | 135 | 2.25M | encode_utf8(s, r); | 136 | 2.25M | return r; | 137 | 2.25M | } |
desugarer.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&) Line | Count | Source | 133 | 882k | { | 134 | 882k | std::string r; | 135 | 882k | encode_utf8(s, r); | 136 | 882k | return r; | 137 | 882k | } |
Unexecuted instantiation: formatter.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&) Unexecuted instantiation: lexer.cpp:jsonnet::internal::encode_utf8(std::__1::basic_string<char32_t, std::__1::char_traits<char32_t>, std::__1::allocator<char32_t> > const&) |
138 | | |
139 | | static inline UString decode_utf8(const std::string &s) |
140 | 136M | { |
141 | 136M | UString r; |
142 | 931M | for (size_t i = 0; i < s.length(); ++i) |
143 | 795M | r.push_back(decode_utf8(s, i)); |
144 | 136M | return r; |
145 | 136M | } Unexecuted instantiation: libjsonnet.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) parser.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 140 | 102M | { | 141 | 102M | UString r; | 142 | 712M | for (size_t i = 0; i < s.length(); ++i) | 143 | 609M | r.push_back(decode_utf8(s, i)); | 144 | 102M | return r; | 145 | 102M | } |
Unexecuted instantiation: pass.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Unexecuted instantiation: static_analysis.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) string_utils.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 140 | 27.7M | { | 141 | 27.7M | UString r; | 142 | 194M | for (size_t i = 0; i < s.length(); ++i) | 143 | 166M | r.push_back(decode_utf8(s, i)); | 144 | 27.7M | return r; | 145 | 27.7M | } |
vm.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 140 | 5.99M | { | 141 | 5.99M | UString r; | 142 | 24.8M | for (size_t i = 0; i < s.length(); ++i) | 143 | 18.8M | r.push_back(decode_utf8(s, i)); | 144 | 5.99M | return r; | 145 | 5.99M | } |
desugarer.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Line | Count | Source | 140 | 21.0k | { | 141 | 21.0k | UString r; | 142 | 135k | for (size_t i = 0; i < s.length(); ++i) | 143 | 114k | r.push_back(decode_utf8(s, i)); | 144 | 21.0k | return r; | 145 | 21.0k | } |
Unexecuted instantiation: formatter.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Unexecuted instantiation: lexer.cpp:jsonnet::internal::decode_utf8(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) |
146 | | |
147 | | /** A stringstream-like class capable of holding unicode codepoints. |
148 | | * The C++ standard does not support std::basic_stringstream<char32_t. |
149 | | */ |
150 | | class UStringStream { |
151 | | UString buf; |
152 | | |
153 | | public: |
154 | | UStringStream &operator<<(const UString &s) |
155 | 49.8M | { |
156 | 49.8M | buf.append(s); |
157 | 49.8M | return *this; |
158 | 49.8M | } |
159 | | UStringStream &operator<<(const char32_t *s) |
160 | 2.10G | { |
161 | 2.10G | buf.append(s); |
162 | 2.10G | return *this; |
163 | 2.10G | } |
164 | | UStringStream &operator<<(char32_t c) |
165 | 125M | { |
166 | 125M | buf.push_back(c); |
167 | 125M | return *this; |
168 | 125M | } |
169 | | template <class T> |
170 | | UStringStream &operator<<(T c) |
171 | 2.97M | { |
172 | 2.97M | std::stringstream ss; |
173 | 2.97M | ss << c; |
174 | 2.97M | for (char c : ss.str()) |
175 | 5.72M | buf.push_back(char32_t(c)); |
176 | 2.97M | return *this; |
177 | 2.97M | } jsonnet::internal::UStringStream& jsonnet::internal::UStringStream::operator<< <int>(int) Line | Count | Source | 171 | 2.41M | { | 172 | 2.41M | std::stringstream ss; | 173 | 2.41M | ss << c; | 174 | 2.41M | for (char c : ss.str()) | 175 | 3.86M | buf.push_back(char32_t(c)); | 176 | 2.41M | return *this; | 177 | 2.41M | } |
jsonnet::internal::UStringStream& jsonnet::internal::UStringStream::operator<< <unsigned int>(unsigned int) Line | Count | Source | 171 | 559k | { | 172 | 559k | std::stringstream ss; | 173 | 559k | ss << c; | 174 | 559k | for (char c : ss.str()) | 175 | 1.86M | buf.push_back(char32_t(c)); | 176 | 559k | return *this; | 177 | 559k | } |
|
178 | | UString str() |
179 | 12.7M | { |
180 | 12.7M | return buf; |
181 | 12.7M | } |
182 | | }; |
183 | | |
184 | | } // namespace jsonnet::internal |
185 | | |
186 | | #endif // JSONNET_UNICODE_H |