/src/jsonnet/core/unicode.h
Line | Count | Source |
1 | | /* |
2 | | Copyright 2015 Google Inc. All rights reserved. |
3 | | |
4 | | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | you may not use this file except in compliance with the License. |
6 | | You may obtain a copy of the License at |
7 | | |
8 | | http://www.apache.org/licenses/LICENSE-2.0 |
9 | | |
10 | | Unless required by applicable law or agreed to in writing, software |
11 | | distributed under the License is distributed on an "AS IS" BASIS, |
12 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | See the License for the specific language governing permissions and |
14 | | limitations under the License. |
15 | | */ |
16 | | |
17 | | #ifndef JSONNET_UNICODE_H |
18 | | #define JSONNET_UNICODE_H |

#include <sstream>
#include <string>

/** Substituted when a unicode translation format encoding error is encountered. */
#define JSONNET_CODEPOINT_ERROR 0xfffd
/** One past the largest codepoint accepted by the functions below (i.e. U+10FFFF + 1). */
#define JSONNET_CODEPOINT_MAX 0x110000

namespace jsonnet::internal {

/** Convert a unicode codepoint to UTF8.
 *
 * A codepoint at or above JSONNET_CODEPOINT_MAX is replaced by JSONNET_CODEPOINT_ERROR before it
 * is encoded.  Surrogate codepoints (U+D800..U+DFFF) get no special treatment: they are written
 * as ordinary three byte sequences.
 *
 * \param x The unicode codepoint.
 * \param s The UTF-8 string to append to.
 * \returns The number of characters (bytes) appended, between 1 and 4.
 */
static inline int encode_utf8(char32_t x, std::string &s)
{
    if (x >= JSONNET_CODEPOINT_MAX)
        x = JSONNET_CODEPOINT_ERROR;

    if (x < 0x80) {
        s.push_back(static_cast<char>(x));
        return 1;
    }

    // Spread the payload so that every output byte carries six bits of it:
    // 00000zzz 00zzyyyy 00yyyyxx 00xxxxxx
    // char32_t is unsigned and at least 32 bits wide, so the four byte marker 0xF0808080 always
    // fits and the right shifts below never operate on a negative value.
    const char32_t payload =
        ((x & 0x1C0000) << 6) | ((x & 0x03F000) << 4) | ((x & 0x0FC0) << 2) | (x & 0x3F);

    int length;
    char32_t sequence;
    if (x < 0x800) {  // 110yyyxx 10xxxxxx
        length = 2;
        sequence = payload | 0xC080;
    } else if (x < 0x10000) {  // 1110yyyy 10yyyyxx 10xxxxxx
        length = 3;
        sequence = payload | 0xE08080;
    } else {  // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx (x < JSONNET_CODEPOINT_MAX after the clamp)
        length = 4;
        sequence = payload | 0xF0808080;
    }

    // Emit the most significant byte first.
    for (int shift = 8 * (length - 1); shift >= 0; shift -= 8)
        s.push_back(static_cast<char>((sequence >> shift) & 0xFF));
    return length;
}

/** Convert the UTF8 byte sequence in the given string to a unicode code point.
 *
 * JSONNET_CODEPOINT_ERROR is returned for every ill-formed sequence: an unexpected lead byte, a
 * missing or invalid continuation byte, an overlong form (a value that would fit in a shorter
 * sequence) or a value at or above JSONNET_CODEPOINT_MAX.  A byte that fails the continuation
 * check is never consumed, so the caller decodes it again as the start of the next sequence.
 * Surrogate codepoints (U+D800..U+DFFF) are not rejected.
 *
 * \param str The string.
 * \param i On entry, the index of the first byte to decode.  On return, the index of the last
 *          byte that was consumed, so that incrementing it yields the start of the next sequence.
 * \returns The decoded unicode codepoint, or JSONNET_CODEPOINT_ERROR.
 */
static inline char32_t decode_utf8(const std::string &str, size_t &i)
{
    // Work on unsigned bytes so that values above 0x7F never sign-extend.
    const unsigned char lead = static_cast<unsigned char>(str[i]);
    if (lead < 0x80)  // 0xxxxxxx
        return lead;

    int trailing;       // Number of continuation bytes announced by the lead byte.
    char32_t cp;        // Payload bits collected so far.
    char32_t smallest;  // Smallest value that genuinely needs this sequence length.
    if ((lead & 0xE0) == 0xC0) {  // 110yyyxx 10xxxxxx
        trailing = 1;
        cp = lead & 0x1F;
        smallest = 0x80;
    } else if ((lead & 0xF0) == 0xE0) {  // 1110yyyy 10yyyyxx 10xxxxxx
        trailing = 2;
        cp = lead & 0x0F;
        smallest = 0x800;
    } else if ((lead & 0xF8) == 0xF0) {  // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
        trailing = 3;
        cp = lead & 0x07;
        smallest = 0x10000;
    } else {  // A stray continuation byte or a byte that never occurs in UTF-8.
        return JSONNET_CODEPOINT_ERROR;
    }

    for (int k = 0; k < trailing; ++k) {
        if (i + 1 >= str.length())
            return JSONNET_CODEPOINT_ERROR;  // The sequence is cut short by the end of the string.
        // Look at the next byte before consuming it.
        const unsigned char next = static_cast<unsigned char>(str[i + 1]);
        if ((next & 0xC0) != 0x80)
            return JSONNET_CODEPOINT_ERROR;
        ++i;
        cp = (cp << 6) | (next & 0x3F);
    }

    if (cp < smallest || cp >= JSONNET_CODEPOINT_MAX)
        return JSONNET_CODEPOINT_ERROR;
    return cp;
}

/** A string class capable of holding unicode codepoints. */
using UString = std::basic_string<char32_t>;

/** Append the UTF-8 encoding of every codepoint of a string to another string.
 *
 * \param s The codepoints to encode.
 * \param r The UTF-8 string to append to.
 */
static inline void encode_utf8(const UString &s, std::string &r)
{
    for (char32_t cp : s)
        encode_utf8(cp, r);
}

/** Encode a string of codepoints as UTF-8.
 *
 * \param s The codepoints to encode.
 * \returns The UTF-8 encoded string.
 */
static inline std::string encode_utf8(const UString &s)
{
    std::string r;
    r.reserve(s.size());  // Every codepoint produces at least one byte.
    encode_utf8(s, r);
    return r;
}

/** Decode a whole UTF-8 string into codepoints.
 *
 * Every ill-formed sequence contributes one JSONNET_CODEPOINT_ERROR to the result.
 *
 * \param s The UTF-8 encoded string.
 * \returns The decoded codepoints.
 */
static inline UString decode_utf8(const std::string &s)
{
    UString r;
    r.reserve(s.length());  // Every codepoint consumes at least one byte.
    // The two argument overload leaves i on the last byte it consumed; ++i moves to the next one.
    for (size_t i = 0; i < s.length(); ++i)
        r.push_back(decode_utf8(s, i));
    return r;
}

/** A stringstream-like class capable of holding unicode codepoints.
 * The C++ standard does not support std::basic_stringstream<char32_t>.
 */
class UStringStream {
    UString buf;

   public:
    /** Append a string of codepoints. */
    UStringStream &operator<<(const UString &s)
    {
        buf.append(s);
        return *this;
    }
    /** Append a null-terminated sequence of codepoints. */
    UStringStream &operator<<(const char32_t *s)
    {
        buf.append(s);
        return *this;
    }
    /** Append a single codepoint. */
    UStringStream &operator<<(char32_t c)
    {
        buf.push_back(c);
        return *this;
    }
    /** Append any other value, formatted the way a std::stringstream formats it.
     *
     * The formatted text is decoded as UTF-8, so bytes above 0x7F are never sign-extended into
     * bogus codepoints.
     */
    template <class T>
    UStringStream &operator<<(T c)
    {
        std::stringstream ss;
        ss << c;
        buf.append(decode_utf8(ss.str()));
        return *this;
    }

    /** \returns A copy of everything appended so far. */
    UString str() const
    {
        return buf;
    }
};

}  // namespace jsonnet::internal
185 | | |
186 | | #endif // JSONNET_UNICODE_H |