Coverage Report

Created: 2025-11-09 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/jsonnet/core/string_utils.cpp
Line
Count
Source
1
/*
2
Copyright 2015 Google Inc. All rights reserved.
3
4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
7
8
    http://www.apache.org/licenses/LICENSE-2.0
9
10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
15
*/
16
17
#include <iomanip>
18
19
#include "static_error.h"
20
#include "string_utils.h"
21
22
namespace jsonnet::internal {
23
24
// Render `str` as a quoted string literal.
//
// The contents are escaped with jsonnet_string_escape() and wrapped in a pair
// of quote characters: single quotes when `single` is true, double quotes
// otherwise.
UString jsonnet_string_unparse(const UString &str, bool single)
{
    const char32_t quote = single ? U'\'' : U'\"';

    UString quoted;
    quoted += quote;
    quoted += jsonnet_string_escape(str, single);
    quoted += quote;
    return quoted;
}
32
33
// Escape the contents of `str` so they can be placed between quotes in a
// string literal.
//
// - The quote character selected by `single` is backslash-escaped; the other
//   quote character is written verbatim.
// - Backslash and the control characters \b \f \n \r \t use their short
//   escapes.
// - Every other code point below 0x20 (including NUL) and every code point in
//   0x7f..0x9f is written as \uXXXX with four lowercase hex digits.
// - Everything else is copied through unchanged.
//
// Returns the escaped text without surrounding quotes.
UString jsonnet_string_escape(const UString &str, bool single)
{
    static constexpr char32_t kHexDigits[] = U"0123456789abcdef";

    UString out;
    // The result is never shorter than the input, so this avoids the early
    // reallocations.
    out.reserve(str.length());

    for (const char32_t c : str) {
        switch (c) {
            case U'\"': out += (single ? U"\"" : U"\\\""); break;
            case U'\'': out += (single ? U"\\\'" : U"\'"); break;
            case U'\\': out += U"\\\\"; break;
            case U'\b': out += U"\\b"; break;
            case U'\f': out += U"\\f"; break;
            case U'\n': out += U"\\n"; break;
            case U'\r': out += U"\\r"; break;
            case U'\t': out += U"\\t"; break;
            default: {
                if (c < 0x20 || (c >= 0x7f && c <= 0x9f)) {
                    // Unprintable, use \u followed by exactly four hex digits.
                    // Written digit by digit so that no temporary stream or
                    // UTF-8 round trip is needed per character.
                    out += U"\\u";
                    for (int shift = 12; shift >= 0; shift -= 4) {
                        out += kHexDigits[(c >> shift) & 0xf];
                    }
                } else {
                    // Printable, write verbatim.
                    out += c;
                }
            }
        }
    }
    return out;
}
64
65
unsigned long jsonnet_string_parse_unicode(const LocationRange &loc, const char32_t *c)
66
76.8k
{
67
76.8k
    unsigned long codepoint = 0;
68
    // Expect 4 hex digits.
69
383k
    for (unsigned i = 0; i < 4; ++i) {
70
306k
        auto x = (unsigned char)(c[i]);
71
306k
        unsigned digit;
72
306k
        if (x == '\0') {
73
72
            auto msg = "Truncated unicode escape sequence in string literal.";
74
72
            throw StaticError(loc, msg);
75
306k
        } else if (x >= '0' && x <= '9') {
76
242k
            digit = x - '0';
77
242k
        } else if (x >= 'a' && x <= 'f') {
78
23.8k
            digit = x - 'a' + 10;
79
39.9k
        } else if (x >= 'A' && x <= 'F') {
80
39.7k
            digit = x - 'A' + 10;
81
39.7k
        } else {
82
213
            std::stringstream ss;
83
213
            ss << "Malformed unicode escape character, "
84
213
               << "should be hex: '" << x << "'";
85
213
            throw StaticError(loc, ss.str());
86
213
        }
87
306k
        codepoint *= 16;
88
306k
        codepoint += digit;
89
306k
    }
90
76.5k
    return codepoint;
91
76.8k
}
92
93
// Returns true when `codepoint` is below 0x10000 and lies outside the
// surrogate range 0xd800..0xdfff; returns false for surrogate values and for
// anything at or above 0x10000.
bool is_bmp_codepoint(const unsigned long codepoint)
{
    if (codepoint >= 0x10000) {
        return false;
    }
    const bool is_surrogate = codepoint >= 0xd800 && codepoint <= 0xdfff;
    return !is_surrogate;
}
97
98
char32_t decode_utf16_surrogates(const LocationRange &loc, const unsigned long high, const unsigned long low)
99
1.06k
{
100
1.06k
    if (high >= 0xd800 && high < 0xdc00 && low >= 0xdc00 && low < 0xe000) {
101
1.02k
        return 0x10000 + ((high & 0x03ff) << 10) + (low & 0x03ff);
102
1.02k
    } else {
103
44
        std::stringstream ss;
104
44
        ss << "Invalid UTF-16 bytes";
105
44
        throw StaticError(loc, ss.str());
106
44
    }
107
1.06k
}
108
109
// Resolve the backslash escapes in the body of a string literal.
//
// `s` is read up to its first NUL character. Characters other than a
// backslash are copied through. After a backslash the following forms are
// accepted:
//   \" \' \\ \/          the character itself
//   \b \f \n \r \t       the corresponding control character
//   \uXXXX               the code point given by four hex digits; when that
//                        value is not accepted by is_bmp_codepoint() it must
//                        be followed immediately by a second \uXXXX, and the
//                        two are combined with decode_utf16_surrogates()
//
// Throws StaticError (at `loc`) for a trailing backslash, an unknown escape
// letter, or a malformed \u sequence.
UString jsonnet_string_unescape(const LocationRange &loc, const UString &s)
{
    UString unescaped;
    for (const char32_t *cur = s.c_str(); *cur != U'\0'; ++cur) {
        if (*cur != U'\\') {
            // Just a regular letter.
            unescaped += *cur;
            continue;
        }

        // Step onto the character that selects the escape.
        ++cur;
        const char32_t esc = *cur;
        switch (esc) {
            case U'"':
            case U'\'':
            case U'\\':
            case U'/': unescaped += esc; break;

            case U'b': unescaped += U'\b'; break;
            case U'f': unescaped += U'\f'; break;
            case U'n': unescaped += U'\n'; break;
            case U'r': unescaped += U'\r'; break;
            case U't': unescaped += U'\t'; break;

            case U'u': {
                // The four hex digits start right after the 'u'.
                unsigned long codepoint = jsonnet_string_parse_unicode(loc, cur + 1);
                // Park on the last hex digit; the loop's ++cur moves past it.
                cur += 4;

                if (!is_bmp_codepoint(codepoint)) {
                    // A second "\uXXXX" must follow directly. The && short-circuits,
                    // so cur[2] is only read when cur[1] is not the terminator.
                    const bool second_escape_follows = cur[1] == U'\\' && cur[2] == U'u';
                    if (!second_escape_follows) {
                        throw StaticError(
                            loc, std::string("Invalid non-BMP Unicode escape in string literal"));
                    }
                    const unsigned long second = jsonnet_string_parse_unicode(loc, cur + 3);
                    // Skip '\\', 'u' and land on the last of the second four digits.
                    cur += 6;
                    codepoint = decode_utf16_surrogates(loc, codepoint, second);
                }
                unescaped += static_cast<char32_t>(codepoint);
            } break;

            case U'\0': {
                // The string ended right after the backslash.
                throw StaticError(loc, "Truncated escape sequence in string literal.");
            }

            default: {
                std::string shown;
                encode_utf8(esc, shown);
                throw StaticError(loc,
                                  "Unknown escape sequence in string literal: '" + shown + "'");
            }
        }
    }
    return unescaped;
}
182
183
}  // namespace jsonnet::internal