Coverage Report

Created: 2025-11-24 06:35

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/jsonnet/core/string_utils.cpp
Line
Count
Source
1
/*
2
Copyright 2015 Google Inc. All rights reserved.
3
4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
7
8
    http://www.apache.org/licenses/LICENSE-2.0
9
10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
15
*/
16
17
#include <iomanip>
18
19
#include "static_error.h"
20
#include "string_utils.h"
21
22
namespace jsonnet::internal {
23
24
UString jsonnet_string_unparse(const UString &str, bool single)
25
1.95M
{
26
1.95M
    UStringStream ss;
27
1.95M
    ss << (single ? U'\'' : U'\"');
28
1.95M
    ss << jsonnet_string_escape(str, single);
29
1.95M
    ss << (single ? U'\'' : U'\"');
30
1.95M
    return ss.str();
31
1.95M
}
32
33
UString jsonnet_string_escape(const UString &str, bool single)
34
1.95M
{
35
1.95M
    UStringStream ss;
36
3.00G
    for (std::size_t i = 0; i < str.length(); ++i) {
37
3.00G
        char32_t c = str[i];
38
3.00G
        switch (c) {
39
1.70M
            case U'\"': ss << (single ? U"\"" : U"\\\""); break;
40
1.76M
            case U'\'': ss << (single ? U"\\\'" : U"\'"); break;
41
2.55G
            case U'\\': ss << U"\\\\"; break;
42
1.98M
            case U'\b': ss << U"\\b"; break;
43
3.84M
            case U'\f': ss << U"\\f"; break;
44
39.4M
            case U'\n': ss << U"\\n"; break;
45
2.60M
            case U'\r': ss << U"\\r"; break;
46
6.26M
            case U'\t': ss << U"\\t"; break;
47
464k
            case U'\0': ss << U"\\u0000"; break;
48
396M
            default: {
49
396M
                if (c < 0x20 || (c >= 0x7f && c <= 0x9f)) {
50
                    // Unprintable, use \u
51
51.1M
                    std::stringstream ss8;
52
51.1M
                    ss8 << "\\u" << std::hex << std::setfill('0') << std::setw(4)
53
51.1M
                        << (unsigned long)(c);
54
51.1M
                    ss << decode_utf8(ss8.str());
55
344M
                } else {
56
                    // Printable, write verbatim
57
344M
                    ss << c;
58
344M
                }
59
396M
            }
60
3.00G
        }
61
3.00G
    }
62
1.95M
    return ss.str();
63
1.95M
}
64
65
unsigned long jsonnet_string_parse_unicode(const LocationRange &loc, const char32_t *c)
66
83.5k
{
67
83.5k
    unsigned long codepoint = 0;
68
    // Expect 4 hex digits.
69
416k
    for (unsigned i = 0; i < 4; ++i) {
70
333k
        auto x = (unsigned char)(c[i]);
71
333k
        unsigned digit;
72
333k
        if (x == '\0') {
73
70
            auto msg = "Truncated unicode escape sequence in string literal.";
74
70
            throw StaticError(loc, msg);
75
333k
        } else if (x >= '0' && x <= '9') {
76
267k
            digit = x - '0';
77
267k
        } else if (x >= 'a' && x <= 'f') {
78
22.3k
            digit = x - 'a' + 10;
79
43.5k
        } else if (x >= 'A' && x <= 'F') {
80
43.2k
            digit = x - 'A' + 10;
81
43.2k
        } else {
82
231
            std::stringstream ss;
83
231
            ss << "Malformed unicode escape character, "
84
231
               << "should be hex: '" << x << "'";
85
231
            throw StaticError(loc, ss.str());
86
231
        }
87
333k
        codepoint *= 16;
88
333k
        codepoint += digit;
89
333k
    }
90
83.2k
    return codepoint;
91
83.5k
}
92
93
bool is_bmp_codepoint(const unsigned long codepoint)
94
82.1k
{
95
82.1k
    return codepoint < 0xd800 || (codepoint >= 0xe000 && codepoint < 0x10000);
96
82.1k
}
97
98
char32_t decode_utf16_surrogates(const LocationRange &loc, const unsigned long high, const unsigned long low)
99
1.02k
{
100
1.02k
    if (high >= 0xd800 && high < 0xdc00 && low >= 0xdc00 && low < 0xe000) {
101
980
        return 0x10000 + ((high & 0x03ff) << 10) + (low & 0x03ff);
102
980
    } else {
103
43
        std::stringstream ss;
104
43
        ss << "Invalid UTF-16 bytes";
105
43
        throw StaticError(loc, ss.str());
106
43
    }
107
1.02k
}
108
109
UString jsonnet_string_unescape(const LocationRange &loc, const UString &s)
110
11.8M
{
111
11.8M
    UString r;
112
11.8M
    const char32_t *s_ptr = s.c_str();
113
214M
    for (const char32_t *c = s_ptr; *c != U'\0'; ++c) {
114
202M
        switch (*c) {
115
1.43M
            case '\\':
116
1.43M
                switch (*(++c)) {
117
55.2k
                    case '"':
118
83.1k
                    case '\'': r += *c; break;
119
120
293k
                    case '\\': r += *c; break;
121
122
7.70k
                    case '/': r += *c; break;
123
124
35.3k
                    case 'b': r += '\b'; break;
125
126
95.9k
                    case 'f': r += '\f'; break;
127
128
699k
                    case 'n': r += '\n'; break;
129
130
50.9k
                    case 'r': r += '\r'; break;
131
132
88.7k
                    case 't': r += '\t'; break;
133
134
82.4k
                    case 'u': {
135
82.4k
                        ++c;  // Consume the 'u'.
136
82.4k
                        unsigned long codepoint = jsonnet_string_parse_unicode(loc, c);
137
138
                        // Leave us on the last char, ready for the ++c at
139
                        // the outer for loop.
140
82.4k
                        c += 3;
141
82.4k
                        if (!is_bmp_codepoint(codepoint)) {
142
1.11k
                           if (*(++c) != '\\') {
143
52
                                std::stringstream ss;
144
52
                                ss << "Invalid non-BMP Unicode escape in string literal";
145
52
                                throw StaticError(loc, ss.str());
146
52
                           }
147
1.06k
                           if (*(++c) != 'u') {
148
22
                                std::stringstream ss;
149
22
                                ss << "Invalid non-BMP Unicode escape in string literal";
150
22
                                throw StaticError(loc, ss.str());
151
22
                           }
152
1.04k
                           ++c;
153
1.04k
                           unsigned long codepoint2 = jsonnet_string_parse_unicode(loc, c);
154
1.04k
                           c += 3;
155
1.04k
                           codepoint = decode_utf16_surrogates(loc, codepoint, codepoint2);
156
1.04k
                       }
157
82.3k
                       r += codepoint;
158
82.3k
                    } break;
159
160
8
                    case '\0': {
161
8
                        auto msg = "Truncated escape sequence in string literal.";
162
8
                        throw StaticError(loc, msg);
163
82.4k
                    }
164
165
427
                    default: {
166
427
                        std::stringstream ss;
167
427
                        std::string utf8;
168
427
                        encode_utf8(*c, utf8);
169
427
                        ss << "Unknown escape sequence in string literal: '" << utf8 << "'";
170
427
                        throw StaticError(loc, ss.str());
171
82.4k
                    }
172
1.43M
                }
173
1.43M
                break;
174
175
200M
            default:
176
                // Just a regular letter.
177
200M
                r += *c;
178
202M
        }
179
202M
    }
180
11.8M
    return r;
181
11.8M
}
182
183
}  // namespace jsonnet::internal