Coverage Report

Created: 2026-05-30 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/jsonnet/core/string_utils.cpp
Line
Count
Source
1
/*
2
Copyright 2015 Google Inc. All rights reserved.
3
4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
7
8
    http://www.apache.org/licenses/LICENSE-2.0
9
10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
15
*/
16
17
#include <iomanip>
18
19
#include "static_error.h"
20
#include "string_utils.h"
21
22
namespace jsonnet::internal {
23
24
/** Render a string as a quoted literal.
 *
 * The content is escaped with jsonnet_string_escape() and wrapped in a pair of
 * matching quote characters.
 *
 * \param str The raw string content.
 * \param single If true the literal is wrapped in single quotes, otherwise in
 *               double quotes.
 * \returns The quoted, escaped string.
 */
UString jsonnet_string_unparse(const UString &str, bool single)
{
    // The same quote character opens and closes the literal.
    const char32_t quote = single ? U'\'' : U'\"';

    UStringStream out;
    out << quote;
    out << jsonnet_string_escape(str, single);
    out << quote;
    return out.str();
}
32
33
/** Escape a string so that it can be placed between quote characters.
 *
 * Backslash, the usual control characters (\b \f \n \r \t) and NUL get their
 * short escape forms.  Only the quote character selected by \p single is
 * backslash-escaped; the other quote character is emitted unchanged.  Any
 * remaining character below 0x20 or in the range 0x7f..0x9f is written as a
 * four-hex-digit \u escape; everything else is copied verbatim.
 *
 * \param str The raw string content.
 * \param single True if the result will be wrapped in single quotes, false
 *               for double quotes.
 * \returns The escaped string, without surrounding quotes.
 */
UString jsonnet_string_escape(const UString &str, bool single)
{
    UStringStream out;
    for (const char32_t ch : str) {
        switch (ch) {
            case U'\"':
                // A double quote only needs escaping inside a double-quoted literal.
                if (single) {
                    out << U"\"";
                } else {
                    out << U"\\\"";
                }
                break;

            case U'\'':
                // A single quote only needs escaping inside a single-quoted literal.
                if (single) {
                    out << U"\\\'";
                } else {
                    out << U"\'";
                }
                break;

            case U'\\': out << U"\\\\"; break;
            case U'\b': out << U"\\b"; break;
            case U'\f': out << U"\\f"; break;
            case U'\n': out << U"\\n"; break;
            case U'\r': out << U"\\r"; break;
            case U'\t': out << U"\\t"; break;
            case U'\0': out << U"\\u0000"; break;

            default: {
                const bool printable = !(ch < 0x20 || (ch >= 0x7f && ch <= 0x9f));
                if (printable) {
                    // Printable, write verbatim.
                    out << ch;
                } else {
                    // Unprintable, format as \u followed by 4 zero-padded hex digits.
                    std::stringstream hex;
                    hex << "\\u" << std::hex << std::setfill('0') << std::setw(4)
                        << static_cast<unsigned long>(ch);
                    out << decode_utf8(hex.str());
                }
            }
        }
    }
    return out.str();
}
64
65
unsigned long jsonnet_string_parse_unicode(const LocationRange &loc, const char32_t *c)
66
70.4k
{
67
70.4k
    unsigned long codepoint = 0;
68
    // Expect 4 hex digits.
69
351k
    for (unsigned i = 0; i < 4; ++i) {
70
281k
        auto x = (unsigned char)(c[i]);
71
281k
        unsigned digit;
72
281k
        if (x == '\0') {
73
54
            auto msg = "Truncated unicode escape sequence in string literal.";
74
54
            throw StaticError(loc, msg);
75
281k
        } else if (x >= '0' && x <= '9') {
76
220k
            digit = x - '0';
77
220k
        } else if (x >= 'a' && x <= 'f') {
78
25.9k
            digit = x - 'a' + 10;
79
35.1k
        } else if (x >= 'A' && x <= 'F') {
80
34.9k
            digit = x - 'A' + 10;
81
34.9k
        } else {
82
117
            std::stringstream ss;
83
117
            ss << "Malformed unicode escape character, "
84
117
               << "should be hex: '" << x << "'";
85
117
            throw StaticError(loc, ss.str());
86
117
        }
87
281k
        codepoint *= 16;
88
281k
        codepoint += digit;
89
281k
    }
90
70.3k
    return codepoint;
91
70.4k
}
92
93
/** Test whether a value is a Basic Multilingual Plane code point that is not a
 * UTF-16 surrogate.
 *
 * \param codepoint The value to classify.
 * \returns True for values below 0x10000 that lie outside the surrogate range
 *          0xd800..0xdfff, false otherwise.
 */
bool is_bmp_codepoint(const unsigned long codepoint)
{
    // Anything at or above 0x10000 is outside the BMP.
    if (codepoint >= 0x10000) {
        return false;
    }
    // Within the BMP, only the surrogate block is excluded.
    const bool is_surrogate = codepoint >= 0xd800 && codepoint < 0xe000;
    return !is_surrogate;
}
97
98
char32_t decode_utf16_surrogates(const LocationRange &loc, const unsigned long high, const unsigned long low)
99
1.10k
{
100
1.10k
    if (high >= 0xd800 && high < 0xdc00 && low >= 0xdc00 && low < 0xe000) {
101
1.06k
        return 0x10000 + ((high & 0x03ff) << 10) + (low & 0x03ff);
102
1.06k
    } else {
103
41
        std::stringstream ss;
104
41
        ss << "Invalid UTF-16 bytes";
105
41
        throw StaticError(loc, ss.str());
106
41
    }
107
1.10k
}
108
109
/** Resolve the backslash escapes in a string literal's content.
 *
 * Recognised escapes are \" \' \\ \/ \b \f \n \r \t and \uXXXX.  A \uXXXX
 * escape whose value is not accepted by is_bmp_codepoint() must be followed
 * immediately by a second \uXXXX escape; the two values are then combined with
 * decode_utf16_surrogates().  Processing stops at the first NUL character in
 * the input.
 *
 * \param loc Location used for any StaticError that is thrown.
 * \param s The literal content, without surrounding quotes.
 * \returns The unescaped string.
 * \throws StaticError on a trailing backslash, an unknown escape character, a
 *         malformed \u escape, or an invalid surrogate pair.
 */
UString jsonnet_string_unescape(const LocationRange &loc, const UString &s)
{
    UString result;
    const char32_t *cursor = s.c_str();

    while (*cursor != U'\0') {
        // Just a regular letter.
        if (*cursor != U'\\') {
            result += *cursor;
            ++cursor;
            continue;
        }

        // Step over the backslash onto the escape character.
        ++cursor;
        switch (*cursor) {
            // These escapes stand for the character itself.
            case U'"':
            case U'\'':
            case U'\\':
            case U'/': result += *cursor; break;

            case U'b': result += U'\b'; break;
            case U'f': result += U'\f'; break;
            case U'n': result += U'\n'; break;
            case U'r': result += U'\r'; break;
            case U't': result += U'\t'; break;

            case U'u': {
                ++cursor;  // Consume the 'u'.
                unsigned long codepoint = jsonnet_string_parse_unicode(loc, cursor);

                // Leave the cursor on the last hex digit, ready for the
                // advance at the bottom of the loop.
                cursor += 3;

                if (!is_bmp_codepoint(codepoint)) {
                    // A second "\u" must follow directly.  The short-circuit
                    // ensures the cursor only moves past the backslash when
                    // the backslash is actually there.
                    if (*(++cursor) != U'\\' || *(++cursor) != U'u') {
                        throw StaticError(
                            loc, std::string("Invalid non-BMP Unicode escape in string literal"));
                    }
                    ++cursor;  // Consume the second 'u'.
                    const unsigned long second = jsonnet_string_parse_unicode(loc, cursor);
                    cursor += 3;
                    codepoint = decode_utf16_surrogates(loc, codepoint, second);
                }
                result += static_cast<char32_t>(codepoint);
            } break;

            case U'\0': {
                // The backslash was the last character of the input.
                auto msg = "Truncated escape sequence in string literal.";
                throw StaticError(loc, msg);
            }

            default: {
                std::string offending;
                encode_utf8(*cursor, offending);
                std::stringstream msg;
                msg << "Unknown escape sequence in string literal: '" << offending << "'";
                throw StaticError(loc, msg.str());
            }
        }

        // Move past the last character consumed by the escape.
        ++cursor;
    }
    return result;
}
182
183
}  // namespace jsonnet::internal