Coverage Report

Created: 2026-05-27 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/internal/strings.cc
Line
Count
Source
1
// Copyright 2021 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "internal/strings.h"
16
17
#include <string>
18
19
#include "absl/base/attributes.h"
20
#include "absl/status/status.h"
21
#include "absl/strings/ascii.h"
22
#include "absl/strings/cord.h"
23
#include "absl/strings/escaping.h"
24
#include "absl/strings/match.h"
25
#include "absl/strings/str_cat.h"
26
#include "absl/strings/string_view.h"
27
#include "internal/lexis.h"
28
#include "internal/unicode.h"
29
#include "internal/utf8.h"
30
31
namespace cel::internal {
32
33
namespace {
34
35
// Lowercase hexadecimal digits indexed by nibble value (0-15); used when
// emitting "\xNN" escapes.
constexpr char kHexTable[] = "0123456789abcdef";
36
37
32.3k
// Maps an ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its numeric
// value in [0, 15]. The argument is not validated: for any other character
// the returned value is meaningless, so callers check the digit first.
constexpr int HexDigitToInt(char x) {
  // Letters sort above '9' and have low nibbles 1..6; adding 9 before masking
  // shifts them onto 10..15. Decimal digits already carry their value in the
  // low nibble.
  const int shifted = (x > '9') ? x + 9 : x;
  return shifted & 0xf;
}
43
44
9.35k
// Reports whether `x` is one of the ASCII octal digits '0' through '7'.
constexpr bool IsOctalDigit(char x) {
  return !(x < '0' || x > '7');
}
45
46
// Returns true when following conditions are met:
47
// - <closing_str> is a suffix of <source>.
48
// - No other unescaped occurrence of <closing_str> inside <source> (apart from
49
//   being a suffix).
50
// Returns false otherwise. If <error> is non-NULL, returns an error message in
51
// <error>. If <error_offset> is non-NULL, returns the offset in <source> that
52
// corresponds to the location of the error.
53
bool CheckForClosingString(absl::string_view source,
54
298k
                           absl::string_view closing_str, std::string* error) {
55
298k
  if (closing_str.empty()) return true;
56
57
298k
  const char* p = source.data();
58
298k
  const char* end = p + source.size();
59
60
298k
  bool is_closed = false;
61
12.9M
  while (p + closing_str.length() <= end) {
62
12.6M
    if (*p != '\\') {
63
12.6M
      size_t cur_pos = p - source.data();
64
12.6M
      bool is_closing =
65
12.6M
          absl::StartsWith(absl::ClippedSubstr(source, cur_pos), closing_str);
66
12.6M
      if (is_closing && p + closing_str.length() < end) {
67
0
        if (error) {
68
0
          *error =
69
0
              absl::StrCat("String cannot contain unescaped ", closing_str);
70
0
        }
71
0
        return false;
72
0
      }
73
12.6M
      is_closed = is_closing && (p + closing_str.length() == end);
74
12.6M
    } else {
75
26.0k
      p++;  // Read past the escaped character.
76
26.0k
    }
77
12.6M
    p++;
78
12.6M
  }
79
80
298k
  if (!is_closed) {
81
1.26k
    if (error) {
82
1.26k
      *error = absl::StrCat("String must end with ", closing_str);
83
1.26k
    }
84
1.26k
    return false;
85
1.26k
  }
86
87
297k
  return true;
88
298k
}
89
90
// ----------------------------------------------------------------------
91
// CUnescapeInternal()
92
//    Unescapes C escape sequences and is the reverse of CEscape().
93
//
94
//    If 'source' is valid, stores the unescaped string and its size in
95
//    'dest' and 'dest_len' respectively, and returns true. Otherwise
96
//    returns false and optionally stores the error description in
97
//    'error' and the error offset in 'error_offset'. If 'error' is
98
//    nonempty on return, 'error_offset' is in range [0, str.size()].
99
//    Set 'error' and 'error_offset' to NULL to disable error reporting.
100
//
101
//    'dest' must point to a buffer that is at least as big as 'source'.  The
102
//    unescaped string cannot grow bigger than the source string since no
103
//    unescaped sequence is longer than the corresponding escape sequence.
104
//    'source' and 'dest' must not be the same.
105
//
106
// If <closing_str> is non-empty, for <source> to be valid:
107
// - It must end with <closing_str>.
108
// - Should not contain any other unescaped occurrence of <closing_str>.
109
// ----------------------------------------------------------------------
110
bool UnescapeInternal(absl::string_view source, absl::string_view closing_str,
111
                      bool is_raw_literal, bool is_bytes_literal,
112
298k
                      std::string* dest, std::string* error) {
113
298k
  if (!CheckForClosingString(source, closing_str, error)) {
114
1.26k
    return false;
115
1.26k
  }
116
117
297k
  if (ABSL_PREDICT_FALSE(source.empty())) {
118
0
    *dest = std::string();
119
0
    return true;
120
0
  }
121
122
  // Strip off the closing_str from the end before unescaping.
123
297k
  source = source.substr(0, source.size() - closing_str.size());
124
297k
  if (!is_bytes_literal) {
125
287k
    if (!Utf8IsValid(source)) {
126
0
      if (error) {
127
0
        *error = absl::StrCat("Structurally invalid UTF8 string: ",
128
0
                              EscapeBytes(source));
129
0
      }
130
0
      return false;
131
0
    }
132
287k
  }
133
134
297k
  dest->reserve(source.size());
135
136
297k
  const char* p = source.data();
137
297k
  const char* end = p + source.size();
138
297k
  const char* last_byte = end - 1;
139
140
12.2M
  while (p < end) {
141
11.9M
    if (*p != '\\') {
142
11.9M
      if (*p != '\r') {
143
11.9M
        dest->push_back(*p++);
144
11.9M
      } else {
145
        // All types of newlines in different platforms i.e. '\r', '\n', '\r\n'
146
        // are replaced with '\n'.
147
3.27k
        dest->push_back('\n');
148
3.27k
        p++;
149
3.27k
        if (p < end && *p == '\n') {
150
1.19k
          p++;
151
1.19k
        }
152
3.27k
      }
153
11.9M
    } else {
154
24.5k
      if ((p + 1) > last_byte) {
155
0
        if (error) {
156
0
          *error = is_raw_literal
157
0
                       ? "Raw literals cannot end with odd number of \\"
158
0
                   : is_bytes_literal ? "Bytes literal cannot end with \\"
159
0
                                      : "String literal cannot end with \\";
160
0
        }
161
0
        return false;
162
0
      }
163
24.5k
      if (is_raw_literal) {
164
        // For raw literals, all escapes are valid and those characters ('\\'
165
        // and the escaped character) come through literally in the string.
166
2.52k
        dest->push_back(*p++);
167
2.52k
        dest->push_back(*p++);
168
2.52k
        continue;
169
2.52k
      }
170
      // Any error that occurs in the escape is accounted to the start of
171
      // the escape.
172
22.0k
      p++;  // Read past the escape character.
173
174
22.0k
      switch (*p) {
175
913
        case 'a':
176
913
          dest->push_back('\a');
177
913
          break;
178
699
        case 'b':
179
699
          dest->push_back('\b');
180
699
          break;
181
489
        case 'f':
182
489
          dest->push_back('\f');
183
489
          break;
184
412
        case 'n':
185
412
          dest->push_back('\n');
186
412
          break;
187
426
        case 'r':
188
426
          dest->push_back('\r');
189
426
          break;
190
583
        case 't':
191
583
          dest->push_back('\t');
192
583
          break;
193
656
        case 'v':
194
656
          dest->push_back('\v');
195
656
          break;
196
2.70k
        case '\\':
197
2.70k
          dest->push_back('\\');
198
2.70k
          break;
199
730
        case '?':
200
730
          dest->push_back('\?');
201
730
          break;  // \?  Who knew?
202
552
        case '\'':
203
552
          dest->push_back('\'');
204
552
          break;
205
1.43k
        case '"':
206
1.43k
          dest->push_back('\"');
207
1.43k
          break;
208
794
        case '`':
209
794
          dest->push_back('`');
210
794
          break;
211
1.24k
        case '0':
212
1.24k
          ABSL_FALLTHROUGH_INTENDED;
213
2.04k
        case '1':
214
2.04k
          ABSL_FALLTHROUGH_INTENDED;
215
2.72k
        case '2':
216
2.72k
          ABSL_FALLTHROUGH_INTENDED;
217
3.11k
        case '3': {
218
          // Octal escape '\ddd': requires exactly 3 octal digits.  Note that
219
          // the highest valid escape sequence is '\377'.
220
          // For string literals, octal and hex escape sequences are interpreted
221
          // as unicode code points, and the related UTF8-encoded character is
222
          // added to the destination.  For bytes literals, octal and hex
223
          // escape sequences are interpreted as a single byte value.
224
3.11k
          const char* octal_start = p;
225
3.11k
          if (p + 2 >= end) {
226
0
            if (error) {
227
0
              *error =
228
0
                  "Illegal escape sequence: Octal escape must be followed by 3 "
229
0
                  "octal digits but saw: \\" +
230
0
                  std::string(octal_start, end - p);
231
0
            }
232
            // Error offset was set to the start of the escape above the switch.
233
0
            return false;
234
0
          }
235
3.11k
          const char* octal_end = p + 2;
236
3.11k
          char32_t ch = 0;
237
12.4k
          for (; p <= octal_end; ++p) {
238
9.35k
            if (IsOctalDigit(*p)) {
239
9.35k
              ch = ch * 8 + *p - '0';
240
9.35k
            } else {
241
0
              if (error) {
242
0
                *error =
243
0
                    "Illegal escape sequence: Octal escape must be followed by "
244
0
                    "3 octal digits but saw: \\" +
245
0
                    std::string(octal_start, 3);
246
0
              }
247
              // Error offset was set to the start of the escape above the
248
              // switch.
249
0
              return false;
250
0
            }
251
9.35k
          }
252
3.11k
          p = octal_end;  // p points at last digit.
253
3.11k
          if (is_bytes_literal) {
254
1.44k
            dest->push_back(static_cast<char>(ch));
255
1.67k
          } else {
256
1.67k
            Utf8Encode(*dest, ch);
257
1.67k
          }
258
3.11k
          break;
259
3.11k
        }
260
502
        case 'x':
261
502
          ABSL_FALLTHROUGH_INTENDED;
262
1.64k
        case 'X': {
263
          // Hex escape '\xhh': requires exactly 2 hex digits.
264
          // For string literals, octal and hex escape sequences are
265
          // interpreted as unicode code points, and the related UTF8-encoded
266
          // character is added to the destination.  For bytes literals, octal
267
          // and hex escape sequences are interpreted as a single byte value.
268
1.64k
          const char* hex_start = p;
269
1.64k
          if (p + 2 >= end) {
270
0
            if (error) {
271
0
              *error =
272
0
                  "Illegal escape sequence: Hex escape must be followed by 2 "
273
0
                  "hex digits but saw: \\" +
274
0
                  std::string(hex_start, end - p);
275
0
            }
276
            // Error offset was set to the start of the escape above the switch.
277
0
            return false;
278
0
          }
279
1.64k
          char32_t ch = 0;
280
1.64k
          const char* hex_end = p + 2;
281
4.93k
          for (++p; p <= hex_end; ++p) {
282
3.28k
            if (absl::ascii_isxdigit(*p)) {
283
3.28k
              ch = (ch << 4) + HexDigitToInt(*p);
284
3.28k
            } else {
285
0
              if (error) {
286
0
                *error =
287
0
                    "Illegal escape sequence: Hex escape must be followed by 2 "
288
0
                    "hex digits but saw: \\" +
289
0
                    std::string(hex_start, 3);
290
0
              }
291
              // Error offset was set to the start of the escape above the
292
              // switch.
293
0
              return false;
294
0
            }
295
3.28k
          }
296
1.64k
          p = hex_end;  // p points at last digit.
297
1.64k
          if (is_bytes_literal) {
298
925
            dest->push_back(static_cast<char>(ch));
299
925
          } else {
300
719
            Utf8Encode(*dest, ch);
301
719
          }
302
1.64k
          break;
303
1.64k
        }
304
3.80k
        case 'u': {
305
3.80k
          if (is_bytes_literal) {
306
1.15k
            if (error) {
307
1.15k
              *error =
308
1.15k
                  std::string(
309
1.15k
                      "Illegal escape sequence: Unicode escape sequence \\") +
310
1.15k
                  *p + " cannot be used in bytes literals";
311
1.15k
            }
312
            // Error offset was set to the start of the escape above the switch.
313
1.15k
            return false;
314
1.15k
          }
315
          // \uhhhh => Read 4 hex digits as a code point,
316
          //           then write it as UTF-8 bytes.
317
2.65k
          char32_t cp = 0;
318
2.65k
          const char* hex_start = p;
319
2.65k
          if (p + 4 >= end) {
320
0
            if (error) {
321
0
              *error =
322
0
                  "Illegal escape sequence: \\u must be followed by 4 hex "
323
0
                  "digits but saw: \\" +
324
0
                  std::string(hex_start, end - p);
325
0
            }
326
            // Error offset was set to the start of the escape above the switch.
327
0
            return false;
328
0
          }
329
13.2k
          for (int i = 0; i < 4; ++i) {
330
            // Look one char ahead.
331
10.6k
            if (absl::ascii_isxdigit(p[1])) {
332
10.6k
              cp = (cp << 4) + HexDigitToInt(*++p);  // Advance p.
333
10.6k
            } else {
334
0
              if (error) {
335
0
                *error =
336
0
                    "Illegal escape sequence: \\u must be followed by 4 "
337
0
                    "hex digits but saw: \\" +
338
0
                    std::string(hex_start, 5);
339
0
              }
340
              // Error offset was set to the start of the escape above the
341
              // switch.
342
0
              return false;
343
0
            }
344
10.6k
          }
345
2.65k
          if (!UnicodeIsValid(cp)) {
346
850
            if (error) {
347
850
              *error = "Illegal escape sequence: Unicode value \\" +
348
850
                       std::string(hex_start, 5) + " is invalid";
349
850
            }
350
            // Error offset was set to the start of the escape above the switch.
351
850
            return false;
352
850
          }
353
1.80k
          Utf8Encode(*dest, cp);
354
1.80k
          break;
355
2.65k
        }
356
3.09k
        case 'U': {
357
3.09k
          if (is_bytes_literal) {
358
718
            if (error) {
359
718
              *error =
360
718
                  std::string(
361
718
                      "Illegal escape sequence: Unicode escape sequence \\") +
362
718
                  *p + " cannot be used in bytes literals";
363
718
            }
364
718
            return false;
365
718
          }
366
          // \Uhhhhhhhh => convert 8 hex digits to UTF-8.  Note that the
367
          // first two digits must be 00: The valid range is
368
          // '\U00000000' to '\U0010FFFF' (excluding surrogates).
369
2.37k
          char32_t cp = 0;
370
2.37k
          const char* hex_start = p;
371
2.37k
          if (p + 8 >= end) {
372
0
            if (error) {
373
0
              *error =
374
0
                  "Illegal escape sequence: \\U must be followed by 8 hex "
375
0
                  "digits but saw: \\" +
376
0
                  std::string(hex_start, end - p);
377
0
            }
378
            // Error offset was set to the start of the escape above the switch.
379
0
            return false;
380
0
          }
381
20.1k
          for (int i = 0; i < 8; ++i) {
382
            // Look one char ahead.
383
18.5k
            if (absl::ascii_isxdigit(p[1])) {
384
18.5k
              cp = (cp << 4) + HexDigitToInt(*++p);
385
18.5k
              if (cp > 0x10FFFF) {
386
694
                if (error) {
387
694
                  *error = "Illegal escape sequence: Value of \\" +
388
694
                           std::string(hex_start, 9) +
389
694
                           " exceeds Unicode limit (0x0010FFFF)";
390
694
                }
391
                // Error offset was set to the start of the escape above the
392
                // switch.
393
694
                return false;
394
694
              }
395
18.5k
            } else {
396
0
              if (error) {
397
0
                *error =
398
0
                    "Illegal escape sequence: \\U must be followed by 8 "
399
0
                    "hex digits but saw: \\" +
400
0
                    std::string(hex_start, 9);
401
0
              }
402
              // Error offset was set to the start of the escape above the
403
              // switch.
404
0
              return false;
405
0
            }
406
18.5k
          }
407
1.67k
          if (!UnicodeIsValid(cp)) {
408
599
            if (error) {
409
599
              *error = "Illegal escape sequence: Unicode value \\" +
410
599
                       std::string(hex_start, 9) + " is invalid";
411
599
            }
412
            // Error offset was set to the start of the escape above the switch.
413
599
            return false;
414
599
          }
415
1.08k
          Utf8Encode(*dest, cp);
416
1.08k
          break;
417
1.67k
        }
418
0
        case '\r':
419
0
          ABSL_FALLTHROUGH_INTENDED;
420
0
        case '\n': {
421
0
          if (error) {
422
0
            *error = "Illegal escaped newline";
423
0
          }
424
          // Error offset was set to the start of the escape above the switch.
425
0
          return false;
426
0
        }
427
0
        default: {
428
0
          if (error) {
429
0
            *error = std::string("Illegal escape sequence: \\") + *p;
430
0
          }
431
          // Error offset was set to the start of the escape above the switch.
432
0
          return false;
433
0
        }
434
22.0k
      }
435
18.0k
      p++;  // read past letter we escaped
436
18.0k
    }
437
11.9M
  }
438
439
293k
  dest->shrink_to_fit();
440
441
293k
  return true;
442
297k
}
443
444
std::string EscapeInternal(absl::string_view src, bool escape_all_bytes,
445
2.30k
                           char escape_quote_char) {
446
2.30k
  std::string dest;
447
  // Worst case size is every byte has to be hex escaped, so 4 char for every
448
  // byte.
449
2.30k
  dest.reserve(src.size() * 4);
450
2.30k
  bool last_hex_escape = false;  // true if last output char was \xNN.
451
2.30k
  const char* p = src.data();
452
2.30k
  const char* end = p + src.size();
453
29.6k
  for (; p < end; ++p) {
454
27.3k
    unsigned char c = static_cast<unsigned char>(*p);
455
27.3k
    bool is_hex_escape = false;
456
27.3k
    switch (c) {
457
189
      case '\n':
458
189
        dest.append("\\n");
459
189
        break;
460
98
      case '\r':
461
98
        dest.append("\\r");
462
98
        break;
463
443
      case '\t':
464
443
        dest.append("\\t");
465
443
        break;
466
300
      case '\\':
467
300
        dest.append("\\\\");
468
300
        break;
469
626
      case '\'':
470
626
        ABSL_FALLTHROUGH_INTENDED;
471
1.09k
      case '\"':
472
1.09k
        ABSL_FALLTHROUGH_INTENDED;
473
1.45k
      case '`':
474
        // Escape only quote chars that match escape_quote_char.
475
1.45k
        if (escape_quote_char == 0 || c == escape_quote_char) {
476
423
          dest.push_back('\\');
477
423
        }
478
1.45k
        dest.push_back(c);
479
1.45k
        break;
480
24.8k
      default:
481
        // Note that if we emit \xNN and the src character after that is a hex
482
        // digit then that digit must be escaped too to prevent it being
483
        // interpreted as part of the character code by C.
484
24.8k
        if ((!escape_all_bytes || c < 0x80) &&
485
23.9k
            (!absl::ascii_isprint(c) ||
486
19.0k
             (last_hex_escape && absl::ascii_isxdigit(c)))) {
487
5.49k
          dest.append("\\x");
488
5.49k
          dest.push_back(kHexTable[c / 16]);
489
5.49k
          dest.push_back(kHexTable[c % 16]);
490
5.49k
          is_hex_escape = true;
491
19.4k
        } else {
492
19.4k
          dest.push_back(c);
493
19.4k
          break;
494
19.4k
        }
495
27.3k
    }
496
27.3k
    last_hex_escape = is_hex_escape;
497
27.3k
  }
498
2.30k
  dest.shrink_to_fit();
499
2.30k
  return dest;
500
2.30k
}
501
502
298k
// Reports whether `str` is at least six characters long and both starts and
// ends with the same triple-quote delimiter (""" or ''').
bool MayBeTripleQuotedString(absl::string_view str) {
  if (str.size() < 6) {
    return false;
  }
  constexpr absl::string_view kTripleDouble = "\"\"\"";
  constexpr absl::string_view kTripleSingle = "'''";
  if (absl::StartsWith(str, kTripleDouble) &&
      absl::EndsWith(str, kTripleDouble)) {
    return true;
  }
  return absl::StartsWith(str, kTripleSingle) &&
         absl::EndsWith(str, kTripleSingle);
}
507
508
288k
// Reports whether `str` has at least two characters and its first and last
// characters are the same quote character (' or ").
bool MayBeStringLiteral(absl::string_view str) {
  if (str.size() < 2) {
    return false;
  }
  const char opening = str.front();
  if (opening != str.back()) {
    return false;
  }
  return opening == '\'' || opening == '"';
}
512
513
10.5k
// Reports whether `str` has the shape of a bytes literal: at least three
// characters, a leading 'b' or 'B', and a quote character (' or ") at index 1
// that is repeated as the last character.
bool MayBeBytesLiteral(absl::string_view str) {
  if (str.size() < 3 || !absl::StartsWithIgnoreCase(str, "b")) {
    return false;
  }
  const char opening = str[1];
  if (opening != str.back()) {
    return false;
  }
  return opening == '\'' || opening == '"';
}
517
518
288k
// Reports whether `str` has the shape of a raw string literal: at least three
// characters, a leading 'r' or 'R', and a quote character (' or ") at index 1
// that is repeated as the last character.
bool MayBeRawStringLiteral(absl::string_view str) {
  if (str.size() < 3 || !absl::StartsWithIgnoreCase(str, "r")) {
    return false;
  }
  const char opening = str[1];
  if (opening != str.back()) {
    return false;
  }
  return opening == '\'' || opening == '"';
}
522
523
10.5k
// Reports whether `str` has the shape of a raw bytes literal: at least four
// characters, a case-insensitive "rb" or "br" prefix, and a quote character
// (' or ") at index 2 that is repeated as the last character.
bool MayBeRawBytesLiteral(absl::string_view str) {
  if (str.size() < 4) {
    return false;
  }
  const bool has_raw_bytes_prefix = absl::StartsWithIgnoreCase(str, "rb") ||
                                    absl::StartsWithIgnoreCase(str, "br");
  if (!has_raw_bytes_prefix) {
    return false;
  }
  const char opening = str[2];
  if (opening != str.back()) {
    return false;
  }
  return opening == '\'' || opening == '"';
}
529
530
}  // namespace
531
532
0
// Unescapes `str` as the content of a (non-raw) string literal with no
// closing delimiter. Returns the unescaped text, or an InvalidArgument status
// whose message starts with "Invalid escaped string: ".
absl::StatusOr<std::string> UnescapeString(absl::string_view str) {
  std::string unescaped;
  std::string problem;
  const bool ok =
      UnescapeInternal(str, /*closing_str=*/"", /*is_raw_literal=*/false,
                       /*is_bytes_literal=*/false, &unescaped, &problem);
  if (ok) {
    return unescaped;
  }
  return absl::InvalidArgumentError(
      absl::StrCat("Invalid escaped string: ", problem));
}
541
542
0
// Unescapes `str` as the content of a (non-raw) bytes literal with no closing
// delimiter. Returns the unescaped bytes, or an InvalidArgument status whose
// message starts with "Invalid escaped bytes: ".
absl::StatusOr<std::string> UnescapeBytes(absl::string_view str) {
  std::string unescaped;
  std::string problem;
  const bool ok =
      UnescapeInternal(str, /*closing_str=*/"", /*is_raw_literal=*/false,
                       /*is_bytes_literal=*/true, &unescaped, &problem);
  if (ok) {
    return unescaped;
  }
  return absl::InvalidArgumentError(
      absl::StrCat("Invalid escaped bytes: ", problem));
}
551
552
0
// Escapes `str` via EscapeInternal with escape_all_bytes set and a zero
// quote character, so every quote character (' " `) is backslash-escaped.
std::string EscapeString(absl::string_view str) {
  constexpr bool kEscapeAllBytes = true;
  constexpr char kEscapeEveryQuote = '\0';
  return EscapeInternal(str, kEscapeAllBytes, kEscapeEveryQuote);
}
555
556
std::string EscapeBytes(absl::string_view str, bool escape_all_bytes,
557
0
                        char escape_quote_char) {
558
0
  std::string escaped_bytes;
559
0
  const char* p = str.data();
560
0
  const char* end = p + str.size();
561
0
  for (; p < end; ++p) {
562
0
    unsigned char c = *p;
563
0
    if (escape_all_bytes || !absl::ascii_isprint(c)) {
564
0
      escaped_bytes += "\\x";
565
0
      escaped_bytes += absl::BytesToHexString(absl::string_view(p, 1));
566
0
    } else {
567
0
      switch (c) {
568
        // Note that we only handle printable escape characters here.  All
569
        // unprintable (\n, \r, \t, etc.) are hex escaped above.
570
0
        case '\\':
571
0
          escaped_bytes += "\\\\";
572
0
          break;
573
0
        case '\'':
574
0
        case '"':
575
0
        case '`':
576
          // Escape only quote chars that match escape_quote_char.
577
0
          if (escape_quote_char == 0 || c == escape_quote_char) {
578
0
            escaped_bytes += '\\';
579
0
          }
580
0
          escaped_bytes += c;
581
0
          break;
582
0
        default:
583
0
          escaped_bytes += c;
584
0
          break;
585
0
      }
586
0
    }
587
0
  }
588
0
  return escaped_bytes;
589
0
}
590
591
288k
// Parses a quoted string literal, optionally prefixed with 'r'/'R' for a raw
// literal, and returns its unescaped content. Both single/double quotes and
// their triple-quoted forms are accepted. Returns an InvalidArgument status
// if `str` does not have the shape of a string literal or if unescaping
// fails.
absl::StatusOr<std::string> ParseStringLiteral(absl::string_view str) {
  const bool is_raw = MayBeRawStringLiteral(str);
  if (!is_raw && !MayBeStringLiteral(str)) {
    return absl::InvalidArgumentError("Invalid string literal");
  }

  // A raw literal carries a one-character 'r' prefix ahead of the quotes.
  absl::string_view remainder = is_raw ? absl::ClippedSubstr(str, 1) : str;

  // The opening delimiter is {""", '''} or {", '}; the same text is expected
  // to close the literal.
  const size_t delimiter_length = MayBeTripleQuotedString(remainder) ? 3 : 1;
  const absl::string_view delimiter = remainder.substr(0, delimiter_length);
  // What is left still includes the closing delimiter.
  remainder = absl::ClippedSubstr(remainder, delimiter_length);

  std::string unescaped;
  std::string problem;
  if (UnescapeInternal(remainder, delimiter, is_raw,
                       /*is_bytes_literal=*/false, &unescaped, &problem)) {
    return unescaped;
  }
  return absl::InvalidArgumentError(
      absl::StrCat("Invalid string literal: ", problem));
}
618
619
10.5k
// Parses a quoted bytes literal prefixed with 'b' (or "rb"/"br" for a raw
// bytes literal, case-insensitive) and returns its unescaped content. Both
// single/double quotes and their triple-quoted forms are accepted. Returns an
// InvalidArgument status if `str` does not have the shape of a bytes literal
// or if unescaping fails.
absl::StatusOr<std::string> ParseBytesLiteral(absl::string_view str) {
  const bool is_raw = MayBeRawBytesLiteral(str);
  if (!is_raw && !MayBeBytesLiteral(str)) {
    return absl::InvalidArgumentError("Invalid bytes literal");
  }

  // Strip off the prefix before parsing: {"rb", "br"} for raw bytes content,
  // otherwise the single 'b'.
  const size_t prefix_length = is_raw ? 2 : 1;
  absl::string_view remainder = absl::ClippedSubstr(str, prefix_length);

  // The opening delimiter is {""", '''} or {", '}; the same text is expected
  // to close the literal.
  const size_t delimiter_length = MayBeTripleQuotedString(remainder) ? 3 : 1;
  const absl::string_view delimiter = remainder.substr(0, delimiter_length);
  // What is left still includes the closing delimiter.
  remainder = absl::ClippedSubstr(remainder, delimiter_length);

  std::string unescaped;
  std::string problem;
  if (UnescapeInternal(remainder, delimiter, is_raw,
                       /*is_bytes_literal=*/true, &unescaped, &problem)) {
    return unescaped;
  }
  return absl::InvalidArgumentError(
      absl::StrCat("Invalid bytes literal: ", problem));
}
650
651
2.30k
// Formats `str` as a quoted, escaped string literal. Double quotes are used
// unless `str` contains a double quote and no single quote, in which case
// single quotes are used. Only the chosen quote character is
// backslash-escaped.
std::string FormatStringLiteral(absl::string_view str) {
  const bool has_double_quote = str.find('"') != str.npos;
  const bool has_single_quote = str.find('\'') != str.npos;
  const absl::string_view quote =
      (has_double_quote && !has_single_quote) ? "'" : "\"";
  const std::string escaped = EscapeInternal(str, true, quote[0]);
  return absl::StrCat(quote, escaped, quote);
}
656
657
0
// Cord overload: formats the cord's content as a string literal. The flat
// view is used directly when TryFlat() provides one; otherwise the cord is
// first copied into a std::string.
std::string FormatStringLiteral(const absl::Cord& str) {
  const auto flat = str.TryFlat();
  if (flat.has_value()) {
    return FormatStringLiteral(*flat);
  }
  const std::string flattened = static_cast<std::string>(str);
  return FormatStringLiteral(flattened);
}
663
664
0
// Formats `str` as a single-quoted string literal, backslash-escaping single
// quotes inside it.
std::string FormatSingleQuotedStringLiteral(absl::string_view str) {
  constexpr char kQuote = '\'';
  const std::string escaped = EscapeInternal(str, true, kQuote);
  return absl::StrCat("'", escaped, "'");
}
667
668
0
// Formats `str` as a double-quoted string literal, backslash-escaping double
// quotes inside it.
std::string FormatDoubleQuotedStringLiteral(absl::string_view str) {
  constexpr char kQuote = '"';
  const std::string escaped = EscapeInternal(str, true, kQuote);
  return absl::StrCat("\"", escaped, "\"");
}
671
672
0
// Formats `str` as a bytes literal with a 'b' prefix. Double quotes are used
// unless `str` contains a double quote and no single quote, in which case
// single quotes are used. Only the chosen quote character is
// backslash-escaped.
std::string FormatBytesLiteral(absl::string_view str) {
  const bool has_double_quote = str.find('"') != str.npos;
  const bool has_single_quote = str.find('\'') != str.npos;
  const absl::string_view quote =
      (has_double_quote && !has_single_quote) ? "'" : "\"";
  const std::string escaped = EscapeBytes(str, false, quote[0]);
  return absl::StrCat("b", quote, escaped, quote);
}
677
678
0
// Formats `str` as a single-quoted bytes literal (b'...'), backslash-escaping
// single quotes inside it.
std::string FormatSingleQuotedBytesLiteral(absl::string_view str) {
  constexpr char kQuote = '\'';
  const std::string escaped = EscapeBytes(str, false, kQuote);
  return absl::StrCat("b'", escaped, "'");
}
681
682
0
// Formats `str` as a double-quoted bytes literal (b"..."), backslash-escaping
// double quotes inside it.
std::string FormatDoubleQuotedBytesLiteral(absl::string_view str) {
  constexpr char kQuote = '"';
  const std::string escaped = EscapeBytes(str, false, kQuote);
  return absl::StrCat("b\"", escaped, "\"");
}
685
686
0
// Returns a copy of `str` when LexisIsIdentifier() accepts it, otherwise an
// InvalidArgument status with the message "Invalid identifier".
absl::StatusOr<std::string> ParseIdentifier(absl::string_view str) {
  if (LexisIsIdentifier(str)) {
    return std::string(str);
  }
  return absl::InvalidArgumentError("Invalid identifier");
}
692
693
}  // namespace cel::internal