Coverage Report

Created: 2025-12-29 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/internal/strings.cc
Line
Count
Source
1
// Copyright 2021 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "internal/strings.h"
16
17
#include <string>
18
19
#include "absl/base/attributes.h"
20
#include "absl/status/status.h"
21
#include "absl/strings/ascii.h"
22
#include "absl/strings/cord.h"
23
#include "absl/strings/escaping.h"
24
#include "absl/strings/match.h"
25
#include "absl/strings/str_cat.h"
26
#include "absl/strings/string_view.h"
27
#include "internal/lexis.h"
28
#include "internal/unicode.h"
29
#include "internal/utf8.h"
30
31
namespace cel::internal {
32
33
namespace {
34
35
constexpr char kHexTable[] = "0123456789abcdef";
36
37
29.5k
constexpr int HexDigitToInt(char x) {
38
29.5k
  if (x > '9') {
39
14.9k
    x += 9;
40
14.9k
  }
41
29.5k
  return x & 0xf;
42
29.5k
}
43
44
6.41k
constexpr bool IsOctalDigit(char x) { return x >= '0' && x <= '7'; }
45
46
// Returns true when following conditions are met:
47
// - <closing_str> is a suffix of <source>.
48
// - No other unescaped occurrence of <closing_str> inside <source> (apart from
49
//   being a suffix).
50
// Returns false otherwise. If <error> is non-NULL, stores an error message in
51
// <error>. An empty <closing_str> is accepted unconditionally: the function
52
// returns true without inspecting <source>.
53
bool CheckForClosingString(absl::string_view source,
54
295k
                           absl::string_view closing_str, std::string* error) {
55
295k
  if (closing_str.empty()) return true;
56
57
295k
  const char* p = source.data();
58
295k
  const char* end = p + source.size();
59
60
295k
  bool is_closed = false;
61
11.6M
  while (p + closing_str.length() <= end) {
62
11.3M
    if (*p != '\\') {
63
11.3M
      size_t cur_pos = p - source.data();
64
11.3M
      bool is_closing =
65
11.3M
          absl::StartsWith(absl::ClippedSubstr(source, cur_pos), closing_str);
66
11.3M
      if (is_closing && p + closing_str.length() < end) {
67
0
        if (error) {
68
0
          *error =
69
0
              absl::StrCat("String cannot contain unescaped ", closing_str);
70
0
        }
71
0
        return false;
72
0
      }
73
11.3M
      is_closed = is_closing && (p + closing_str.length() == end);
74
11.3M
    } else {
75
22.6k
      p++;  // Read past the escaped character.
76
22.6k
    }
77
11.3M
    p++;
78
11.3M
  }
79
80
295k
  if (!is_closed) {
81
1.49k
    if (error) {
82
1.49k
      *error = absl::StrCat("String must end with ", closing_str);
83
1.49k
    }
84
1.49k
    return false;
85
1.49k
  }
86
87
294k
  return true;
88
295k
}
89
90
// ----------------------------------------------------------------------
91
// UnescapeInternal()
92
//    Unescapes C escape sequences and is the reverse of CEscape().
93
//
94
//    If 'source' is valid, stores the unescaped string in 'dest' and
95
//    returns true. Otherwise returns false and, if 'error' is non-NULL,
96
//    stores the error description in 'error'; no error offset is
97
//    reported. Unless 'is_bytes_literal' is set, 'source' must also be
98
//    structurally valid UTF-8.
99
//    Set 'error' to NULL to disable error reporting.
100
//
101
//    'dest' must be non-NULL; it is a std::string that the function grows as
102
//    needed after reserving source.size() bytes. The unescaped string cannot
103
//    grow bigger than the source string since no unescaped sequence is longer
104
//    than the corresponding escape sequence. 'source' must not alias 'dest'.
105
//
106
// If <closing_str> is non-empty, for <source> to be valid:
107
// - It must end with <closing_str>.
108
// - Should not contain any other unescaped occurrence of <closing_str>.
109
// ----------------------------------------------------------------------
110
bool UnescapeInternal(absl::string_view source, absl::string_view closing_str,
111
                      bool is_raw_literal, bool is_bytes_literal,
112
295k
                      std::string* dest, std::string* error) {
113
295k
  if (!CheckForClosingString(source, closing_str, error)) {
114
1.49k
    return false;
115
1.49k
  }
116
117
294k
  if (ABSL_PREDICT_FALSE(source.empty())) {
118
0
    *dest = std::string();
119
0
    return true;
120
0
  }
121
122
  // Strip off the closing_str from the end before unescaping.
123
294k
  source = source.substr(0, source.size() - closing_str.size());
124
294k
  if (!is_bytes_literal) {
125
287k
    if (!Utf8IsValid(source)) {
126
0
      if (error) {
127
0
        *error = absl::StrCat("Structurally invalid UTF8 string: ",
128
0
                              EscapeBytes(source));
129
0
      }
130
0
      return false;
131
0
    }
132
287k
  }
133
134
294k
  dest->reserve(source.size());
135
136
294k
  const char* p = source.data();
137
294k
  const char* end = p + source.size();
138
294k
  const char* last_byte = end - 1;
139
140
10.8M
  while (p < end) {
141
10.5M
    if (*p != '\\') {
142
10.5M
      if (*p != '\r') {
143
10.5M
        dest->push_back(*p++);
144
10.5M
      } else {
145
        // All types of newlines in different platforms i.e. '\r', '\n', '\r\n'
146
        // are replaced with '\n'.
147
1.87k
        dest->push_back('\n');
148
1.87k
        p++;
149
1.87k
        if (p < end && *p == '\n') {
150
436
          p++;
151
436
        }
152
1.87k
      }
153
10.5M
    } else {
154
20.8k
      if ((p + 1) > last_byte) {
155
0
        if (error) {
156
0
          *error = is_raw_literal
157
0
                       ? "Raw literals cannot end with odd number of \\"
158
0
                   : is_bytes_literal ? "Bytes literal cannot end with \\"
159
0
                                      : "String literal cannot end with \\";
160
0
        }
161
0
        return false;
162
0
      }
163
20.8k
      if (is_raw_literal) {
164
        // For raw literals, all escapes are valid and those characters ('\\'
165
        // and the escaped character) come through literally in the string.
166
3.66k
        dest->push_back(*p++);
167
3.66k
        dest->push_back(*p++);
168
3.66k
        continue;
169
3.66k
      }
170
      // Any error that occurs in the escape is accounted to the start of
171
      // the escape.
172
17.2k
      p++;  // Read past the escape character.
173
174
17.2k
      switch (*p) {
175
520
        case 'a':
176
520
          dest->push_back('\a');
177
520
          break;
178
304
        case 'b':
179
304
          dest->push_back('\b');
180
304
          break;
181
538
        case 'f':
182
538
          dest->push_back('\f');
183
538
          break;
184
197
        case 'n':
185
197
          dest->push_back('\n');
186
197
          break;
187
269
        case 'r':
188
269
          dest->push_back('\r');
189
269
          break;
190
371
        case 't':
191
371
          dest->push_back('\t');
192
371
          break;
193
278
        case 'v':
194
278
          dest->push_back('\v');
195
278
          break;
196
2.69k
        case '\\':
197
2.69k
          dest->push_back('\\');
198
2.69k
          break;
199
295
        case '?':
200
295
          dest->push_back('\?');
201
295
          break;  // \?  Who knew?
202
391
        case '\'':
203
391
          dest->push_back('\'');
204
391
          break;
205
661
        case '"':
206
661
          dest->push_back('\"');
207
661
          break;
208
367
        case '`':
209
367
          dest->push_back('`');
210
367
          break;
211
500
        case '0':
212
500
          ABSL_FALLTHROUGH_INTENDED;
213
1.07k
        case '1':
214
1.07k
          ABSL_FALLTHROUGH_INTENDED;
215
1.84k
        case '2':
216
1.84k
          ABSL_FALLTHROUGH_INTENDED;
217
2.13k
        case '3': {
218
          // Octal escape '\ddd': requires exactly 3 octal digits.  Note that
219
          // the highest valid escape sequence is '\377'.
220
          // For string literals, octal and hex escape sequences are interpreted
221
          // as unicode code points, and the related UTF8-encoded character is
222
          // added to the destination.  For bytes literals, octal and hex
223
          // escape sequences are interpreted as a single byte value.
224
2.13k
          const char* octal_start = p;
225
2.13k
          if (p + 2 >= end) {
226
0
            if (error) {
227
0
              *error =
228
0
                  "Illegal escape sequence: Octal escape must be followed by 3 "
229
0
                  "octal digits but saw: \\" +
230
0
                  std::string(octal_start, end - p);
231
0
            }
232
            // Error offset was set to the start of the escape above the switch.
233
0
            return false;
234
0
          }
235
2.13k
          const char* octal_end = p + 2;
236
2.13k
          char32_t ch = 0;
237
8.55k
          for (; p <= octal_end; ++p) {
238
6.41k
            if (IsOctalDigit(*p)) {
239
6.41k
              ch = ch * 8 + *p - '0';
240
6.41k
            } else {
241
0
              if (error) {
242
0
                *error =
243
0
                    "Illegal escape sequence: Octal escape must be followed by "
244
0
                    "3 octal digits but saw: \\" +
245
0
                    std::string(octal_start, 3);
246
0
              }
247
              // Error offset was set to the start of the escape above the
248
              // switch.
249
0
              return false;
250
0
            }
251
6.41k
          }
252
2.13k
          p = octal_end;  // p points at last digit.
253
2.13k
          if (is_bytes_literal) {
254
1.06k
            dest->push_back(static_cast<char>(ch));
255
1.07k
          } else {
256
1.07k
            Utf8Encode(*dest, ch);
257
1.07k
          }
258
2.13k
          break;
259
2.13k
        }
260
282
        case 'x':
261
282
          ABSL_FALLTHROUGH_INTENDED;
262
924
        case 'X': {
263
          // Hex escape '\xhh': requires exactly 2 hex digits.
264
          // For string literals, octal and hex escape sequences are
265
          // interpreted as unicode code points, and the related UTF8-encoded
266
          // character is added to the destination.  For bytes literals, octal
267
          // and hex escape sequences are interpreted as a single byte value.
268
924
          const char* hex_start = p;
269
924
          if (p + 2 >= end) {
270
0
            if (error) {
271
0
              *error =
272
0
                  "Illegal escape sequence: Hex escape must be followed by 2 "
273
0
                  "hex digits but saw: \\" +
274
0
                  std::string(hex_start, end - p);
275
0
            }
276
            // Error offset was set to the start of the escape above the switch.
277
0
            return false;
278
0
          }
279
924
          char32_t ch = 0;
280
924
          const char* hex_end = p + 2;
281
2.77k
          for (++p; p <= hex_end; ++p) {
282
1.84k
            if (absl::ascii_isxdigit(*p)) {
283
1.84k
              ch = (ch << 4) + HexDigitToInt(*p);
284
1.84k
            } else {
285
0
              if (error) {
286
0
                *error =
287
0
                    "Illegal escape sequence: Hex escape must be followed by 2 "
288
0
                    "hex digits but saw: \\" +
289
0
                    std::string(hex_start, 3);
290
0
              }
291
              // Error offset was set to the start of the escape above the
292
              // switch.
293
0
              return false;
294
0
            }
295
1.84k
          }
296
924
          p = hex_end;  // p points at last digit.
297
924
          if (is_bytes_literal) {
298
313
            dest->push_back(static_cast<char>(ch));
299
611
          } else {
300
611
            Utf8Encode(*dest, ch);
301
611
          }
302
924
          break;
303
924
        }
304
4.21k
        case 'u': {
305
4.21k
          if (is_bytes_literal) {
306
1.95k
            if (error) {
307
1.95k
              *error =
308
1.95k
                  std::string(
309
1.95k
                      "Illegal escape sequence: Unicode escape sequence \\") +
310
1.95k
                  *p + " cannot be used in bytes literals";
311
1.95k
            }
312
            // Error offset was set to the start of the escape above the switch.
313
1.95k
            return false;
314
1.95k
          }
315
          // \uhhhh => Read 4 hex digits as a code point,
316
          //           then write it as UTF-8 bytes.
317
2.26k
          char32_t cp = 0;
318
2.26k
          const char* hex_start = p;
319
2.26k
          if (p + 4 >= end) {
320
0
            if (error) {
321
0
              *error =
322
0
                  "Illegal escape sequence: \\u must be followed by 4 hex "
323
0
                  "digits but saw: \\" +
324
0
                  std::string(hex_start, end - p);
325
0
            }
326
            // Error offset was set to the start of the escape above the switch.
327
0
            return false;
328
0
          }
329
11.3k
          for (int i = 0; i < 4; ++i) {
330
            // Look one char ahead.
331
9.05k
            if (absl::ascii_isxdigit(p[1])) {
332
9.05k
              cp = (cp << 4) + HexDigitToInt(*++p);  // Advance p.
333
9.05k
            } else {
334
0
              if (error) {
335
0
                *error =
336
0
                    "Illegal escape sequence: \\u must be followed by 4 "
337
0
                    "hex digits but saw: \\" +
338
0
                    std::string(hex_start, 5);
339
0
              }
340
              // Error offset was set to the start of the escape above the
341
              // switch.
342
0
              return false;
343
0
            }
344
9.05k
          }
345
2.26k
          if (!UnicodeIsValid(cp)) {
346
916
            if (error) {
347
916
              *error = "Illegal escape sequence: Unicode value \\" +
348
916
                       std::string(hex_start, 5) + " is invalid";
349
916
            }
350
            // Error offset was set to the start of the escape above the switch.
351
916
            return false;
352
916
          }
353
1.34k
          Utf8Encode(*dest, cp);
354
1.34k
          break;
355
2.26k
        }
356
3.05k
        case 'U': {
357
3.05k
          if (is_bytes_literal) {
358
659
            if (error) {
359
659
              *error =
360
659
                  std::string(
361
659
                      "Illegal escape sequence: Unicode escape sequence \\") +
362
659
                  *p + " cannot be used in bytes literals";
363
659
            }
364
659
            return false;
365
659
          }
366
          // \Uhhhhhhhh => convert 8 hex digits to UTF-8.  Note that the
367
          // first two digits must be 00: The valid range is
368
          // '\U00000000' to '\U0010FFFF' (excluding surrogates).
369
2.39k
          char32_t cp = 0;
370
2.39k
          const char* hex_start = p;
371
2.39k
          if (p + 8 >= end) {
372
0
            if (error) {
373
0
              *error =
374
0
                  "Illegal escape sequence: \\U must be followed by 8 hex "
375
0
                  "digits but saw: \\" +
376
0
                  std::string(hex_start, end - p);
377
0
            }
378
            // Error offset was set to the start of the escape above the switch.
379
0
            return false;
380
0
          }
381
20.2k
          for (int i = 0; i < 8; ++i) {
382
            // Look one char ahead.
383
18.6k
            if (absl::ascii_isxdigit(p[1])) {
384
18.6k
              cp = (cp << 4) + HexDigitToInt(*++p);
385
18.6k
              if (cp > 0x10FFFF) {
386
768
                if (error) {
387
768
                  *error = "Illegal escape sequence: Value of \\" +
388
768
                           std::string(hex_start, 9) +
389
768
                           " exceeds Unicode limit (0x0010FFFF)";
390
768
                }
391
                // Error offset was set to the start of the escape above the
392
                // switch.
393
768
                return false;
394
768
              }
395
18.6k
            } else {
396
0
              if (error) {
397
0
                *error =
398
0
                    "Illegal escape sequence: \\U must be followed by 8 "
399
0
                    "hex digits but saw: \\" +
400
0
                    std::string(hex_start, 9);
401
0
              }
402
              // Error offset was set to the start of the escape above the
403
              // switch.
404
0
              return false;
405
0
            }
406
18.6k
          }
407
1.62k
          if (!UnicodeIsValid(cp)) {
408
617
            if (error) {
409
617
              *error = "Illegal escape sequence: Unicode value \\" +
410
617
                       std::string(hex_start, 9) + " is invalid";
411
617
            }
412
            // Error offset was set to the start of the escape above the switch.
413
617
            return false;
414
617
          }
415
1.00k
          Utf8Encode(*dest, cp);
416
1.00k
          break;
417
1.62k
        }
418
0
        case '\r':
419
0
          ABSL_FALLTHROUGH_INTENDED;
420
0
        case '\n': {
421
0
          if (error) {
422
0
            *error = "Illegal escaped newline";
423
0
          }
424
          // Error offset was set to the start of the escape above the switch.
425
0
          return false;
426
0
        }
427
0
        default: {
428
0
          if (error) {
429
0
            *error = std::string("Illegal escape sequence: \\") + *p;
430
0
          }
431
          // Error offset was set to the start of the escape above the switch.
432
0
          return false;
433
0
        }
434
17.2k
      }
435
12.3k
      p++;  // read past letter we escaped
436
12.3k
    }
437
10.5M
  }
438
439
289k
  dest->shrink_to_fit();
440
441
289k
  return true;
442
294k
}
443
444
std::string EscapeInternal(absl::string_view src, bool escape_all_bytes,
445
0
                           char escape_quote_char) {
446
0
  std::string dest;
447
  // Worst case size is every byte has to be hex escaped, so 4 char for every
448
  // byte.
449
0
  dest.reserve(src.size() * 4);
450
0
  bool last_hex_escape = false;  // true if last output char was \xNN.
451
0
  const char* p = src.data();
452
0
  const char* end = p + src.size();
453
0
  for (; p < end; ++p) {
454
0
    unsigned char c = static_cast<unsigned char>(*p);
455
0
    bool is_hex_escape = false;
456
0
    switch (c) {
457
0
      case '\n':
458
0
        dest.append("\\n");
459
0
        break;
460
0
      case '\r':
461
0
        dest.append("\\r");
462
0
        break;
463
0
      case '\t':
464
0
        dest.append("\\t");
465
0
        break;
466
0
      case '\\':
467
0
        dest.append("\\\\");
468
0
        break;
469
0
      case '\'':
470
0
        ABSL_FALLTHROUGH_INTENDED;
471
0
      case '\"':
472
0
        ABSL_FALLTHROUGH_INTENDED;
473
0
      case '`':
474
        // Escape only quote chars that match escape_quote_char.
475
0
        if (escape_quote_char == 0 || c == escape_quote_char) {
476
0
          dest.push_back('\\');
477
0
        }
478
0
        dest.push_back(c);
479
0
        break;
480
0
      default:
481
        // Note that if we emit \xNN and the src character after that is a hex
482
        // digit then that digit must be escaped too to prevent it being
483
        // interpreted as part of the character code by C.
484
0
        if ((!escape_all_bytes || c < 0x80) &&
485
0
            (!absl::ascii_isprint(c) ||
486
0
             (last_hex_escape && absl::ascii_isxdigit(c)))) {
487
0
          dest.append("\\x");
488
0
          dest.push_back(kHexTable[c / 16]);
489
0
          dest.push_back(kHexTable[c % 16]);
490
0
          is_hex_escape = true;
491
0
        } else {
492
0
          dest.push_back(c);
493
0
          break;
494
0
        }
495
0
    }
496
0
    last_hex_escape = is_hex_escape;
497
0
  }
498
0
  dest.shrink_to_fit();
499
0
  return dest;
500
0
}
501
502
295k
bool MayBeTripleQuotedString(absl::string_view str) {
503
295k
  return (str.size() >= 6 &&
504
10.1k
          ((absl::StartsWith(str, "\"\"\"") && absl::EndsWith(str, "\"\"\"")) ||
505
9.41k
           (absl::StartsWith(str, "'''") && absl::EndsWith(str, "'''"))));
506
295k
}
507
508
288k
bool MayBeStringLiteral(absl::string_view str) {
509
288k
  return (str.size() >= 2 && str[0] == str[str.size() - 1] &&
510
286k
          (str[0] == '\'' || str[0] == '"'));
511
288k
}
512
513
7.21k
bool MayBeBytesLiteral(absl::string_view str) {
514
7.21k
  return (str.size() >= 3 && absl::StartsWithIgnoreCase(str, "b") &&
515
7.21k
          str[1] == str[str.size() - 1] && (str[1] == '\'' || str[1] == '"'));
516
7.21k
}
517
518
288k
bool MayBeRawStringLiteral(absl::string_view str) {
519
288k
  return (str.size() >= 3 && absl::StartsWithIgnoreCase(str, "r") &&
520
1.61k
          str[1] == str[str.size() - 1] && (str[1] == '\'' || str[1] == '"'));
521
288k
}
522
523
7.21k
bool MayBeRawBytesLiteral(absl::string_view str) {
524
7.21k
  return (str.size() >= 4 &&
525
6.32k
          (absl::StartsWithIgnoreCase(str, "rb") ||
526
6.32k
           absl::StartsWithIgnoreCase(str, "br")) &&
527
666
          (str[2] == str[str.size() - 1]) && (str[2] == '\'' || str[2] == '"'));
528
7.21k
}
529
530
}  // namespace
531
532
0
absl::StatusOr<std::string> UnescapeString(absl::string_view str) {
533
0
  std::string out;
534
0
  std::string error;
535
0
  if (!UnescapeInternal(str, "", false, false, &out, &error)) {
536
0
    return absl::InvalidArgumentError(
537
0
        absl::StrCat("Invalid escaped string: ", error));
538
0
  }
539
0
  return out;
540
0
}
541
542
0
absl::StatusOr<std::string> UnescapeBytes(absl::string_view str) {
543
0
  std::string out;
544
0
  std::string error;
545
0
  if (!UnescapeInternal(str, "", false, true, &out, &error)) {
546
0
    return absl::InvalidArgumentError(
547
0
        absl::StrCat("Invalid escaped bytes: ", error));
548
0
  }
549
0
  return out;
550
0
}
551
552
0
std::string EscapeString(absl::string_view str) {
553
0
  return EscapeInternal(str, true, '\0');
554
0
}
555
556
std::string EscapeBytes(absl::string_view str, bool escape_all_bytes,
557
0
                        char escape_quote_char) {
558
0
  std::string escaped_bytes;
559
0
  const char* p = str.data();
560
0
  const char* end = p + str.size();
561
0
  for (; p < end; ++p) {
562
0
    unsigned char c = *p;
563
0
    if (escape_all_bytes || !absl::ascii_isprint(c)) {
564
0
      escaped_bytes += "\\x";
565
0
      escaped_bytes += absl::BytesToHexString(absl::string_view(p, 1));
566
0
    } else {
567
0
      switch (c) {
568
        // Note that we only handle printable escape characters here.  All
569
        // unprintable (\n, \r, \t, etc.) are hex escaped above.
570
0
        case '\\':
571
0
          escaped_bytes += "\\\\";
572
0
          break;
573
0
        case '\'':
574
0
        case '"':
575
0
        case '`':
576
          // Escape only quote chars that match escape_quote_char.
577
0
          if (escape_quote_char == 0 || c == escape_quote_char) {
578
0
            escaped_bytes += '\\';
579
0
          }
580
0
          escaped_bytes += c;
581
0
          break;
582
0
        default:
583
0
          escaped_bytes += c;
584
0
          break;
585
0
      }
586
0
    }
587
0
  }
588
0
  return escaped_bytes;
589
0
}
590
591
288k
absl::StatusOr<std::string> ParseStringLiteral(absl::string_view str) {
592
288k
  std::string out;
593
288k
  bool is_string_literal = MayBeStringLiteral(str);
594
288k
  bool is_raw_string_literal = MayBeRawStringLiteral(str);
595
288k
  if (!is_string_literal && !is_raw_string_literal) {
596
0
    return absl::InvalidArgumentError("Invalid string literal");
597
0
  }
598
599
288k
  absl::string_view copy_str = str;
600
288k
  if (is_raw_string_literal) {
601
    // Strip off the prefix 'r' from the raw string content before parsing.
602
1.61k
    copy_str = absl::ClippedSubstr(copy_str, 1);
603
1.61k
  }
604
605
288k
  bool is_triple_quoted = MayBeTripleQuotedString(copy_str);
606
  // Starts after the opening quotes {""", '''} or {", '}.
607
288k
  int quotes_length = is_triple_quoted ? 3 : 1;
608
288k
  absl::string_view quotes = copy_str.substr(0, quotes_length);
609
288k
  copy_str = absl::ClippedSubstr(copy_str, quotes_length);
610
288k
  std::string error;
611
288k
  if (!UnescapeInternal(copy_str, quotes, is_raw_string_literal, false, &out,
612
288k
                        &error)) {
613
3.43k
    return absl::InvalidArgumentError(
614
3.43k
        absl::StrCat("Invalid string literal: ", error));
615
3.43k
  }
616
285k
  return out;
617
288k
}
618
619
7.21k
absl::StatusOr<std::string> ParseBytesLiteral(absl::string_view str) {
620
7.21k
  std::string out;
621
7.21k
  bool is_bytes_literal = MayBeBytesLiteral(str);
622
7.21k
  bool is_raw_bytes_literal = MayBeRawBytesLiteral(str);
623
7.21k
  if (!is_bytes_literal && !is_raw_bytes_literal) {
624
0
    return absl::InvalidArgumentError("Invalid bytes literal");
625
0
  }
626
627
7.21k
  absl::string_view copy_str = str;
628
7.21k
  if (is_raw_bytes_literal) {
629
    // Strip off the two-character prefix ("rb" or "br") before parsing.
630
666
    copy_str = absl::ClippedSubstr(copy_str, 2);
631
6.54k
  } else {
632
    // Strip off the prefix 'b' from the bytes content before parsing.
633
6.54k
    copy_str = absl::ClippedSubstr(copy_str, 1);
634
6.54k
  }
635
636
7.21k
  bool is_triple_quoted = MayBeTripleQuotedString(copy_str);
637
  // Starts after the opening quotes {""", '''} or {", '}.
638
7.21k
  int quotes_length = is_triple_quoted ? 3 : 1;
639
7.21k
  absl::string_view quotes = copy_str.substr(0, quotes_length);
640
  // Includes the closing quotes.
641
7.21k
  copy_str = absl::ClippedSubstr(copy_str, quotes_length);
642
7.21k
  std::string error;
643
7.21k
  if (!UnescapeInternal(copy_str, quotes, is_raw_bytes_literal, true, &out,
644
7.21k
                        &error)) {
645
2.97k
    return absl::InvalidArgumentError(
646
2.97k
        absl::StrCat("Invalid bytes literal: ", error));
647
2.97k
  }
648
4.23k
  return out;
649
7.21k
}
650
651
0
std::string FormatStringLiteral(absl::string_view str) {
652
0
  absl::string_view quote =
653
0
      (str.find('"') != str.npos && str.find('\'') == str.npos) ? "'" : "\"";
654
0
  return absl::StrCat(quote, EscapeInternal(str, true, quote[0]), quote);
655
0
}
656
657
0
std::string FormatStringLiteral(const absl::Cord& str) {
658
0
  if (auto flat = str.TryFlat(); flat) {
659
0
    return FormatStringLiteral(*flat);
660
0
  }
661
0
  return FormatStringLiteral(static_cast<std::string>(str));
662
0
}
663
664
0
std::string FormatSingleQuotedStringLiteral(absl::string_view str) {
665
0
  return absl::StrCat("'", EscapeInternal(str, true, '\''), "'");
666
0
}
667
668
0
std::string FormatDoubleQuotedStringLiteral(absl::string_view str) {
669
0
  return absl::StrCat("\"", EscapeInternal(str, true, '"'), "\"");
670
0
}
671
672
0
std::string FormatBytesLiteral(absl::string_view str) {
673
0
  absl::string_view quote =
674
0
      (str.find('"') != str.npos && str.find('\'') == str.npos) ? "'" : "\"";
675
0
  return absl::StrCat("b", quote, EscapeBytes(str, false, quote[0]), quote);
676
0
}
677
678
0
std::string FormatSingleQuotedBytesLiteral(absl::string_view str) {
679
0
  return absl::StrCat("b'", EscapeBytes(str, false, '\''), "'");
680
0
}
681
682
0
std::string FormatDoubleQuotedBytesLiteral(absl::string_view str) {
683
0
  return absl::StrCat("b\"", EscapeBytes(str, false, '"'), "\"");
684
0
}
685
686
0
absl::StatusOr<std::string> ParseIdentifier(absl::string_view str) {
687
0
  if (!LexisIsIdentifier(str)) {
688
0
    return absl::InvalidArgumentError("Invalid identifier");
689
0
  }
690
0
  return std::string(str);
691
0
}
692
693
}  // namespace cel::internal