Coverage Report

Created: 2025-11-29 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/internal/strings.cc
Line
Count
Source
1
// Copyright 2021 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "internal/strings.h"
16
17
#include <string>
18
19
#include "absl/base/attributes.h"
20
#include "absl/status/status.h"
21
#include "absl/strings/ascii.h"
22
#include "absl/strings/cord.h"
23
#include "absl/strings/escaping.h"
24
#include "absl/strings/match.h"
25
#include "absl/strings/str_cat.h"
26
#include "absl/strings/string_view.h"
27
#include "internal/lexis.h"
28
#include "internal/unicode.h"
29
#include "internal/utf8.h"
30
31
namespace cel::internal {
32
33
namespace {
34
35
constexpr char kHexTable[] = "0123456789abcdef";
36
37
11.1k
constexpr int HexDigitToInt(char x) {
38
11.1k
  if (x > '9') {
39
5.00k
    x += 9;
40
5.00k
  }
41
11.1k
  return x & 0xf;
42
11.1k
}
43
44
4.59k
constexpr bool IsOctalDigit(char x) { return x >= '0' && x <= '7'; }
45
46
// Returns true when following conditions are met:
47
// - <closing_str> is a suffix of <source>.
48
// - No other unescaped occurrence of <closing_str> inside <source> (apart from
49
//   being a suffix).
50
// Returns false otherwise. If <error> is non-NULL, returns an error message in
51
// <error>. No error offset is reported: the function has no offset
52
// out-parameter, so callers receive only the message.
53
bool CheckForClosingString(absl::string_view source,
54
395k
                           absl::string_view closing_str, std::string* error) {
55
395k
  if (closing_str.empty()) return true;
56
57
395k
  const char* p = source.data();
58
395k
  const char* end = p + source.size();
59
60
395k
  bool is_closed = false;
61
4.22M
  while (p + closing_str.length() <= end) {
62
3.83M
    if (*p != '\\') {
63
3.81M
      size_t cur_pos = p - source.data();
64
3.81M
      bool is_closing =
65
3.81M
          absl::StartsWith(absl::ClippedSubstr(source, cur_pos), closing_str);
66
3.81M
      if (is_closing && p + closing_str.length() < end) {
67
0
        if (error) {
68
0
          *error =
69
0
              absl::StrCat("String cannot contain unescaped ", closing_str);
70
0
        }
71
0
        return false;
72
0
      }
73
3.81M
      is_closed = is_closing && (p + closing_str.length() == end);
74
3.81M
    } else {
75
15.1k
      p++;  // Read past the escaped character.
76
15.1k
    }
77
3.83M
    p++;
78
3.83M
  }
79
80
395k
  if (!is_closed) {
81
780
    if (error) {
82
780
      *error = absl::StrCat("String must end with ", closing_str);
83
780
    }
84
780
    return false;
85
780
  }
86
87
394k
  return true;
88
395k
}
89
90
// ----------------------------------------------------------------------
91
// UnescapeInternal()
92
//    Unescapes C escape sequences and is the reverse of CEscape().
93
//
94
//    If 'source' is valid, stores the unescaped string in 'dest' and
95
//    returns true. Otherwise returns false and, if 'error' is non-NULL,
96
//    stores the error description in 'error'. No error offset is
97
//    reported. Set 'error' to NULL to disable error reporting. 'dest'
98
//    must be non-NULL; output is appended to it, and the callers
99
//    visible here all pass an empty string.
100
//
101
//    'dest' is a std::string reserved up front from the size of 'source'.  The
102
//    unescaped string cannot grow bigger than the source string since no
103
//    unescaped sequence is longer than the corresponding escape sequence.
104
//    'source' must not refer to the contents of 'dest'.
105
//
106
// If <closing_str> is non-empty, for <source> to be valid:
107
// - It must end with <closing_str>.
108
// - Should not contain any other unescaped occurrence of <closing_str>.
109
// ----------------------------------------------------------------------
110
bool UnescapeInternal(absl::string_view source, absl::string_view closing_str,
111
                      bool is_raw_literal, bool is_bytes_literal,
112
395k
                      std::string* dest, std::string* error) {
113
395k
  if (!CheckForClosingString(source, closing_str, error)) {
114
780
    return false;
115
780
  }
116
117
394k
  if (ABSL_PREDICT_FALSE(source.empty())) {
118
0
    *dest = std::string();
119
0
    return true;
120
0
  }
121
122
  // Strip off the closing_str from the end before unescaping.
123
394k
  source = source.substr(0, source.size() - closing_str.size());
124
394k
  if (!is_bytes_literal) {
125
392k
    if (!Utf8IsValid(source)) {
126
0
      if (error) {
127
0
        *error = absl::StrCat("Structurally invalid UTF8 string: ",
128
0
                              EscapeBytes(source));
129
0
      }
130
0
      return false;
131
0
    }
132
392k
  }
133
134
394k
  dest->reserve(source.size());
135
136
394k
  const char* p = source.data();
137
394k
  const char* end = p + source.size();
138
394k
  const char* last_byte = end - 1;
139
140
3.61M
  while (p < end) {
141
3.22M
    if (*p != '\\') {
142
3.20M
      if (*p != '\r') {
143
3.20M
        dest->push_back(*p++);
144
3.20M
      } else {
145
        // All types of newlines in different platforms i.e. '\r', '\n', '\r\n'
146
        // are replaced with '\n'.
147
684
        dest->push_back('\n');
148
684
        p++;
149
684
        if (p < end && *p == '\n') {
150
221
          p++;
151
221
        }
152
684
      }
153
3.20M
    } else {
154
14.2k
      if ((p + 1) > last_byte) {
155
0
        if (error) {
156
0
          *error = is_raw_literal
157
0
                       ? "Raw literals cannot end with odd number of \\"
158
0
                   : is_bytes_literal ? "Bytes literal cannot end with \\"
159
0
                                      : "String literal cannot end with \\";
160
0
        }
161
0
        return false;
162
0
      }
163
14.2k
      if (is_raw_literal) {
164
        // For raw literals, all escapes are valid and those characters ('\\'
165
        // and the escaped character) come through literally in the string.
166
1.57k
        dest->push_back(*p++);
167
1.57k
        dest->push_back(*p++);
168
1.57k
        continue;
169
1.57k
      }
170
      // Any error that occurs in the escape is accounted to the start of
171
      // the escape.
172
12.6k
      p++;  // Read past the escape character.
173
174
12.6k
      switch (*p) {
175
783
        case 'a':
176
783
          dest->push_back('\a');
177
783
          break;
178
388
        case 'b':
179
388
          dest->push_back('\b');
180
388
          break;
181
362
        case 'f':
182
362
          dest->push_back('\f');
183
362
          break;
184
197
        case 'n':
185
197
          dest->push_back('\n');
186
197
          break;
187
202
        case 'r':
188
202
          dest->push_back('\r');
189
202
          break;
190
542
        case 't':
191
542
          dest->push_back('\t');
192
542
          break;
193
253
        case 'v':
194
253
          dest->push_back('\v');
195
253
          break;
196
3.16k
        case '\\':
197
3.16k
          dest->push_back('\\');
198
3.16k
          break;
199
288
        case '?':
200
288
          dest->push_back('\?');
201
288
          break;  // \?  Who knew?
202
354
        case '\'':
203
354
          dest->push_back('\'');
204
354
          break;
205
691
        case '"':
206
691
          dest->push_back('\"');
207
691
          break;
208
606
        case '`':
209
606
          dest->push_back('`');
210
606
          break;
211
583
        case '0':
212
583
          ABSL_FALLTHROUGH_INTENDED;
213
783
        case '1':
214
783
          ABSL_FALLTHROUGH_INTENDED;
215
1.24k
        case '2':
216
1.24k
          ABSL_FALLTHROUGH_INTENDED;
217
1.53k
        case '3': {
218
          // Octal escape '\ddd': requires exactly 3 octal digits.  Note that
219
          // the highest valid escape sequence is '\377'.
220
          // For string literals, octal and hex escape sequences are interpreted
221
          // as unicode code points, and the related UTF8-encoded character is
222
          // added to the destination.  For bytes literals, octal and hex
223
          // escape sequences are interpreted as a single byte value.
224
1.53k
          const char* octal_start = p;
225
1.53k
          if (p + 2 >= end) {
226
0
            if (error) {
227
0
              *error =
228
0
                  "Illegal escape sequence: Octal escape must be followed by 3 "
229
0
                  "octal digits but saw: \\" +
230
0
                  std::string(octal_start, end - p);
231
0
            }
232
            // No error offset is tracked; only *error (if non-NULL) is set.
233
0
            return false;
234
0
          }
235
1.53k
          const char* octal_end = p + 2;
236
1.53k
          char32_t ch = 0;
237
6.13k
          for (; p <= octal_end; ++p) {
238
4.59k
            if (IsOctalDigit(*p)) {
239
4.59k
              ch = ch * 8 + *p - '0';
240
4.59k
            } else {
241
0
              if (error) {
242
0
                *error =
243
0
                    "Illegal escape sequence: Octal escape must be followed by "
244
0
                    "3 octal digits but saw: \\" +
245
0
                    std::string(octal_start, 3);
246
0
              }
247
              // No error offset is tracked; only *error (if non-NULL) is
248
              // set.
249
0
              return false;
250
0
            }
251
4.59k
          }
252
1.53k
          p = octal_end;  // p points at last digit.
253
1.53k
          if (is_bytes_literal) {
254
439
            dest->push_back(static_cast<char>(ch));
255
1.09k
          } else {
256
1.09k
            Utf8Encode(*dest, ch);
257
1.09k
          }
258
1.53k
          break;
259
1.53k
        }
260
218
        case 'x':
261
218
          ABSL_FALLTHROUGH_INTENDED;
262
914
        case 'X': {
263
          // Hex escape '\xhh': requires exactly 2 hex digits.
264
          // For string literals, octal and hex escape sequences are
265
          // interpreted as unicode code points, and the related UTF8-encoded
266
          // character is added to the destination.  For bytes literals, octal
267
          // and hex escape sequences are interpreted as a single byte value.
268
914
          const char* hex_start = p;
269
914
          if (p + 2 >= end) {
270
0
            if (error) {
271
0
              *error =
272
0
                  "Illegal escape sequence: Hex escape must be followed by 2 "
273
0
                  "hex digits but saw: \\" +
274
0
                  std::string(hex_start, end - p);
275
0
            }
276
            // No error offset is tracked; only *error (if non-NULL) is set.
277
0
            return false;
278
0
          }
279
914
          char32_t ch = 0;
280
914
          const char* hex_end = p + 2;
281
2.74k
          for (++p; p <= hex_end; ++p) {
282
1.82k
            if (absl::ascii_isxdigit(*p)) {
283
1.82k
              ch = (ch << 4) + HexDigitToInt(*p);
284
1.82k
            } else {
285
0
              if (error) {
286
0
                *error =
287
0
                    "Illegal escape sequence: Hex escape must be followed by 2 "
288
0
                    "hex digits but saw: \\" +
289
0
                    std::string(hex_start, 3);
290
0
              }
291
              // No error offset is tracked; only *error (if non-NULL) is
292
              // set.
293
0
              return false;
294
0
            }
295
1.82k
          }
296
914
          p = hex_end;  // p points at last digit.
297
914
          if (is_bytes_literal) {
298
288
            dest->push_back(static_cast<char>(ch));
299
626
          } else {
300
626
            Utf8Encode(*dest, ch);
301
626
          }
302
914
          break;
303
914
        }
304
1.46k
        case 'u': {
305
1.46k
          if (is_bytes_literal) {
306
495
            if (error) {
307
495
              *error =
308
495
                  std::string(
309
495
                      "Illegal escape sequence: Unicode escape sequence \\") +
310
495
                  *p + " cannot be used in bytes literals";
311
495
            }
312
            // No error offset is tracked; only *error (if non-NULL) is set.
313
495
            return false;
314
495
          }
315
          // \uhhhh => Read 4 hex digits as a code point,
316
          //           then write it as UTF-8 bytes.
317
967
          char32_t cp = 0;
318
967
          const char* hex_start = p;
319
967
          if (p + 4 >= end) {
320
0
            if (error) {
321
0
              *error =
322
0
                  "Illegal escape sequence: \\u must be followed by 4 hex "
323
0
                  "digits but saw: \\" +
324
0
                  std::string(hex_start, end - p);
325
0
            }
326
            // No error offset is tracked; only *error (if non-NULL) is set.
327
0
            return false;
328
0
          }
329
4.83k
          for (int i = 0; i < 4; ++i) {
330
            // Look one char ahead.
331
3.86k
            if (absl::ascii_isxdigit(p[1])) {
332
3.86k
              cp = (cp << 4) + HexDigitToInt(*++p);  // Advance p.
333
3.86k
            } else {
334
0
              if (error) {
335
0
                *error =
336
0
                    "Illegal escape sequence: \\u must be followed by 4 "
337
0
                    "hex digits but saw: \\" +
338
0
                    std::string(hex_start, 5);
339
0
              }
340
              // No error offset is tracked; only *error (if non-NULL) is
341
              // set.
342
0
              return false;
343
0
            }
344
3.86k
          }
345
967
          if (!UnicodeIsValid(cp)) {
346
170
            if (error) {
347
170
              *error = "Illegal escape sequence: Unicode value \\" +
348
170
                       std::string(hex_start, 5) + " is invalid";
349
170
            }
350
            // No error offset is tracked; only *error (if non-NULL) is set.
351
170
            return false;
352
170
          }
353
797
          Utf8Encode(*dest, cp);
354
797
          break;
355
967
        }
356
939
        case 'U': {
357
939
          if (is_bytes_literal) {
358
229
            if (error) {
359
229
              *error =
360
229
                  std::string(
361
229
                      "Illegal escape sequence: Unicode escape sequence \\") +
362
229
                  *p + " cannot be used in bytes literals";
363
229
            }
364
229
            return false;
365
229
          }
366
          // \Uhhhhhhhh => convert 8 hex digits to UTF-8.  Note that the
367
          // first two digits must be 00: The valid range is
368
          // '\U00000000' to '\U0010FFFF' (excluding surrogates).
369
710
          char32_t cp = 0;
370
710
          const char* hex_start = p;
371
710
          if (p + 8 >= end) {
372
0
            if (error) {
373
0
              *error =
374
0
                  "Illegal escape sequence: \\U must be followed by 8 hex "
375
0
                  "digits but saw: \\" +
376
0
                  std::string(hex_start, end - p);
377
0
            }
378
            // No error offset is tracked; only *error (if non-NULL) is set.
379
0
            return false;
380
0
          }
381
5.97k
          for (int i = 0; i < 8; ++i) {
382
            // Look one char ahead.
383
5.48k
            if (absl::ascii_isxdigit(p[1])) {
384
5.48k
              cp = (cp << 4) + HexDigitToInt(*++p);
385
5.48k
              if (cp > 0x10FFFF) {
386
216
                if (error) {
387
216
                  *error = "Illegal escape sequence: Value of \\" +
388
216
                           std::string(hex_start, 9) +
389
216
                           " exceeds Unicode limit (0x0010FFFF)";
390
216
                }
391
                // No error offset is tracked; only *error (if non-NULL) is
392
                // set.
393
216
                return false;
394
216
              }
395
5.48k
            } else {
396
0
              if (error) {
397
0
                *error =
398
0
                    "Illegal escape sequence: \\U must be followed by 8 "
399
0
                    "hex digits but saw: \\" +
400
0
                    std::string(hex_start, 9);
401
0
              }
402
              // No error offset is tracked; only *error (if non-NULL) is
403
              // set.
404
0
              return false;
405
0
            }
406
5.48k
          }
407
494
          if (!UnicodeIsValid(cp)) {
408
99
            if (error) {
409
99
              *error = "Illegal escape sequence: Unicode value \\" +
410
99
                       std::string(hex_start, 9) + " is invalid";
411
99
            }
412
            // No error offset is tracked; only *error (if non-NULL) is set.
413
99
            return false;
414
99
          }
415
395
          Utf8Encode(*dest, cp);
416
395
          break;
417
494
        }
418
0
        case '\r':
419
0
          ABSL_FALLTHROUGH_INTENDED;
420
0
        case '\n': {
421
0
          if (error) {
422
0
            *error = "Illegal escaped newline";
423
0
          }
424
          // No error offset is tracked; only *error (if non-NULL) is set.
425
0
          return false;
426
0
        }
427
0
        default: {
428
0
          if (error) {
429
0
            *error = std::string("Illegal escape sequence: \\") + *p;
430
0
          }
431
          // No error offset is tracked; only *error (if non-NULL) is set.
432
0
          return false;
433
0
        }
434
12.6k
      }
435
11.4k
      p++;  // read past letter we escaped
436
11.4k
    }
437
3.22M
  }
438
439
393k
  dest->shrink_to_fit();
440
441
393k
  return true;
442
394k
}
443
444
std::string EscapeInternal(absl::string_view src, bool escape_all_bytes,
445
0
                           char escape_quote_char) {
446
0
  std::string dest;
447
  // Worst case size is every byte has to be hex escaped, so 4 char for every
448
  // byte.
449
0
  dest.reserve(src.size() * 4);
450
0
  bool last_hex_escape = false;  // true if last output char was \xNN.
451
0
  const char* p = src.data();
452
0
  const char* end = p + src.size();
453
0
  for (; p < end; ++p) {
454
0
    unsigned char c = static_cast<unsigned char>(*p);
455
0
    bool is_hex_escape = false;
456
0
    switch (c) {
457
0
      case '\n':
458
0
        dest.append("\\n");
459
0
        break;
460
0
      case '\r':
461
0
        dest.append("\\r");
462
0
        break;
463
0
      case '\t':
464
0
        dest.append("\\t");
465
0
        break;
466
0
      case '\\':
467
0
        dest.append("\\\\");
468
0
        break;
469
0
      case '\'':
470
0
        ABSL_FALLTHROUGH_INTENDED;
471
0
      case '\"':
472
0
        ABSL_FALLTHROUGH_INTENDED;
473
0
      case '`':
474
        // Escape only quote chars that match escape_quote_char.
475
0
        if (escape_quote_char == 0 || c == escape_quote_char) {
476
0
          dest.push_back('\\');
477
0
        }
478
0
        dest.push_back(c);
479
0
        break;
480
0
      default:
481
        // Note that if we emit \xNN and the src character after that is a hex
482
        // digit then that digit must be escaped too to prevent it being
483
        // interpreted as part of the character code by C.
484
0
        if ((!escape_all_bytes || c < 0x80) &&
485
0
            (!absl::ascii_isprint(c) ||
486
0
             (last_hex_escape && absl::ascii_isxdigit(c)))) {
487
0
          dest.append("\\x");
488
0
          dest.push_back(kHexTable[c / 16]);
489
0
          dest.push_back(kHexTable[c % 16]);
490
0
          is_hex_escape = true;
491
0
        } else {
492
0
          dest.push_back(c);
493
0
          break;
494
0
        }
495
0
    }
496
0
    last_hex_escape = is_hex_escape;
497
0
  }
498
0
  dest.shrink_to_fit();
499
0
  return dest;
500
0
}
501
502
395k
bool MayBeTripleQuotedString(absl::string_view str) {
503
395k
  return (str.size() >= 6 &&
504
3.91k
          ((absl::StartsWith(str, "\"\"\"") && absl::EndsWith(str, "\"\"\"")) ||
505
3.75k
           (absl::StartsWith(str, "'''") && absl::EndsWith(str, "'''"))));
506
395k
}
507
508
392k
bool MayBeStringLiteral(absl::string_view str) {
509
392k
  return (str.size() >= 2 && str[0] == str[str.size() - 1] &&
510
392k
          (str[0] == '\'' || str[0] == '"'));
511
392k
}
512
513
2.35k
bool MayBeBytesLiteral(absl::string_view str) {
514
2.35k
  return (str.size() >= 3 && absl::StartsWithIgnoreCase(str, "b") &&
515
2.35k
          str[1] == str[str.size() - 1] && (str[1] == '\'' || str[1] == '"'));
516
2.35k
}
517
518
392k
bool MayBeRawStringLiteral(absl::string_view str) {
519
392k
  return (str.size() >= 3 && absl::StartsWithIgnoreCase(str, "r") &&
520
628
          str[1] == str[str.size() - 1] && (str[1] == '\'' || str[1] == '"'));
521
392k
}
522
523
2.35k
bool MayBeRawBytesLiteral(absl::string_view str) {
524
2.35k
  return (str.size() >= 4 &&
525
1.98k
          (absl::StartsWithIgnoreCase(str, "rb") ||
526
1.98k
           absl::StartsWithIgnoreCase(str, "br")) &&
527
651
          (str[2] == str[str.size() - 1]) && (str[2] == '\'' || str[2] == '"'));
528
2.35k
}
529
530
}  // namespace
531
532
0
absl::StatusOr<std::string> UnescapeString(absl::string_view str) {
533
0
  std::string out;
534
0
  std::string error;
535
0
  if (!UnescapeInternal(str, "", false, false, &out, &error)) {
536
0
    return absl::InvalidArgumentError(
537
0
        absl::StrCat("Invalid escaped string: ", error));
538
0
  }
539
0
  return out;
540
0
}
541
542
0
absl::StatusOr<std::string> UnescapeBytes(absl::string_view str) {
543
0
  std::string out;
544
0
  std::string error;
545
0
  if (!UnescapeInternal(str, "", false, true, &out, &error)) {
546
0
    return absl::InvalidArgumentError(
547
0
        absl::StrCat("Invalid escaped bytes: ", error));
548
0
  }
549
0
  return out;
550
0
}
551
552
0
std::string EscapeString(absl::string_view str) {
553
0
  return EscapeInternal(str, true, '\0');
554
0
}
555
556
std::string EscapeBytes(absl::string_view str, bool escape_all_bytes,
557
0
                        char escape_quote_char) {
558
0
  std::string escaped_bytes;
559
0
  const char* p = str.data();
560
0
  const char* end = p + str.size();
561
0
  for (; p < end; ++p) {
562
0
    unsigned char c = *p;
563
0
    if (escape_all_bytes || !absl::ascii_isprint(c)) {
564
0
      escaped_bytes += "\\x";
565
0
      escaped_bytes += absl::BytesToHexString(absl::string_view(p, 1));
566
0
    } else {
567
0
      switch (c) {
568
        // Note that we only handle printable escape characters here.  All
569
        // unprintable (\n, \r, \t, etc.) are hex escaped above.
570
0
        case '\\':
571
0
          escaped_bytes += "\\\\";
572
0
          break;
573
0
        case '\'':
574
0
        case '"':
575
0
        case '`':
576
          // Escape only quote chars that match escape_quote_char.
577
0
          if (escape_quote_char == 0 || c == escape_quote_char) {
578
0
            escaped_bytes += '\\';
579
0
          }
580
0
          escaped_bytes += c;
581
0
          break;
582
0
        default:
583
0
          escaped_bytes += c;
584
0
          break;
585
0
      }
586
0
    }
587
0
  }
588
0
  return escaped_bytes;
589
0
}
590
591
392k
absl::StatusOr<std::string> ParseStringLiteral(absl::string_view str) {
592
392k
  std::string out;
593
392k
  bool is_string_literal = MayBeStringLiteral(str);
594
392k
  bool is_raw_string_literal = MayBeRawStringLiteral(str);
595
392k
  if (!is_string_literal && !is_raw_string_literal) {
596
0
    return absl::InvalidArgumentError("Invalid string literal");
597
0
  }
598
599
392k
  absl::string_view copy_str = str;
600
392k
  if (is_raw_string_literal) {
601
    // Strip off the prefix 'r' from the raw string content before parsing.
602
628
    copy_str = absl::ClippedSubstr(copy_str, 1);
603
628
  }
604
605
392k
  bool is_triple_quoted = MayBeTripleQuotedString(copy_str);
606
  // Starts after the opening quotes {""", '''} or {", '}.
607
392k
  int quotes_length = is_triple_quoted ? 3 : 1;
608
392k
  absl::string_view quotes = copy_str.substr(0, quotes_length);
609
392k
  copy_str = absl::ClippedSubstr(copy_str, quotes_length);
610
392k
  std::string error;
611
392k
  if (!UnescapeInternal(copy_str, quotes, is_raw_string_literal, false, &out,
612
392k
                        &error)) {
613
987
    return absl::InvalidArgumentError(
614
987
        absl::StrCat("Invalid string literal: ", error));
615
987
  }
616
391k
  return out;
617
392k
}
618
619
2.35k
absl::StatusOr<std::string> ParseBytesLiteral(absl::string_view str) {
620
2.35k
  std::string out;
621
2.35k
  bool is_bytes_literal = MayBeBytesLiteral(str);
622
2.35k
  bool is_raw_bytes_literal = MayBeRawBytesLiteral(str);
623
2.35k
  if (!is_bytes_literal && !is_raw_bytes_literal) {
624
0
    return absl::InvalidArgumentError("Invalid bytes literal");
625
0
  }
626
627
2.35k
  absl::string_view copy_str = str;
628
2.35k
  if (is_raw_bytes_literal) {
629
    // Strip the {"rb", "br"} prefix from the raw bytes content before parsing.
630
651
    copy_str = absl::ClippedSubstr(copy_str, 2);
631
1.70k
  } else {
632
    // Strip off the prefix 'b' from the bytes content before parsing.
633
1.70k
    copy_str = absl::ClippedSubstr(copy_str, 1);
634
1.70k
  }
635
636
2.35k
  bool is_triple_quoted = MayBeTripleQuotedString(copy_str);
637
  // Starts after the opening quotes {""", '''} or {", '}.
638
2.35k
  int quotes_length = is_triple_quoted ? 3 : 1;
639
2.35k
  absl::string_view quotes = copy_str.substr(0, quotes_length);
640
  // Includes the closing quotes.
641
2.35k
  copy_str = absl::ClippedSubstr(copy_str, quotes_length);
642
2.35k
  std::string error;
643
2.35k
  if (!UnescapeInternal(copy_str, quotes, is_raw_bytes_literal, true, &out,
644
2.35k
                        &error)) {
645
1.00k
    return absl::InvalidArgumentError(
646
1.00k
        absl::StrCat("Invalid bytes literal: ", error));
647
1.00k
  }
648
1.35k
  return out;
649
2.35k
}
650
651
0
std::string FormatStringLiteral(absl::string_view str) {
652
0
  absl::string_view quote =
653
0
      (str.find('"') != str.npos && str.find('\'') == str.npos) ? "'" : "\"";
654
0
  return absl::StrCat(quote, EscapeInternal(str, true, quote[0]), quote);
655
0
}
656
657
0
std::string FormatStringLiteral(const absl::Cord& str) {
658
0
  if (auto flat = str.TryFlat(); flat) {
659
0
    return FormatStringLiteral(*flat);
660
0
  }
661
0
  return FormatStringLiteral(static_cast<std::string>(str));
662
0
}
663
664
0
std::string FormatSingleQuotedStringLiteral(absl::string_view str) {
665
0
  return absl::StrCat("'", EscapeInternal(str, true, '\''), "'");
666
0
}
667
668
0
std::string FormatDoubleQuotedStringLiteral(absl::string_view str) {
669
0
  return absl::StrCat("\"", EscapeInternal(str, true, '"'), "\"");
670
0
}
671
672
0
std::string FormatBytesLiteral(absl::string_view str) {
673
0
  absl::string_view quote =
674
0
      (str.find('"') != str.npos && str.find('\'') == str.npos) ? "'" : "\"";
675
0
  return absl::StrCat("b", quote, EscapeBytes(str, false, quote[0]), quote);
676
0
}
677
678
0
std::string FormatSingleQuotedBytesLiteral(absl::string_view str) {
679
0
  return absl::StrCat("b'", EscapeBytes(str, false, '\''), "'");
680
0
}
681
682
0
std::string FormatDoubleQuotedBytesLiteral(absl::string_view str) {
683
0
  return absl::StrCat("b\"", EscapeBytes(str, false, '"'), "\"");
684
0
}
685
686
0
absl::StatusOr<std::string> ParseIdentifier(absl::string_view str) {
687
0
  if (!LexisIsIdentifier(str)) {
688
0
    return absl::InvalidArgumentError("Invalid identifier");
689
0
  }
690
0
  return std::string(str);
691
0
}
692
693
}  // namespace cel::internal