Coverage Report

Created: 2023-05-25 06:18

/proc/self/cwd/internal/strings.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2021 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "internal/strings.h"
16
17
#include <string>
18
19
#include "absl/base/attributes.h"
20
#include "absl/status/status.h"
21
#include "absl/strings/ascii.h"
22
#include "absl/strings/escaping.h"
23
#include "absl/strings/match.h"
24
#include "absl/strings/str_cat.h"
25
#include "internal/lexis.h"
26
#include "internal/unicode.h"
27
#include "internal/utf8.h"
28
29
namespace cel::internal {
30
31
namespace {
32
33
// Lowercase hexadecimal digit characters, indexed by nibble value (0-15).
constexpr char kHexTable[] = "0123456789abcdef";
34
35
34.8k
// Converts one hexadecimal digit character to its numeric value (0-15).
// Upper and lower case letters are both handled. No validation is performed,
// so the result is only meaningful when `x` is a hex digit.
constexpr int HexDigitToInt(char x) {
  // In ASCII the letters a-f / A-F have a low nibble of 1..6, so adding 9
  // before masking yields 10..15. Decimal digits already carry their value in
  // the low nibble.
  return (x > '9' ? x + 9 : x) & 0xf;
}
41
42
7.56k
// Reports whether `x` is one of the ASCII octal digits '0' through '7'.
constexpr bool IsOctalDigit(char x) {
  return !(x < '0' || x > '7');
}
43
44
// Returns true when following conditions are met:
45
// - <closing_str> is a suffix of <source>.
46
// - No other unescaped occurrence of <closing_str> inside <source> (apart from
47
//   being a suffix).
48
// Returns false otherwise. If <error> is non-NULL, returns an error message in
49
// <error>. If <error_offset> is non-NULL, returns the offset in <source> that
50
// corresponds to the location of the error.
51
bool CheckForClosingString(absl::string_view source,
52
501k
                           absl::string_view closing_str, std::string* error) {
53
501k
  if (closing_str.empty()) return true;
54
55
501k
  const char* p = source.data();
56
501k
  const char* end = source.end();
57
58
501k
  bool is_closed = false;
59
6.79M
  while (p + closing_str.length() <= end) {
60
6.28M
    if (*p != '\\') {
61
6.23M
      size_t cur_pos = p - source.begin();
62
6.23M
      bool is_closing =
63
6.23M
          absl::StartsWith(absl::ClippedSubstr(source, cur_pos), closing_str);
64
6.23M
      if (is_closing && p + closing_str.length() < end) {
65
0
        if (error) {
66
0
          *error =
67
0
              absl::StrCat("String cannot contain unescaped ", closing_str);
68
0
        }
69
0
        return false;
70
0
      }
71
6.23M
      is_closed = is_closing && (p + closing_str.length() == end);
72
6.23M
    } else {
73
50.0k
      p++;  // Read past the escaped character.
74
50.0k
    }
75
6.28M
    p++;
76
6.28M
  }
77
78
501k
  if (!is_closed) {
79
2.20k
    if (error) {
80
2.20k
      *error = absl::StrCat("String must end with ", closing_str);
81
2.20k
    }
82
2.20k
    return false;
83
2.20k
  }
84
85
499k
  return true;
86
501k
}
87
88
// ----------------------------------------------------------------------
89
// CUnescapeInternal()
90
//    Unescapes C escape sequences and is the reverse of CEscape().
91
//
92
//    If 'source' is valid, stores the unescaped string and its size in
93
//    'dest' and 'dest_len' respectively, and returns true. Otherwise
94
//    returns false and optionally stores the error description in
95
//    'error' and the error offset in 'error_offset'. If 'error' is
96
//    nonempty on return, 'error_offset' is in range [0, str.size()].
97
//    Set 'error' and 'error_offset' to NULL to disable error reporting.
98
//
99
//    'dest' must point to a buffer that is at least as big as 'source'.  The
100
//    unescaped string cannot grow bigger than the source string since no
101
//    unescaped sequence is longer than the corresponding escape sequence.
102
//    'source' and 'dest' must not be the same.
103
//
104
// If <closing_str> is non-empty, for <source> to be valid:
105
// - It must end with <closing_str>.
106
// - Should not contain any other unescaped occurrence of <closing_str>.
107
// ----------------------------------------------------------------------
108
bool UnescapeInternal(absl::string_view source, absl::string_view closing_str,
109
                      bool is_raw_literal, bool is_bytes_literal,
110
501k
                      std::string* dest, std::string* error) {
111
501k
  if (!CheckForClosingString(source, closing_str, error)) {
112
2.20k
    return false;
113
2.20k
  }
114
115
499k
  if (ABSL_PREDICT_FALSE(source.empty())) {
116
0
    *dest = std::string();
117
0
    return true;
118
0
  }
119
120
  // Strip off the closing_str from the end before unescaping.
121
499k
  source = source.substr(0, source.size() - closing_str.size());
122
499k
  if (!is_bytes_literal) {
123
495k
    if (!Utf8IsValid(source)) {
124
0
      if (error) {
125
0
        *error = absl::StrCat("Structurally invalid UTF8 string: ",
126
0
                              EscapeBytes(source));
127
0
      }
128
0
      return false;
129
0
    }
130
495k
  }
131
132
499k
  dest->reserve(source.size());
133
134
499k
  const char* p = source.data();
135
499k
  const char* end = source.end();
136
499k
  const char* last_byte = end - 1;
137
138
5.82M
  while (p < end) {
139
5.32M
    if (*p != '\\') {
140
5.28M
      if (*p != '\r') {
141
5.27M
        dest->push_back(*p++);
142
5.27M
      } else {
143
        // All types of newlines in different platforms i.e. '\r', '\n', '\r\n'
144
        // are replaced with '\n'.
145
3.70k
        dest->push_back('\n');
146
3.70k
        p++;
147
3.70k
        if (p < end && *p == '\n') {
148
573
          p++;
149
573
        }
150
3.70k
      }
151
5.28M
    } else {
152
47.6k
      if ((p + 1) > last_byte) {
153
0
        if (error) {
154
0
          *error = is_raw_literal
155
0
                       ? "Raw literals cannot end with odd number of \\"
156
0
                   : is_bytes_literal ? "Bytes literal cannot end with \\"
157
0
                                      : "String literal cannot end with \\";
158
0
        }
159
0
        return false;
160
0
      }
161
47.6k
      if (is_raw_literal) {
162
        // For raw literals, all escapes are valid and those characters ('\\'
163
        // and the escaped character) come through literally in the string.
164
17.9k
        dest->push_back(*p++);
165
17.9k
        dest->push_back(*p++);
166
17.9k
        continue;
167
17.9k
      }
168
      // Any error that occurs in the escape is accounted to the start of
169
      // the escape.
170
29.7k
      p++;  // Read past the escape character.
171
172
29.7k
      switch (*p) {
173
1.98k
        case 'a':
174
1.98k
          dest->push_back('\a');
175
1.98k
          break;
176
1.51k
        case 'b':
177
1.51k
          dest->push_back('\b');
178
1.51k
          break;
179
2.44k
        case 'f':
180
2.44k
          dest->push_back('\f');
181
2.44k
          break;
182
1.36k
        case 'n':
183
1.36k
          dest->push_back('\n');
184
1.36k
          break;
185
275
        case 'r':
186
275
          dest->push_back('\r');
187
275
          break;
188
4.34k
        case 't':
189
4.34k
          dest->push_back('\t');
190
4.34k
          break;
191
360
        case 'v':
192
360
          dest->push_back('\v');
193
360
          break;
194
1.38k
        case '\\':
195
1.38k
          dest->push_back('\\');
196
1.38k
          break;
197
283
        case '?':
198
283
          dest->push_back('\?');
199
283
          break;  // \?  Who knew?
200
705
        case '\'':
201
705
          dest->push_back('\'');
202
705
          break;
203
964
        case '"':
204
964
          dest->push_back('\"');
205
964
          break;
206
287
        case '`':
207
287
          dest->push_back('`');
208
287
          break;
209
607
        case '0':
210
607
          ABSL_FALLTHROUGH_INTENDED;
211
1.60k
        case '1':
212
1.60k
          ABSL_FALLTHROUGH_INTENDED;
213
2.11k
        case '2':
214
2.11k
          ABSL_FALLTHROUGH_INTENDED;
215
2.52k
        case '3': {
216
          // Octal escape '\ddd': requires exactly 3 octal digits.  Note that
217
          // the highest valid escape sequence is '\377'.
218
          // For string literals, octal and hex escape sequences are interpreted
219
          // as unicode code points, and the related UTF8-encoded character is
220
          // added to the destination.  For bytes literals, octal and hex
221
          // escape sequences are interpreted as a single byte value.
222
2.52k
          const char* octal_start = p;
223
2.52k
          if (p + 2 >= end) {
224
0
            if (error) {
225
0
              *error =
226
0
                  "Illegal escape sequence: Octal escape must be followed by 3 "
227
0
                  "octal digits but saw: \\" +
228
0
                  std::string(octal_start, end - p);
229
0
            }
230
            // Error offset was set to the start of the escape above the switch.
231
0
            return false;
232
0
          }
233
2.52k
          const char* octal_end = p + 2;
234
2.52k
          char32_t ch = 0;
235
10.0k
          for (; p <= octal_end; ++p) {
236
7.56k
            if (IsOctalDigit(*p)) {
237
7.56k
              ch = ch * 8 + *p - '0';
238
7.56k
            } else {
239
0
              if (error) {
240
0
                *error =
241
0
                    "Illegal escape sequence: Octal escape must be followed by "
242
0
                    "3 octal digits but saw: \\" +
243
0
                    std::string(octal_start, 3);
244
0
              }
245
              // Error offset was set to the start of the escape above the
246
              // switch.
247
0
              return false;
248
0
            }
249
7.56k
          }
250
2.52k
          p = octal_end;  // p points at last digit.
251
2.52k
          if (is_bytes_literal) {
252
836
            dest->push_back(static_cast<char>(ch));
253
1.68k
          } else {
254
1.68k
            Utf8Encode(dest, ch);
255
1.68k
          }
256
2.52k
          break;
257
2.52k
        }
258
5.67k
        case 'x':
259
5.67k
          ABSL_FALLTHROUGH_INTENDED;
260
6.36k
        case 'X': {
261
          // Hex escape '\xhh': requires exactly 2 hex digits.
262
          // For string literals, octal and hex escape sequences are
263
          // interpreted as unicode code points, and the related UTF8-encoded
264
          // character is added to the destination.  For bytes literals, octal
265
          // and hex escape sequences are interpreted as a single byte value.
266
6.36k
          const char* hex_start = p;
267
6.36k
          if (p + 2 >= end) {
268
0
            if (error) {
269
0
              *error =
270
0
                  "Illegal escape sequence: Hex escape must be followed by 2 "
271
0
                  "hex digits but saw: \\" +
272
0
                  std::string(hex_start, end - p);
273
0
            }
274
            // Error offset was set to the start of the escape above the switch.
275
0
            return false;
276
0
          }
277
6.36k
          char32_t ch = 0;
278
6.36k
          const char* hex_end = p + 2;
279
19.0k
          for (++p; p <= hex_end; ++p) {
280
12.7k
            if (absl::ascii_isxdigit(*p)) {
281
12.7k
              ch = (ch << 4) + HexDigitToInt(*p);
282
12.7k
            } else {
283
0
              if (error) {
284
0
                *error =
285
0
                    "Illegal escape sequence: Hex escape must be followed by 2 "
286
0
                    "hex digits but saw: \\" +
287
0
                    std::string(hex_start, 3);
288
0
              }
289
              // Error offset was set to the start of the escape above the
290
              // switch.
291
0
              return false;
292
0
            }
293
12.7k
          }
294
6.36k
          p = hex_end;  // p points at last digit.
295
6.36k
          if (is_bytes_literal) {
296
945
            dest->push_back(static_cast<char>(ch));
297
5.41k
          } else {
298
5.41k
            Utf8Encode(dest, ch);
299
5.41k
          }
300
6.36k
          break;
301
6.36k
        }
302
2.32k
        case 'u': {
303
2.32k
          if (is_bytes_literal) {
304
867
            if (error) {
305
867
              *error =
306
867
                  std::string(
307
867
                      "Illegal escape sequence: Unicode escape sequence \\") +
308
867
                  *p + " cannot be used in bytes literals";
309
867
            }
310
            // Error offset was set to the start of the escape above the switch.
311
867
            return false;
312
867
          }
313
          // \uhhhh => Read 4 hex digits as a code point,
314
          //           then write it as UTF-8 bytes.
315
1.45k
          char32_t cp = 0;
316
1.45k
          const char* hex_start = p;
317
1.45k
          if (p + 4 >= end) {
318
0
            if (error) {
319
0
              *error =
320
0
                  "Illegal escape sequence: \\u must be followed by 4 hex "
321
0
                  "digits but saw: \\" +
322
0
                  std::string(hex_start, end - p);
323
0
            }
324
            // Error offset was set to the start of the escape above the switch.
325
0
            return false;
326
0
          }
327
7.29k
          for (int i = 0; i < 4; ++i) {
328
            // Look one char ahead.
329
5.83k
            if (absl::ascii_isxdigit(p[1])) {
330
5.83k
              cp = (cp << 4) + HexDigitToInt(*++p);  // Advance p.
331
5.83k
            } else {
332
0
              if (error) {
333
0
                *error =
334
0
                    "Illegal escape sequence: \\u must be followed by 4 "
335
0
                    "hex digits but saw: \\" +
336
0
                    std::string(hex_start, 5);
337
0
              }
338
              // Error offset was set to the start of the escape above the
339
              // switch.
340
0
              return false;
341
0
            }
342
5.83k
          }
343
1.45k
          if (!UnicodeIsValid(cp)) {
344
621
            if (error) {
345
621
              *error = "Illegal escape sequence: Unicode value \\" +
346
621
                       std::string(hex_start, 5) + " is invalid";
347
621
            }
348
            // Error offset was set to the start of the escape above the switch.
349
621
            return false;
350
621
          }
351
837
          Utf8Encode(dest, cp);
352
837
          break;
353
1.45k
        }
354
2.59k
        case 'U': {
355
2.59k
          if (is_bytes_literal) {
356
355
            if (error) {
357
355
              *error =
358
355
                  std::string(
359
355
                      "Illegal escape sequence: Unicode escape sequence \\") +
360
355
                  *p + " cannot be used in bytes literals";
361
355
            }
362
355
            return false;
363
355
          }
364
          // \Uhhhhhhhh => convert 8 hex digits to UTF-8.  Note that the
365
          // first two digits must be 00: The valid range is
366
          // '\U00000000' to '\U0010FFFF' (excluding surrogates).
367
2.23k
          char32_t cp = 0;
368
2.23k
          const char* hex_start = p;
369
2.23k
          if (p + 8 >= end) {
370
0
            if (error) {
371
0
              *error =
372
0
                  "Illegal escape sequence: \\U must be followed by 8 hex "
373
0
                  "digits but saw: \\" +
374
0
                  std::string(hex_start, end - p);
375
0
            }
376
            // Error offset was set to the start of the escape above the switch.
377
0
            return false;
378
0
          }
379
17.7k
          for (int i = 0; i < 8; ++i) {
380
            // Look one char ahead.
381
16.2k
            if (absl::ascii_isxdigit(p[1])) {
382
16.2k
              cp = (cp << 4) + HexDigitToInt(*++p);
383
16.2k
              if (cp > 0x10FFFF) {
384
809
                if (error) {
385
809
                  *error = "Illegal escape sequence: Value of \\" +
386
809
                           std::string(hex_start, 9) +
387
809
                           " exceeds Unicode limit (0x0010FFFF)";
388
809
                }
389
                // Error offset was set to the start of the escape above the
390
                // switch.
391
809
                return false;
392
809
              }
393
16.2k
            } else {
394
0
              if (error) {
395
0
                *error =
396
0
                    "Illegal escape sequence: \\U must be followed by 8 "
397
0
                    "hex digits but saw: \\" +
398
0
                    std::string(hex_start, 9);
399
0
              }
400
              // Error offset was set to the start of the escape above the
401
              // switch.
402
0
              return false;
403
0
            }
404
16.2k
          }
405
1.42k
          if (!UnicodeIsValid(cp)) {
406
500
            if (error) {
407
500
              *error = "Illegal escape sequence: Unicode value \\" +
408
500
                       std::string(hex_start, 9) + " is invalid";
409
500
            }
410
            // Error offset was set to the start of the escape above the switch.
411
500
            return false;
412
500
          }
413
927
          Utf8Encode(dest, cp);
414
927
          break;
415
1.42k
        }
416
0
        case '\r':
417
0
          ABSL_FALLTHROUGH_INTENDED;
418
0
        case '\n': {
419
0
          if (error) {
420
0
            *error = "Illegal escaped newline";
421
0
          }
422
          // Error offset was set to the start of the escape above the switch.
423
0
          return false;
424
0
        }
425
0
        default: {
426
0
          if (error) {
427
0
            *error = std::string("Illegal escape sequence: \\") + *p;
428
0
          }
429
          // Error offset was set to the start of the escape above the switch.
430
0
          return false;
431
0
        }
432
29.7k
      }
433
26.5k
      p++;  // read past letter we escaped
434
26.5k
    }
435
5.32M
  }
436
437
496k
  dest->shrink_to_fit();
438
439
496k
  return true;
440
499k
}
441
442
std::string EscapeInternal(absl::string_view src, bool escape_all_bytes,
443
0
                           char escape_quote_char) {
444
0
  std::string dest;
445
  // Worst case size is every byte has to be hex escaped, so 4 char for every
446
  // byte.
447
0
  dest.reserve(src.size() * 4);
448
0
  bool last_hex_escape = false;  // true if last output char was \xNN.
449
0
  for (const char* p = src.begin(); p < src.end(); ++p) {
450
0
    unsigned char c = static_cast<unsigned char>(*p);
451
0
    bool is_hex_escape = false;
452
0
    switch (c) {
453
0
      case '\n':
454
0
        dest.append("\\n");
455
0
        break;
456
0
      case '\r':
457
0
        dest.append("\\r");
458
0
        break;
459
0
      case '\t':
460
0
        dest.append("\\t");
461
0
        break;
462
0
      case '\\':
463
0
        dest.append("\\\\");
464
0
        break;
465
0
      case '\'':
466
0
        ABSL_FALLTHROUGH_INTENDED;
467
0
      case '\"':
468
0
        ABSL_FALLTHROUGH_INTENDED;
469
0
      case '`':
470
        // Escape only quote chars that match escape_quote_char.
471
0
        if (escape_quote_char == 0 || c == escape_quote_char) {
472
0
          dest.push_back('\\');
473
0
        }
474
0
        dest.push_back(c);
475
0
        break;
476
0
      default:
477
        // Note that if we emit \xNN and the src character after that is a hex
478
        // digit then that digit must be escaped too to prevent it being
479
        // interpreted as part of the character code by C.
480
0
        if ((!escape_all_bytes || c < 0x80) &&
481
0
            (!absl::ascii_isprint(c) ||
482
0
             (last_hex_escape && absl::ascii_isxdigit(c)))) {
483
0
          dest.append("\\x");
484
0
          dest.push_back(kHexTable[c / 16]);
485
0
          dest.push_back(kHexTable[c % 16]);
486
0
          is_hex_escape = true;
487
0
        } else {
488
0
          dest.push_back(c);
489
0
          break;
490
0
        }
491
0
    }
492
0
    last_hex_escape = is_hex_escape;
493
0
  }
494
0
  dest.shrink_to_fit();
495
0
  return dest;
496
0
}
497
498
501k
// Returns true if `str` is at least 6 characters long and both starts and
// ends with the same triple-quote delimiter (""" or ''').
bool MayBeTripleQuotedString(absl::string_view str) {
  if (str.size() < 6) {
    return false;
  }
  if (absl::StartsWith(str, "\"\"\"") && absl::EndsWith(str, "\"\"\"")) {
    return true;
  }
  return absl::StartsWith(str, "'''") && absl::EndsWith(str, "'''");
}
503
504
497k
// Returns true if `str` has at least 2 characters and its first and last
// characters are the same quote character (' or ").
bool MayBeStringLiteral(absl::string_view str) {
  if (str.size() < 2) {
    return false;
  }
  const char opening = str[0];
  if (opening != str[str.size() - 1]) {
    return false;
  }
  return opening == '\'' || opening == '"';
}
508
509
3.89k
// Returns true if `str` has at least 3 characters, starts with 'b' or 'B',
// and its second and last characters are the same quote character (' or ").
bool MayBeBytesLiteral(absl::string_view str) {
  if (str.size() < 3 || !absl::StartsWithIgnoreCase(str, "b")) {
    return false;
  }
  const char opening = str[1];
  if (opening != str[str.size() - 1]) {
    return false;
  }
  return opening == '\'' || opening == '"';
}
513
514
497k
// Returns true if `str` has at least 3 characters, starts with 'r' or 'R',
// and its second and last characters are the same quote character (' or ").
bool MayBeRawStringLiteral(absl::string_view str) {
  if (str.size() < 3 || !absl::StartsWithIgnoreCase(str, "r")) {
    return false;
  }
  const char opening = str[1];
  if (opening != str[str.size() - 1]) {
    return false;
  }
  return opening == '\'' || opening == '"';
}
518
519
3.89k
// Returns true if `str` has at least 4 characters, starts with "rb" or "br"
// (in any letter case), and its third and last characters are the same quote
// character (' or ").
bool MayBeRawBytesLiteral(absl::string_view str) {
  if (str.size() < 4) {
    return false;
  }
  const bool has_raw_bytes_prefix = absl::StartsWithIgnoreCase(str, "rb") ||
                                    absl::StartsWithIgnoreCase(str, "br");
  if (!has_raw_bytes_prefix) {
    return false;
  }
  const char opening = str[2];
  if (opening != str[str.size() - 1]) {
    return false;
  }
  return opening == '\'' || opening == '"';
}
525
526
}  // namespace
527
528
0
// Unescapes `str` as string content (not raw, not bytes, no closing
// delimiter expected). Returns the unescaped text, or an InvalidArgument
// status whose message starts with "Invalid escaped string: ".
absl::StatusOr<std::string> UnescapeString(absl::string_view str) {
  std::string unescaped;
  std::string failure;
  const bool ok =
      UnescapeInternal(str, /*closing_str=*/"", /*is_raw_literal=*/false,
                       /*is_bytes_literal=*/false, &unescaped, &failure);
  if (ok) {
    return unescaped;
  }
  return absl::InvalidArgumentError(
      absl::StrCat("Invalid escaped string: ", failure));
}
537
538
0
// Unescapes `str` as bytes content (not raw, no closing delimiter expected).
// Returns the unescaped bytes, or an InvalidArgument status whose message
// starts with "Invalid escaped bytes: ".
absl::StatusOr<std::string> UnescapeBytes(absl::string_view str) {
  std::string unescaped;
  std::string failure;
  const bool ok =
      UnescapeInternal(str, /*closing_str=*/"", /*is_raw_literal=*/false,
                       /*is_bytes_literal=*/true, &unescaped, &failure);
  if (ok) {
    return unescaped;
  }
  return absl::InvalidArgumentError(
      absl::StrCat("Invalid escaped bytes: ", failure));
}
547
548
0
// Escapes `str` via EscapeInternal() with escape_all_bytes set to true and a
// zero escape_quote_char, which makes every quote character (', " and `)
// get a backslash.
std::string EscapeString(absl::string_view str) {
  constexpr char kEscapeEveryQuote = '\0';
  return EscapeInternal(str, /*escape_all_bytes=*/true, kEscapeEveryQuote);
}
551
552
std::string EscapeBytes(absl::string_view str, bool escape_all_bytes,
553
0
                        char escape_quote_char) {
554
0
  std::string escaped_bytes;
555
0
  for (const char* p = str.begin(); p < str.end(); ++p) {
556
0
    unsigned char c = *p;
557
0
    if (escape_all_bytes || !absl::ascii_isprint(c)) {
558
0
      escaped_bytes += "\\x";
559
0
      escaped_bytes += absl::BytesToHexString(absl::string_view(p, 1));
560
0
    } else {
561
0
      switch (c) {
562
        // Note that we only handle printable escape characters here.  All
563
        // unprintable (\n, \r, \t, etc.) are hex escaped above.
564
0
        case '\\':
565
0
          escaped_bytes += "\\\\";
566
0
          break;
567
0
        case '\'':
568
0
        case '"':
569
0
        case '`':
570
          // Escape only quote chars that match escape_quote_char.
571
0
          if (escape_quote_char == 0 || c == escape_quote_char) {
572
0
            escaped_bytes += '\\';
573
0
          }
574
0
          escaped_bytes += c;
575
0
          break;
576
0
        default:
577
0
          escaped_bytes += c;
578
0
          break;
579
0
      }
580
0
    }
581
0
  }
582
0
  return escaped_bytes;
583
0
}
584
585
497k
// Parses a quoted string literal, optionally prefixed with r/R for a raw
// string, and returns its unescaped contents.
//
// Returns an InvalidArgument status with message "Invalid string literal" if
// `str` does not look like a (raw) string literal, or with a message starting
// with "Invalid string literal: " if unescaping fails.
absl::StatusOr<std::string> ParseStringLiteral(absl::string_view str) {
  const bool is_raw = MayBeRawStringLiteral(str);
  if (!is_raw && !MayBeStringLiteral(str)) {
    return absl::InvalidArgumentError("Invalid string literal");
  }

  // Strip off the prefix 'r' from the raw string content before parsing.
  absl::string_view body = is_raw ? absl::ClippedSubstr(str, 1) : str;

  // The opening delimiter is {""", '''} or {", '}; the closing delimiter must
  // be the same text, so it is passed along as the expected closing string.
  const size_t delimiter_length = MayBeTripleQuotedString(body) ? 3 : 1;
  const absl::string_view delimiter = body.substr(0, delimiter_length);
  // Drops the opening delimiter; the closing one stays for UnescapeInternal.
  body = absl::ClippedSubstr(body, delimiter_length);

  std::string unescaped;
  std::string failure;
  if (UnescapeInternal(body, delimiter, is_raw, /*is_bytes_literal=*/false,
                       &unescaped, &failure)) {
    return unescaped;
  }
  return absl::InvalidArgumentError(
      absl::StrCat("Invalid string literal: ", failure));
}
612
613
3.89k
// Parses a quoted bytes literal, prefixed with b/B or with rb/br (any letter
// case) for raw bytes, and returns its unescaped contents.
//
// Returns an InvalidArgument status with message "Invalid bytes literal" if
// `str` does not look like a (raw) bytes literal, or with a message starting
// with "Invalid bytes literal: " if unescaping fails.
absl::StatusOr<std::string> ParseBytesLiteral(absl::string_view str) {
  const bool is_raw = MayBeRawBytesLiteral(str);
  if (!is_raw && !MayBeBytesLiteral(str)) {
    return absl::InvalidArgumentError("Invalid bytes literal");
  }

  // Strip off the prefix before parsing: two characters {"rb", "br"} for raw
  // bytes, otherwise the single character 'b'.
  const size_t prefix_length = is_raw ? 2 : 1;
  absl::string_view body = absl::ClippedSubstr(str, prefix_length);

  // The opening delimiter is {""", '''} or {", '}; the closing delimiter must
  // be the same text, so it is passed along as the expected closing string.
  const size_t delimiter_length = MayBeTripleQuotedString(body) ? 3 : 1;
  const absl::string_view delimiter = body.substr(0, delimiter_length);
  // Drops the opening delimiter; the result still includes the closing one.
  body = absl::ClippedSubstr(body, delimiter_length);

  std::string unescaped;
  std::string failure;
  if (UnescapeInternal(body, delimiter, is_raw, /*is_bytes_literal=*/true,
                       &unescaped, &failure)) {
    return unescaped;
  }
  return absl::InvalidArgumentError(
      absl::StrCat("Invalid bytes literal: ", failure));
}
644
645
0
// Formats `str` as a quoted string literal. Single quotes are used when `str`
// contains a double quote and no single quote; double quotes are used in
// every other case. Only the chosen quote character is backslash-escaped.
std::string FormatStringLiteral(absl::string_view str) {
  const bool has_double_quote = str.find('"') != str.npos;
  const bool has_single_quote = str.find('\'') != str.npos;
  const absl::string_view quote =
      (has_double_quote && !has_single_quote) ? "'" : "\"";
  const std::string escaped =
      EscapeInternal(str, /*escape_all_bytes=*/true, quote[0]);
  return absl::StrCat(quote, escaped, quote);
}
650
651
0
// Formats `str` as a single-quoted string literal, backslash-escaping any
// single quotes it contains.
std::string FormatSingleQuotedStringLiteral(absl::string_view str) {
  const std::string escaped =
      EscapeInternal(str, /*escape_all_bytes=*/true, '\'');
  return absl::StrCat("'", escaped, "'");
}
654
655
0
// Formats `str` as a double-quoted string literal, backslash-escaping any
// double quotes it contains.
std::string FormatDoubleQuotedStringLiteral(absl::string_view str) {
  const std::string escaped =
      EscapeInternal(str, /*escape_all_bytes=*/true, '"');
  return absl::StrCat("\"", escaped, "\"");
}
658
659
0
// Formats `str` as a bytes literal with a leading 'b'. Single quotes are used
// when `str` contains a double quote and no single quote; double quotes are
// used in every other case. Only the chosen quote character is
// backslash-escaped, and printable bytes are not hex escaped.
std::string FormatBytesLiteral(absl::string_view str) {
  const bool has_double_quote = str.find('"') != str.npos;
  const bool has_single_quote = str.find('\'') != str.npos;
  const absl::string_view quote =
      (has_double_quote && !has_single_quote) ? "'" : "\"";
  const std::string escaped =
      EscapeBytes(str, /*escape_all_bytes=*/false, quote[0]);
  return absl::StrCat("b", quote, escaped, quote);
}
664
665
0
// Formats `str` as a single-quoted bytes literal (b'...'), backslash-escaping
// any single quotes it contains.
std::string FormatSingleQuotedBytesLiteral(absl::string_view str) {
  const std::string escaped =
      EscapeBytes(str, /*escape_all_bytes=*/false, '\'');
  return absl::StrCat("b'", escaped, "'");
}
668
669
0
// Formats `str` as a double-quoted bytes literal (b"..."), backslash-escaping
// any double quotes it contains.
std::string FormatDoubleQuotedBytesLiteral(absl::string_view str) {
  const std::string escaped =
      EscapeBytes(str, /*escape_all_bytes=*/false, '"');
  return absl::StrCat("b\"", escaped, "\"");
}
672
673
0
// Returns a copy of `str` if LexisIsIdentifier() accepts it; otherwise
// returns an InvalidArgument status with message "Invalid identifier".
absl::StatusOr<std::string> ParseIdentifier(absl::string_view str) {
  if (LexisIsIdentifier(str)) {
    return std::string(str);
  }
  return absl::InvalidArgumentError("Invalid identifier");
}
679
680
}  // namespace cel::internal