Coverage Report

Created: 2025-11-29 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/internal/utf8.cc
Line
Count
Source
1
// Copyright 2021 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "internal/utf8.h"
16
17
#include <algorithm>
18
#include <cstdint>
19
#include <cstring>
20
#include <string>
21
#include <utility>
22
23
#include "absl/base/macros.h"
24
#include "absl/base/nullability.h"
25
#include "absl/base/optimization.h"
26
#include "absl/log/absl_check.h"
27
#include "absl/strings/cord.h"
28
#include "absl/strings/string_view.h"
29
#include "internal/unicode.h"
30
31
// Implementation is based on
32
// https://go.googlesource.com/go/+/refs/heads/master/src/unicode/utf8/utf8.go
33
// but adapted for C++.
34
35
namespace cel::internal {
36
37
namespace {
38
39
// Bytes below this value are handled as complete single-byte code points by
// the validation and decoding routines in this file.
constexpr uint8_t kUtf8RuneSelf = 0x80;
40
// Used as the minimum capacity reserved by CordReader::Peek; presumably the
// maximum number of bytes in one encoded code point.
constexpr size_t kUtf8Max = 4;
41
42
// Inclusive bounds used for the second and third continuation bytes, and as
// the default bounds for the first continuation byte in kAccept.
constexpr uint8_t kLow = 0x80;
43
constexpr uint8_t kHigh = 0xbf;
44
45
// Masks selecting the payload bits of a continuation byte (kMaskX) and of the
// leading byte of a 2-, 3- and 4-byte sequence (kMask2, kMask3, kMask4).
constexpr uint8_t kMaskX = 0x3f;
46
constexpr uint8_t kMask2 = 0x1f;
47
constexpr uint8_t kMask3 = 0xf;
48
constexpr uint8_t kMask4 = 0x7;
49
50
// Tag bits OR'ed into encoded bytes by Utf8Encode: kTX for continuation bytes,
// kT2/kT3/kT4 for the leading byte of a 2-, 3- and 4-byte sequence.
constexpr uint8_t kTX = 0x80;
51
constexpr uint8_t kT2 = 0xc0;
52
constexpr uint8_t kT3 = 0xe0;
53
constexpr uint8_t kT4 = 0xf0;
54
55
// Entry values for kLeading. kXX marks a byte that cannot start a sequence and
// kAS is the entry used for bytes 0x00-0x7F. For kS1..kS7 the low three bits
// minus one give the number of continuation bytes (callers compute
// `(leading & 7) - 1`) and the high nibble is the index into kAccept (callers
// compute `leading >> 4`).
constexpr uint8_t kXX = 0xf1;
56
constexpr uint8_t kAS = 0xf0;
57
constexpr uint8_t kS1 = 0x02;
58
constexpr uint8_t kS2 = 0x13;
59
constexpr uint8_t kS3 = 0x03;
60
constexpr uint8_t kS4 = 0x23;
61
constexpr uint8_t kS5 = 0x34;
62
constexpr uint8_t kS6 = 0x04;
63
constexpr uint8_t kS7 = 0x44;
64
65
// NOLINTBEGIN
66
// clang-format off
67
// Classification of every possible first byte of a sequence, indexed by the
// byte value. Each entry is kAS, kXX, or one of kS1..kS7; the decoding and
// validation routines reject kXX and otherwise derive the continuation byte
// count and the kAccept index from the entry.
constexpr uint8_t kLeading[256] = {
68
  //   1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
69
  kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, // 0x00-0x0F
70
  kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, // 0x10-0x1F
71
  kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, // 0x20-0x2F
72
  kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, // 0x30-0x3F
73
  kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, // 0x40-0x4F
74
  kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, // 0x50-0x5F
75
  kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, // 0x60-0x6F
76
  kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, kAS, // 0x70-0x7F
77
  //   1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
78
  kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, // 0x80-0x8F
79
  kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, // 0x90-0x9F
80
  kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, // 0xA0-0xAF
81
  kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, // 0xB0-0xBF
82
  kXX, kXX, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, // 0xC0-0xCF
83
  kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, kS1, // 0xD0-0xDF
84
  kS2, kS3, kS3, kS3, kS3, kS3, kS3, kS3, kS3, kS3, kS3, kS3, kS3, kS4, kS3, kS3, // 0xE0-0xEF
85
  kS5, kS6, kS6, kS6, kS7, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, kXX, // 0xF0-0xFF
86
};
87
// clang-format on
88
// NOLINTEND
89
90
// Inclusive {min, max} range accepted for the first continuation byte, indexed
// by the high nibble of the kLeading entry (`leading >> 4`). Only indices 0-4
// carry real ranges; the remaining slots are zero-filled.
constexpr std::pair<const uint8_t, const uint8_t> kAccept[16] = {
91
    {kLow, kHigh}, {0xa0, kHigh}, {kLow, 0x9f}, {0x90, kHigh},
92
    {kLow, 0x8f},  {0x0, 0x0},    {0x0, 0x0},   {0x0, 0x0},
93
    {0x0, 0x0},    {0x0, 0x0},    {0x0, 0x0},   {0x0, 0x0},
94
    {0x0, 0x0},    {0x0, 0x0},    {0x0, 0x0},   {0x0, 0x0},
95
};
96
97
// Byte reader over an absl::string_view. It provides the reader interface the
// templated Utf8*Impl functions call: Remaining, HasRemaining, Peek, Read,
// Advance. Consuming bytes shrinks the held view from the front; the
// underlying bytes are not copied.
class StringReader final {
98
 public:
99
392k
  // Starts reading at the beginning of `input`.
  constexpr explicit StringReader(absl::string_view input) : input_(input) {}
100
101
1.77k
  // Number of bytes not yet consumed.
  size_t Remaining() const { return input_.size(); }
102
103
2.21M
  // True while at least one byte is left to read.
  bool HasRemaining() const { return !input_.empty(); }
104
105
1.77k
  // Returns the next `n` bytes without consuming them. `n` must not exceed
  // Remaining() (checked with ABSL_ASSERT).
  absl::string_view Peek(size_t n) {
106
1.77k
    ABSL_ASSERT(n <= Remaining());
107
1.77k
    return input_.substr(0, n);
108
1.77k
  }
109
110
1.82M
  // Consumes and returns the next byte. The reader must not be empty (checked
  // with ABSL_ASSERT).
  char Read() {
111
1.82M
    ABSL_ASSERT(HasRemaining());
112
1.82M
    char value = input_.front();
113
1.82M
    input_.remove_prefix(1);
114
1.82M
    return value;
115
1.82M
  }
116
117
1.77k
  // Consumes the next `n` bytes. `n` must not exceed Remaining() (checked with
  // ABSL_ASSERT).
  void Advance(size_t n) {
118
1.77k
    ABSL_ASSERT(n <= Remaining());
119
1.77k
    input_.remove_prefix(n);
120
1.77k
  }
121
122
0
  // Discards the current position and starts over on `input`.
  void Reset(absl::string_view input) { input_ = input; }
123
124
 private:
125
  // The bytes that have not been consumed yet.
  absl::string_view input_;
126
};
127
128
// Byte reader over an absl::Cord with the same interface as StringReader
// (Remaining, HasRemaining, Peek, Read, Advance, Reset). The reader holds its
// own copy of the Cord handle. Peeked bytes are moved out of the cord into a
// std::string buffer so they can be returned as one contiguous string_view.
class CordReader final {
129
 public:
130
  // Starts reading at the beginning of `input`.
  explicit CordReader(const absl::Cord& input)
131
0
      : input_(input), size_(input_.size()), buffer_(), index_(0) {}
132
133
0
  // Number of bytes not yet consumed, counting bytes that were already moved
  // into the buffer by Peek but not yet passed to Advance.
  size_t Remaining() const { return size_; }
134
135
0
  // True while at least one byte is left to read.
  bool HasRemaining() const { return size_ != 0; }
136
137
0
  // Returns the next `n` bytes without consuming them. `n` must not exceed
  // Remaining() (checked with ABSL_ASSERT). The returned view points into
  // buffer_, so a later Peek/Advance/Reset may invalidate it.
  absl::string_view Peek(size_t n) {
138
0
    ABSL_ASSERT(n <= Remaining());
139
0
    if (n == 0) {
140
0
      return absl::string_view();
141
0
    }
142
0
    if (n <= buffer_.size() - index_) {
143
      // Enough data remaining in temporary buffer.
144
0
      return absl::string_view(buffer_.data() + index_, n);
145
0
    }
146
    // We do not have enough data. See if we can fit it without allocating by
147
    // shifting data back to the beginning of the buffer.
148
0
    if (buffer_.capacity() >= n) {
149
      // It will fit in the current capacity, see if we need to shift the
150
      // existing data to make it fit.
151
0
      if (buffer_.capacity() - buffer_.size() < n && index_ != 0) {
152
        // We need to shift.
153
0
        buffer_.erase(buffer_.begin(), buffer_.begin() + index_);
154
0
        index_ = 0;
155
0
      }
156
0
    }
157
    // Ensure we never reserve less than kUtf8Max.
158
0
    buffer_.reserve(std::max(buffer_.size() + n, kUtf8Max));
159
0
    // Only the bytes missing from the buffer are taken from the cord.
    size_t to_copy = n - (buffer_.size() - index_);
160
0
    // NOTE(review): absl::CopyCordToString appears to replace the contents of
    // its destination rather than append to it. If buffer_ still holds unread
    // bytes at this point (buffer_.size() > index_), they would be dropped and
    // the view returned below would not cover `n` valid bytes. Confirm against
    // the Abseil documentation and consider absl::AppendCordToString.
    absl::CopyCordToString(input_.Subcord(0, to_copy), &buffer_);
161
0
    // The copied bytes are removed from the cord here; size_ is left unchanged
    // and is only reduced by Advance.
    input_.RemovePrefix(to_copy);
162
0
    return absl::string_view(buffer_.data() + index_, n);
163
0
  }
164
165
0
  // Consumes and returns the next byte, implemented as Peek(1) followed by
  // Advance(1).
  char Read() {
166
0
    char value = Peek(1).front();
167
0
    Advance(1);
168
0
    return value;
169
0
  }
170
171
0
  // Consumes the next `n` bytes. `n` must not exceed Remaining() (checked with
  // ABSL_ASSERT). Buffered bytes are consumed first; anything beyond them is
  // removed directly from the cord.
  void Advance(size_t n) {
172
0
    ABSL_ASSERT(n <= Remaining());
173
0
    if (n == 0) {
174
0
      return;
175
0
    }
176
0
    if (index_ < buffer_.size()) {
177
0
      // Take as much as possible from the unread part of the buffer.
      size_t count = std::min(n, buffer_.size() - index_);
178
0
      index_ += count;
179
0
      n -= count;
180
0
      size_ -= count;
181
0
      if (index_ < buffer_.size()) {
182
0
        return;
183
0
      }
184
      // Temporary buffer is empty, clear it.
185
0
      buffer_.clear();
186
0
      index_ = 0;
187
0
    }
188
0
    // Whatever is left of `n` (possibly 0) was never buffered; drop it from
    // the cord itself.
    input_.RemovePrefix(n);
189
0
    size_ -= n;
190
0
  }
191
192
0
  // Discards all state and starts over on `input`.
  void Reset(const absl::Cord& input) {
193
0
    input_ = input;
194
0
    size_ = input_.size();
195
0
    buffer_.clear();
196
0
    index_ = 0;
197
0
  }
198
199
 private:
200
  // Cord bytes that have not been moved into buffer_ or skipped yet.
  absl::Cord input_;
201
  // Total unconsumed bytes (buffered and unbuffered).
  size_t size_;
202
  // Contiguous storage for bytes handed out by Peek.
  std::string buffer_;
203
  // Offset of the first unconsumed byte inside buffer_.
  size_t index_;
204
};
205
206
// Returns true if every byte sequence produced by `reader` passes the checks
// below, and false at the first sequence that does not. The reader is consumed
// up to the point where the function returns.
//
// `BufferedByteReader` must provide HasRemaining(), Remaining(), Read(),
// Peek(n) and Advance(n), as StringReader and CordReader do.
template <typename BufferedByteReader>
207
392k
bool Utf8IsValidImpl(BufferedByteReader* reader) {
208
2.21M
  while (reader->HasRemaining()) {
209
1.82M
    const auto b = static_cast<uint8_t>(reader->Read());
210
1.82M
    // Bytes below kUtf8RuneSelf need no further checks.
    if (b < kUtf8RuneSelf) {
211
1.82M
      continue;
212
1.82M
    }
213
1.77k
    const auto leading = kLeading[b];
214
1.77k
    // kXX: this byte cannot start a sequence.
    if (leading == kXX) {
215
0
      return false;
216
0
    }
217
1.77k
    // Number of continuation bytes that must follow the leading byte.
    const auto size = static_cast<size_t>(leading & 7) - 1;
218
1.77k
    // The input ends before the sequence is complete.
    if (size > reader->Remaining()) {
219
0
      return false;
220
0
    }
221
1.77k
    const absl::string_view segment = reader->Peek(size);
222
1.77k
    // The first continuation byte has a range that depends on the leading
    // byte; the second and third must lie in [kLow, kHigh].
    const auto& accept = kAccept[leading >> 4];
223
1.77k
    if (static_cast<uint8_t>(segment[0]) < accept.first ||
224
1.77k
        static_cast<uint8_t>(segment[0]) > accept.second) {
225
0
      return false;
226
1.77k
    } else if (size == 1) {
227
984
    } else if (static_cast<uint8_t>(segment[1]) < kLow ||
228
984
               static_cast<uint8_t>(segment[1]) > kHigh) {
229
0
      return false;
230
984
    } else if (size == 2) {
231
670
    } else if (static_cast<uint8_t>(segment[2]) < kLow ||
232
314
               static_cast<uint8_t>(segment[2]) > kHigh) {
233
0
      return false;
234
0
    }
235
1.77k
    // The whole sequence checked out; skip its continuation bytes.
    reader->Advance(size);
236
1.77k
  }
237
392k
  return true;
238
392k
}
utf8.cc:bool cel::internal::(anonymous namespace)::Utf8IsValidImpl<cel::internal::(anonymous namespace)::StringReader>(cel::internal::(anonymous namespace)::StringReader*)
Line
Count
Source
207
392k
bool Utf8IsValidImpl(BufferedByteReader* reader) {
208
2.21M
  while (reader->HasRemaining()) {
209
1.82M
    const auto b = static_cast<uint8_t>(reader->Read());
210
1.82M
    if (b < kUtf8RuneSelf) {
211
1.82M
      continue;
212
1.82M
    }
213
1.77k
    const auto leading = kLeading[b];
214
1.77k
    if (leading == kXX) {
215
0
      return false;
216
0
    }
217
1.77k
    const auto size = static_cast<size_t>(leading & 7) - 1;
218
1.77k
    if (size > reader->Remaining()) {
219
0
      return false;
220
0
    }
221
1.77k
    const absl::string_view segment = reader->Peek(size);
222
1.77k
    const auto& accept = kAccept[leading >> 4];
223
1.77k
    if (static_cast<uint8_t>(segment[0]) < accept.first ||
224
1.77k
        static_cast<uint8_t>(segment[0]) > accept.second) {
225
0
      return false;
226
1.77k
    } else if (size == 1) {
227
984
    } else if (static_cast<uint8_t>(segment[1]) < kLow ||
228
984
               static_cast<uint8_t>(segment[1]) > kHigh) {
229
0
      return false;
230
984
    } else if (size == 2) {
231
670
    } else if (static_cast<uint8_t>(segment[2]) < kLow ||
232
314
               static_cast<uint8_t>(segment[2]) > kHigh) {
233
0
      return false;
234
0
    }
235
1.77k
    reader->Advance(size);
236
1.77k
  }
237
392k
  return true;
238
392k
}
Unexecuted instantiation: utf8.cc:bool cel::internal::(anonymous namespace)::Utf8IsValidImpl<cel::internal::(anonymous namespace)::CordReader>(cel::internal::(anonymous namespace)::CordReader*)
239
240
// Counts code points in the bytes produced by `reader`, consuming the reader
// completely. Malformed input does not stop the count: a byte that cannot
// start a sequence, a truncated sequence, or a sequence with a bad
// continuation byte adds one to the count and only its first byte is consumed.
//
// `BufferedByteReader` must provide HasRemaining(), Remaining(), Read(),
// Peek(n) and Advance(n), as StringReader and CordReader do.
template <typename BufferedByteReader>
241
0
size_t Utf8CodePointCountImpl(BufferedByteReader* reader) {
242
0
  size_t count = 0;
243
0
  while (reader->HasRemaining()) {
244
0
    // Every iteration accounts for exactly one code point, valid or not.
    count++;
245
0
    const auto b = static_cast<uint8_t>(reader->Read());
246
0
    if (b < kUtf8RuneSelf) {
247
0
      continue;
248
0
    }
249
0
    const auto leading = kLeading[b];
250
0
    // Not a valid leading byte: already counted, move on to the next byte.
    if (leading == kXX) {
251
0
      continue;
252
0
    }
253
0
    // Number of continuation bytes expected; reset to 0 below when the
    // sequence turns out to be malformed so nothing extra is skipped.
    auto size = static_cast<size_t>(leading & 7) - 1;
254
0
    // Truncated sequence: the remaining bytes are processed one at a time by
    // later iterations.
    if (size > reader->Remaining()) {
255
0
      continue;
256
0
    }
257
0
    const absl::string_view segment = reader->Peek(size);
258
0
    const auto& accept = kAccept[leading >> 4];
259
0
    if (static_cast<uint8_t>(segment[0]) < accept.first ||
260
0
        static_cast<uint8_t>(segment[0]) > accept.second) {
261
0
      size = 0;
262
0
    } else if (size == 1) {
263
0
    } else if (static_cast<uint8_t>(segment[1]) < kLow ||
264
0
               static_cast<uint8_t>(segment[1]) > kHigh) {
265
0
      size = 0;
266
0
    } else if (size == 2) {
267
0
    } else if (static_cast<uint8_t>(segment[2]) < kLow ||
268
0
               static_cast<uint8_t>(segment[2]) > kHigh) {
269
0
      size = 0;
270
0
    }
271
0
    // Skips the continuation bytes of a well-formed sequence, or nothing when
    // size was reset to 0.
    reader->Advance(size);
272
0
  }
273
0
  return count;
274
0
}
Unexecuted instantiation: utf8.cc:unsigned long cel::internal::(anonymous namespace)::Utf8CodePointCountImpl<cel::internal::(anonymous namespace)::StringReader>(cel::internal::(anonymous namespace)::StringReader*)
Unexecuted instantiation: utf8.cc:unsigned long cel::internal::(anonymous namespace)::Utf8CodePointCountImpl<cel::internal::(anonymous namespace)::CordReader>(cel::internal::(anonymous namespace)::CordReader*)
275
276
// Validates the bytes produced by `reader` and counts code points at the same
// time. Returns {count, true} when all input passes the checks, where count is
// the number of code points read. On the first malformed sequence it returns
// {count, false}, where count is the number of code points accepted before the
// failure.
//
// `BufferedByteReader` must provide HasRemaining(), Remaining(), Read(),
// Peek(n) and Advance(n), as StringReader and CordReader do.
template <typename BufferedByteReader>
277
0
std::pair<size_t, bool> Utf8ValidateImpl(BufferedByteReader* reader) {
278
0
  size_t count = 0;
279
0
  while (reader->HasRemaining()) {
280
0
    const auto b = static_cast<uint8_t>(reader->Read());
281
0
    // Single-byte code point.
    if (b < kUtf8RuneSelf) {
282
0
      count++;
283
0
      continue;
284
0
    }
285
0
    const auto leading = kLeading[b];
286
0
    // This byte cannot start a sequence.
    if (leading == kXX) {
287
0
      return {count, false};
288
0
    }
289
0
    // Number of continuation bytes that must follow the leading byte.
    const auto size = static_cast<size_t>(leading & 7) - 1;
290
0
    // The input ends before the sequence is complete.
    if (size > reader->Remaining()) {
291
0
      return {count, false};
292
0
    }
293
0
    const absl::string_view segment = reader->Peek(size);
294
0
    // The code point is only counted once every continuation byte required by
    // `size` has been checked.
    const auto& accept = kAccept[leading >> 4];
295
0
    if (static_cast<uint8_t>(segment[0]) < accept.first ||
296
0
        static_cast<uint8_t>(segment[0]) > accept.second) {
297
0
      return {count, false};
298
0
    } else if (size == 1) {
299
0
      count++;
300
0
    } else if (static_cast<uint8_t>(segment[1]) < kLow ||
301
0
               static_cast<uint8_t>(segment[1]) > kHigh) {
302
0
      return {count, false};
303
0
    } else if (size == 2) {
304
0
      count++;
305
0
    } else if (static_cast<uint8_t>(segment[2]) < kLow ||
306
0
               static_cast<uint8_t>(segment[2]) > kHigh) {
307
0
      return {count, false};
308
0
    } else {
309
0
      count++;
310
0
    }
311
0
    reader->Advance(size);
312
0
  }
313
0
  return {count, true};
314
0
}
Unexecuted instantiation: utf8.cc:std::__1::pair<unsigned long, bool> cel::internal::(anonymous namespace)::Utf8ValidateImpl<cel::internal::(anonymous namespace)::StringReader>(cel::internal::(anonymous namespace)::StringReader*)
Unexecuted instantiation: utf8.cc:std::__1::pair<unsigned long, bool> cel::internal::(anonymous namespace)::Utf8ValidateImpl<cel::internal::(anonymous namespace)::CordReader>(cel::internal::(anonymous namespace)::CordReader*)
315
316
}  // namespace
317
318
392k
// Returns whether `str` passes the checks in Utf8IsValidImpl.
bool Utf8IsValid(absl::string_view str) {
319
392k
  StringReader reader(str);
320
392k
  bool valid = Utf8IsValidImpl(&reader);
321
392k
  // Consistency check: rewind the reader and require Utf8ValidateImpl to reach
  // the same verdict.
  ABSL_ASSERT((reader.Reset(str), valid == Utf8ValidateImpl(&reader).second));
322
392k
  return valid;
323
392k
}
324
325
0
// Returns whether the bytes of `str` pass the checks in Utf8IsValidImpl.
bool Utf8IsValid(const absl::Cord& str) {
326
0
  CordReader reader(str);
327
0
  bool valid = Utf8IsValidImpl(&reader);
328
0
  // Consistency check: rewind the reader and require Utf8ValidateImpl to reach
  // the same verdict.
  ABSL_ASSERT((reader.Reset(str), valid == Utf8ValidateImpl(&reader).second));
329
0
  return valid;
330
0
}
331
332
0
// Returns the number of code points in `str` as counted by
// Utf8CodePointCountImpl.
size_t Utf8CodePointCount(absl::string_view str) {
333
0
  StringReader reader(str);
334
0
  return Utf8CodePointCountImpl(&reader);
335
0
}
336
337
0
// Returns the number of code points in `str` as counted by
// Utf8CodePointCountImpl.
size_t Utf8CodePointCount(const absl::Cord& str) {
338
0
  CordReader reader(str);
339
0
  return Utf8CodePointCountImpl(&reader);
340
0
}
341
342
0
// Validates `str` and counts its code points; returns the {count, valid} pair
// produced by Utf8ValidateImpl.
std::pair<size_t, bool> Utf8Validate(absl::string_view str) {
343
0
  StringReader reader(str);
344
0
  auto result = Utf8ValidateImpl(&reader);
345
0
  // Consistency check: rewind the reader and require Utf8IsValidImpl to reach
  // the same verdict.
  ABSL_ASSERT((reader.Reset(str), result.second == Utf8IsValidImpl(&reader)));
346
0
  return result;
347
0
}
348
349
0
// Validates the bytes of `str` and counts its code points; returns the
// {count, valid} pair produced by Utf8ValidateImpl.
std::pair<size_t, bool> Utf8Validate(const absl::Cord& str) {
350
0
  CordReader reader(str);
351
0
  auto result = Utf8ValidateImpl(&reader);
352
0
  // Consistency check: rewind the reader and require Utf8IsValidImpl to reach
  // the same verdict.
  ABSL_ASSERT((reader.Reset(str), result.second == Utf8IsValidImpl(&reader)));
353
0
  return result;
354
0
}
355
356
namespace {
357
358
// Decodes the continuation bytes of a multi-byte sequence whose leading byte
// has already been classified by the caller.
//
//   b          - the leading byte of the sequence.
//   leading    - the kLeading entry for `b` (not kXX).
//   size       - number of continuation bytes expected after `b`.
//   str        - the bytes following `b`; up to `size` of them are read, so
//                callers make sure at least that many are present.
//   code_point - if non-null, receives the decoded code point, or
//                kUnicodeReplacementCharacter when a continuation byte is out
//                of range.
//
// Returns the number of bytes consumed including `b`: 2, 3 or 4 for a
// well-formed sequence, 1 when a continuation byte is out of range.
size_t Utf8DecodeImpl(uint8_t b, uint8_t leading, size_t size,
359
                      absl::string_view str,
360
339k
                      char32_t* absl_nullable code_point) {
361
339k
  // The first continuation byte has a range that depends on the leading byte.
  const auto& accept = kAccept[leading >> 4];
362
339k
  const auto b1 = static_cast<uint8_t>(str.front());
363
339k
  if (ABSL_PREDICT_FALSE(b1 < accept.first || b1 > accept.second)) {
364
5
    if (code_point != nullptr) {
365
5
      *code_point = kUnicodeReplacementCharacter;
366
5
    }
367
5
    return 1;
368
5
  }
369
339k
  // Two-byte sequence: payload bits of `b` followed by those of b1.
  if (size <= 1) {
370
18.2k
    if (code_point != nullptr) {
371
18.2k
      *code_point = (static_cast<char32_t>(b & kMask2) << 6) |
372
18.2k
                    static_cast<char32_t>(b1 & kMaskX);
373
18.2k
    }
374
18.2k
    return 2;
375
18.2k
  }
376
321k
  str.remove_prefix(1);
377
321k
  const auto b2 = static_cast<uint8_t>(str.front());
378
321k
  if (ABSL_PREDICT_FALSE(b2 < kLow || b2 > kHigh)) {
379
5
    if (code_point != nullptr) {
380
5
      *code_point = kUnicodeReplacementCharacter;
381
5
    }
382
5
    return 1;
383
5
  }
384
321k
  // Three-byte sequence.
  if (size <= 2) {
385
306k
    if (code_point != nullptr) {
386
306k
      *code_point = (static_cast<char32_t>(b & kMask3) << 12) |
387
306k
                    (static_cast<char32_t>(b1 & kMaskX) << 6) |
388
306k
                    static_cast<char32_t>(b2 & kMaskX);
389
306k
    }
390
306k
    return 3;
391
306k
  }
392
15.0k
  str.remove_prefix(1);
393
15.0k
  const auto b3 = static_cast<uint8_t>(str.front());
394
15.0k
  if (ABSL_PREDICT_FALSE(b3 < kLow || b3 > kHigh)) {
395
3
    if (code_point != nullptr) {
396
3
      *code_point = kUnicodeReplacementCharacter;
397
3
    }
398
3
    return 1;
399
3
  }
400
15.0k
  // Four-byte sequence.
  if (code_point != nullptr) {
401
15.0k
    *code_point = (static_cast<char32_t>(b & kMask4) << 18) |
402
15.0k
                  (static_cast<char32_t>(b1 & kMaskX) << 12) |
403
15.0k
                  (static_cast<char32_t>(b2 & kMaskX) << 6) |
404
15.0k
                  static_cast<char32_t>(b3 & kMaskX);
405
15.0k
  }
406
15.0k
  return 4;
407
15.0k
}
408
409
}  // namespace
410
411
293M
// Decodes the code point at the start of `str`, which must not be empty
// (checked with ABSL_DCHECK).
//
// If `code_point` is non-null it receives the decoded value, or
// kUnicodeReplacementCharacter when the bytes at the start of `str` are not a
// well-formed sequence. Returns the number of bytes consumed: 1-4 for a
// well-formed sequence, 1 for a malformed or truncated one.
size_t Utf8Decode(absl::string_view str, char32_t* absl_nullable code_point) {
412
293M
  ABSL_DCHECK(!str.empty());
413
293M
  const auto b = static_cast<uint8_t>(str.front());
414
293M
  // Single-byte code point: the byte value is the code point.
  if (b < kUtf8RuneSelf) {
415
293M
    if (code_point != nullptr) {
416
293M
      *code_point = static_cast<char32_t>(b);
417
293M
    }
418
293M
    return 1;
419
293M
  }
420
339k
  const auto leading = kLeading[b];
421
339k
  // This byte cannot start a sequence.
  if (ABSL_PREDICT_FALSE(leading == kXX)) {
422
17
    if (code_point != nullptr) {
423
17
      *code_point = kUnicodeReplacementCharacter;
424
17
    }
425
17
    return 1;
426
17
  }
427
339k
  // Number of continuation bytes expected after the leading byte.
  auto size = static_cast<size_t>(leading & 7) - 1;
428
339k
  str.remove_prefix(1);
429
339k
  // The input ends before the sequence is complete.
  if (ABSL_PREDICT_FALSE(size > str.size())) {
430
2
    if (code_point != nullptr) {
431
2
      *code_point = kUnicodeReplacementCharacter;
432
2
    }
433
2
    return 1;
434
2
  }
435
339k
  return Utf8DecodeImpl(b, leading, size, str, code_point);
436
339k
}
437
438
// Decodes the code point at the position of `it`. The current chunk of `it`
// must not be empty (checked with ABSL_DCHECK). `it` itself is not advanced;
// the slow path works on a copy.
//
// If `code_point` is non-null it receives the decoded value, or
// kUnicodeReplacementCharacter when the bytes are not a well-formed sequence.
// Returns the number of bytes consumed: 1-4 for a well-formed sequence, 1 for
// a malformed or truncated one.
size_t Utf8Decode(const absl::Cord::CharIterator& it,
439
0
                  char32_t* absl_nullable code_point) {
440
0
  absl::string_view str = absl::Cord::ChunkRemaining(it);
441
0
  ABSL_DCHECK(!str.empty());
442
0
  const auto b = static_cast<uint8_t>(str.front());
443
0
  // Single-byte code point: the byte value is the code point.
  if (b < kUtf8RuneSelf) {
444
0
    if (code_point != nullptr) {
445
0
      *code_point = static_cast<char32_t>(b);
446
0
    }
447
0
    return 1;
448
0
  }
449
0
  const auto leading = kLeading[b];
450
0
  // This byte cannot start a sequence.
  if (ABSL_PREDICT_FALSE(leading == kXX)) {
451
0
    if (code_point != nullptr) {
452
0
      *code_point = kUnicodeReplacementCharacter;
453
0
    }
454
0
    return 1;
455
0
  }
456
0
  // Number of continuation bytes expected after the leading byte.
  auto size = static_cast<size_t>(leading & 7) - 1;
457
0
  str.remove_prefix(1);
458
0
  if (ABSL_PREDICT_TRUE(size <= str.size())) {
459
    // Fast path.
460
0
    return Utf8DecodeImpl(b, leading, size, str, code_point);
461
0
  }
462
0
  // Slow path: the current chunk does not hold all continuation bytes. Gather
  // them from the following chunks into a small stack buffer.
  absl::Cord::CharIterator current = it;
463
0
  absl::Cord::Advance(&current, 1);
464
0
  char buffer[3];
465
0
  size_t buffer_len = 0;
466
0
  while (buffer_len < size) {
467
0
    str = absl::Cord::ChunkRemaining(current);
468
0
    // No more data: the sequence is truncated.
    if (ABSL_PREDICT_FALSE(str.empty())) {
469
0
      if (code_point != nullptr) {
470
0
        *code_point = kUnicodeReplacementCharacter;
471
0
      }
472
0
      return 1;
473
0
    }
474
0
    // The copy is bounded by the buffer capacity (3), not by `size`, so the
    // buffer may end up holding more than `size` bytes; Utf8DecodeImpl reads
    // at most `size` of them.
    size_t to_copy = std::min(size_t{3} - buffer_len, str.size());
475
0
    std::memcpy(buffer + buffer_len, str.data(), to_copy);
476
0
    buffer_len += to_copy;
477
0
    absl::Cord::Advance(&current, to_copy);
478
0
  }
479
0
  return Utf8DecodeImpl(b, leading, size, absl::string_view(buffer, buffer_len),
480
0
                        code_point);
481
0
}
482
483
608M
// Appends the encoding of `code_point` to `*buffer`, which must not be null
// (checked with ABSL_DCHECK). Returns the number of bytes appended.
size_t Utf8Encode(char32_t code_point, std::string* absl_nonnull buffer) {
484
608M
  ABSL_DCHECK(buffer != nullptr);
485
486
608M
  // Encode into a stack array first, then append in one call.
  char storage[4];
487
608M
  size_t storage_len = Utf8Encode(code_point, storage);
488
608M
  buffer->append(storage, storage_len);
489
608M
  return storage_len;
490
608M
}
491
492
608M
// Writes the encoding of `code_point` to `buffer`, which must not be null
// (checked with ABSL_DCHECK). Up to 4 bytes are written and no terminator is
// added. A code point rejected by UnicodeIsValid is replaced with
// kUnicodeReplacementCharacter before encoding. Returns the number of bytes
// written.
size_t Utf8Encode(char32_t code_point, char* absl_nonnull buffer) {
493
608M
  ABSL_DCHECK(buffer != nullptr);
494
495
608M
  if (ABSL_PREDICT_FALSE(!UnicodeIsValid(code_point))) {
496
0
    code_point = kUnicodeReplacementCharacter;
497
0
  }
498
608M
  size_t storage_len = 0;
499
608M
  // One byte: the code point value itself.
  if (code_point <= 0x7f) {
500
597M
    buffer[storage_len++] = static_cast<char>(static_cast<uint8_t>(code_point));
501
597M
  // Two bytes: kT2 leading byte plus one kTX continuation byte.
  } else if (code_point <= 0x7ff) {
502
51.4k
    buffer[storage_len++] =
503
51.4k
        static_cast<char>(kT2 | static_cast<uint8_t>(code_point >> 6));
504
51.4k
    buffer[storage_len++] =
505
51.4k
        static_cast<char>(kTX | (static_cast<uint8_t>(code_point) & kMaskX));
506
10.3M
  // Three bytes: kT3 leading byte plus two kTX continuation bytes.
  } else if (code_point <= 0xffff) {
507
10.2M
    buffer[storage_len++] =
508
10.2M
        static_cast<char>(kT3 | static_cast<uint8_t>(code_point >> 12));
509
10.2M
    buffer[storage_len++] = static_cast<char>(
510
10.2M
        kTX | (static_cast<uint8_t>(code_point >> 6) & kMaskX));
511
10.2M
    buffer[storage_len++] =
512
10.2M
        static_cast<char>(kTX | (static_cast<uint8_t>(code_point) & kMaskX));
513
10.2M
  // Four bytes: kT4 leading byte plus three kTX continuation bytes.
  } else {
514
47.5k
    buffer[storage_len++] =
515
47.5k
        static_cast<char>(kT4 | static_cast<uint8_t>(code_point >> 18));
516
47.5k
    buffer[storage_len++] = static_cast<char>(
517
47.5k
        kTX | (static_cast<uint8_t>(code_point >> 12) & kMaskX));
518
47.5k
    buffer[storage_len++] = static_cast<char>(
519
47.5k
        kTX | (static_cast<uint8_t>(code_point >> 6) & kMaskX));
520
47.5k
    buffer[storage_len++] =
521
47.5k
        static_cast<char>(kTX | (static_cast<uint8_t>(code_point) & kMaskX));
522
47.5k
  }
523
608M
  return storage_len;
524
608M
}
525
526
}  // namespace cel::internal