Coverage Report

Created: 2025-12-29 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/common/source.cc
Line
Count
Source
1
// Copyright 2023 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "common/source.h"
16
17
#include <algorithm>
18
#include <cstddef>
19
#include <cstdint>
20
#include <limits>
21
#include <memory>
22
#include <string>
23
#include <tuple>
24
#include <utility>
25
#include <vector>
26
27
#include "absl/base/nullability.h"
28
#include "absl/base/optimization.h"
29
#include "absl/container/inlined_vector.h"
30
#include "absl/functional/overload.h"
31
#include "absl/log/absl_check.h"
32
#include "absl/status/status.h"
33
#include "absl/status/statusor.h"
34
#include "absl/strings/cord.h"
35
#include "absl/strings/str_cat.h"
36
#include "absl/strings/str_replace.h"
37
#include "absl/strings/string_view.h"
38
#include "absl/types/optional.h"
39
#include "absl/types/span.h"
40
#include "absl/types/variant.h"
41
#include "internal/unicode.h"
42
#include "internal/utf8.h"
43
44
namespace cel {
45
46
112k
// Returns the number of elements in the underlying view, whichever of the
// span alternatives is currently held by `view_`.
SourcePosition SourceContentView::size() const {
  // All alternatives expose `size()`, so a single generic visitor covers them.
  const auto length =
      absl::visit([](const auto& units) { return units.size(); }, view_);
  return static_cast<SourcePosition>(length);
}
55
56
138k
bool SourceContentView::empty() const {
57
138k
  return absl::visit(
58
138k
      absl::Overload(
59
138k
          [](absl::Span<const char> view) { return view.empty(); },
60
138k
          [](absl::Span<const uint8_t> view) { return view.empty(); },
61
138k
          [](absl::Span<const char16_t> view) { return view.empty(); },
62
138k
          [](absl::Span<const char32_t> view) { return view.empty(); }),
63
138k
      view_);
64
138k
}
65
66
403M
// Returns the element stored at `position`, widened to char32_t.
//
// `position` must satisfy 0 <= position < size(); this is only verified in
// debug builds.
char32_t SourceContentView::at(SourcePosition position) const {
  ABSL_DCHECK_GE(position, 0);
  ABSL_DCHECK_LT(position, size());
  const size_t index = static_cast<size_t>(position);
  return absl::visit(
      absl::Overload(
          // `char` may be signed, so go through uint8_t to avoid
          // sign-extending the byte while widening.
          [index](absl::Span<const char> chars) {
            const auto byte = static_cast<uint8_t>(chars[index]);
            return static_cast<char32_t>(byte);
          },
          // The remaining alternatives are unsigned and widen directly.
          [index](const auto& units) {
            return static_cast<char32_t>(units[index]);
          }),
      view_);
}
89
90
// Returns the elements in the half-open range [begin, end) as a UTF-8 string.
//
// Requires 0 <= begin <= end <= size(); this is only verified in debug builds.
std::string SourceContentView::ToString(SourcePosition begin,
                                        SourcePosition end) const {
  ABSL_DCHECK_GE(begin, 0);
  ABSL_DCHECK_LE(end, size());
  ABSL_DCHECK_LE(begin, end);
  const size_t first = static_cast<size_t>(begin);
  const size_t length = static_cast<size_t>(end) - first;

  // Shared path for the non-`char` alternatives: encode every element as
  // UTF-8. `max_bytes_per_unit` sizes the initial reservation; the excess is
  // released before returning.
  const auto transcode = [first, length](auto units,
                                         size_t max_bytes_per_unit) {
    units = units.subspan(first, length);
    std::string utf8;
    utf8.reserve(units.size() * max_bytes_per_unit);
    for (const auto& unit : units) {
      internal::Utf8Encode(utf8, unit);
    }
    utf8.shrink_to_fit();
    return utf8;
  };

  return absl::visit(
      absl::Overload(
          // `char` storage is copied out verbatim, without re-encoding.
          [first, length](absl::Span<const char> chars) {
            chars = chars.subspan(first, length);
            return std::string(chars.data(), chars.size());
          },
          [&transcode](absl::Span<const uint8_t> units) {
            return transcode(units, 2);
          },
          [&transcode](absl::Span<const char16_t> units) {
            return transcode(units, 3);
          },
          [&transcode](absl::Span<const char32_t> units) {
            return transcode(units, 4);
          }),
      view_);
}
137
138
0
void SourceContentView::AppendToString(std::string& dest) const {
139
0
  absl::visit(absl::Overload(
140
0
                  [&dest](absl::Span<const char> view) {
141
0
                    dest.append(view.data(), view.size());
142
0
                  },
143
0
                  [&dest](absl::Span<const uint8_t> view) {
144
0
                    for (const auto& code_point : view) {
145
0
                      internal::Utf8Encode(dest, code_point);
146
0
                    }
147
0
                  },
148
0
                  [&dest](absl::Span<const char16_t> view) {
149
0
                    for (const auto& code_point : view) {
150
0
                      internal::Utf8Encode(dest, code_point);
151
0
                    }
152
0
                  },
153
0
                  [&dest](absl::Span<const char32_t> view) {
154
0
                    for (const auto& code_point : view) {
155
0
                      internal::Utf8Encode(dest, code_point);
156
0
                    }
157
0
                  }),
158
0
              view_);
159
0
}
160
161
namespace common_internal {
162
163
class SourceImpl : public Source {
164
 public:
165
  SourceImpl(std::string description,
166
             absl::InlinedVector<SourcePosition, 1> line_offsets)
167
7.55k
      : description_(std::move(description)),
168
7.55k
        line_offsets_(std::move(line_offsets)) {}
169
170
150k
  absl::string_view description() const final { return description_; }
171
172
640k
  absl::Span<const SourcePosition> line_offsets() const final {
173
640k
    return absl::MakeConstSpan(line_offsets_);
174
640k
  }
175
176
 private:
177
  const std::string description_;
178
  const absl::InlinedVector<SourcePosition, 1> line_offsets_;
179
};
180
181
namespace {
182
183
class AsciiSource final : public SourceImpl {
184
 public:
185
  AsciiSource(std::string description,
186
              absl::InlinedVector<SourcePosition, 1> line_offsets,
187
              std::vector<char> text)
188
6.13k
      : SourceImpl(std::move(description), std::move(line_offsets)),
189
6.13k
        text_(std::move(text)) {}
190
191
98.3k
  ContentView content() const override {
192
98.3k
    return MakeContentView(absl::MakeConstSpan(text_));
193
98.3k
  }
194
195
 private:
196
  const std::vector<char> text_;
197
};
198
199
class Latin1Source final : public SourceImpl {
200
 public:
201
  Latin1Source(std::string description,
202
               absl::InlinedVector<SourcePosition, 1> line_offsets,
203
               std::vector<uint8_t> text)
204
181
      : SourceImpl(std::move(description), std::move(line_offsets)),
205
181
        text_(std::move(text)) {}
206
207
4.34k
  ContentView content() const override {
208
4.34k
    return MakeContentView(absl::MakeConstSpan(text_));
209
4.34k
  }
210
211
 private:
212
  const std::vector<uint8_t> text_;
213
};
214
215
class BasicPlaneSource final : public SourceImpl {
216
 public:
217
  BasicPlaneSource(std::string description,
218
                   absl::InlinedVector<SourcePosition, 1> line_offsets,
219
                   std::vector<char16_t> text)
220
609
      : SourceImpl(std::move(description), std::move(line_offsets)),
221
609
        text_(std::move(text)) {}
222
223
22.1k
  ContentView content() const override {
224
22.1k
    return MakeContentView(absl::MakeConstSpan(text_));
225
22.1k
  }
226
227
 private:
228
  const std::vector<char16_t> text_;
229
};
230
231
class SupplementalPlaneSource final : public SourceImpl {
232
 public:
233
  SupplementalPlaneSource(std::string description,
234
                          absl::InlinedVector<SourcePosition, 1> line_offsets,
235
                          std::vector<char32_t> text)
236
631
      : SourceImpl(std::move(description), std::move(line_offsets)),
237
631
        text_(std::move(text)) {}
238
239
23.5k
  ContentView content() const override {
240
23.5k
    return MakeContentView(absl::MakeConstSpan(text_));
241
23.5k
  }
242
243
 private:
244
  const std::vector<char32_t> text_;
245
};
246
247
template <typename T>
248
struct SourceTextTraits;
249
250
template <>
251
struct SourceTextTraits<absl::string_view> {
252
  using iterator_type = absl::string_view;
253
254
7.63k
  static iterator_type Begin(absl::string_view text) { return text; }
255
256
142M
  static void Advance(iterator_type& it, size_t n) { it.remove_prefix(n); }
257
258
  static void AppendTo(std::vector<uint8_t>& out, absl::string_view text,
259
329
                       size_t n) {
260
329
    const auto* in = reinterpret_cast<const uint8_t*>(text.data());
261
329
    out.insert(out.end(), in, in + n);
262
329
  }
263
264
6.13k
  static std::vector<char> ToVector(absl::string_view in) {
265
6.13k
    std::vector<char> out;
266
6.13k
    out.reserve(in.size());
267
6.13k
    out.insert(out.end(), in.begin(), in.end());
268
6.13k
    return out;
269
6.13k
  }
270
};
271
272
template <>
273
struct SourceTextTraits<absl::Cord> {
274
  using iterator_type = absl::Cord::CharIterator;
275
276
0
  static iterator_type Begin(const absl::Cord& text) {
277
0
    return text.char_begin();
278
0
  }
279
280
0
  static void Advance(iterator_type& it, size_t n) {
281
0
    absl::Cord::Advance(&it, n);
282
0
  }
283
284
  static void AppendTo(std::vector<uint8_t>& out, const absl::Cord& text,
285
0
                       size_t n) {
286
0
    auto it = text.char_begin();
287
0
    while (n > 0) {
288
0
      auto str = absl::Cord::ChunkRemaining(it);
289
0
      size_t to_append = std::min(n, str.size());
290
0
      const auto* in = reinterpret_cast<const uint8_t*>(str.data());
291
0
      out.insert(out.end(), in, in + to_append);
292
0
      n -= to_append;
293
0
      absl::Cord::Advance(&it, to_append);
294
0
    }
295
0
  }
296
297
0
  static std::vector<char> ToVector(const absl::Cord& in) {
298
0
    std::vector<char> out;
299
0
    out.reserve(in.size());
300
0
    for (const auto& chunk : in.Chunks()) {
301
0
      out.insert(out.end(), chunk.begin(), chunk.end());
302
0
    }
303
0
    return out;
304
0
  }
305
};
306
307
template <typename T>
308
absl::StatusOr<SourcePtr> NewSourceImpl(std::string description, const T& text,
309
7.63k
                                        const size_t text_size) {
310
7.63k
  if (ABSL_PREDICT_FALSE(
311
7.63k
          text_size >
312
7.63k
          static_cast<size_t>(std::numeric_limits<int32_t>::max()))) {
313
0
    return absl::InvalidArgumentError("expression larger than 2GiB limit");
314
0
  }
315
7.63k
  using Traits = SourceTextTraits<T>;
316
7.63k
  size_t index = 0;
317
7.63k
  typename Traits::iterator_type it = Traits::Begin(text);
318
7.63k
  SourcePosition offset = 0;
319
7.63k
  char32_t code_point;
320
7.63k
  size_t code_units;
321
7.63k
  std::vector<uint8_t> data8;
322
7.63k
  std::vector<char16_t> data16;
323
7.63k
  std::vector<char32_t> data32;
324
7.63k
  absl::InlinedVector<SourcePosition, 1> line_offsets;
325
92.2M
  while (index < text_size) {
326
92.2M
    std::tie(code_point, code_units) = cel::internal::Utf8Decode(it);
327
92.2M
    if (ABSL_PREDICT_FALSE(code_point ==
328
92.2M
                               cel::internal::kUnicodeReplacementCharacter &&
329
92.2M
                           code_units == 1)) {
330
      // Thats an invalid UTF-8 encoding.
331
23
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
332
23
    }
333
92.2M
    if (code_point == '\n') {
334
28.6M
      line_offsets.push_back(offset + 1);
335
28.6M
    }
336
92.2M
    if (code_point <= 0x7f) {
337
92.2M
      Traits::Advance(it, code_units);
338
92.2M
      index += code_units;
339
92.2M
      ++offset;
340
92.2M
      continue;
341
92.2M
    }
342
1.47k
    if (code_point <= 0xff) {
343
329
      data8.reserve(text_size);
344
329
      Traits::AppendTo(data8, text, index);
345
329
      data8.push_back(static_cast<uint8_t>(code_point));
346
329
      Traits::Advance(it, code_units);
347
329
      index += code_units;
348
329
      ++offset;
349
329
      goto latin1;
350
329
    }
351
1.14k
    if (code_point <= 0xffff) {
352
673
      data16.reserve(text_size);
353
12.3M
      for (size_t offset = 0; offset < index; offset++) {
354
12.3M
        data16.push_back(static_cast<uint8_t>(text[offset]));
355
12.3M
      }
356
673
      data16.push_back(static_cast<char16_t>(code_point));
357
673
      Traits::Advance(it, code_units);
358
673
      index += code_units;
359
673
      ++offset;
360
673
      goto basic;
361
673
    }
362
475
    data32.reserve(text_size);
363
5.68M
    for (size_t offset = 0; offset < index; offset++) {
364
5.68M
      data32.push_back(static_cast<char32_t>(text[offset]));
365
5.68M
    }
366
475
    data32.push_back(code_point);
367
475
    Traits::Advance(it, code_units);
368
475
    index += code_units;
369
475
    ++offset;
370
475
    goto supplemental;
371
1.14k
  }
372
6.13k
  line_offsets.push_back(offset + 1);
373
6.13k
  return std::make_unique<AsciiSource>(
374
6.13k
      std::move(description), std::move(line_offsets), Traits::ToVector(text));
375
329
latin1:
376
4.72M
  while (index < text_size) {
377
4.72M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
378
4.72M
    if (ABSL_PREDICT_FALSE(code_point ==
379
4.72M
                               internal::kUnicodeReplacementCharacter &&
380
4.72M
                           code_units == 1)) {
381
      // Thats an invalid UTF-8 encoding.
382
14
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
383
14
    }
384
4.72M
    if (code_point == '\n') {
385
3.78M
      line_offsets.push_back(offset + 1);
386
3.78M
    }
387
4.72M
    if (code_point <= 0xff) {
388
4.72M
      data8.push_back(static_cast<uint8_t>(code_point));
389
4.72M
      Traits::Advance(it, code_units);
390
4.72M
      index += code_units;
391
4.72M
      ++offset;
392
4.72M
      continue;
393
4.72M
    }
394
134
    if (code_point <= 0xffff) {
395
58
      data16.reserve(text_size);
396
2.22M
      for (const auto& value : data8) {
397
2.22M
        data16.push_back(value);
398
2.22M
      }
399
58
      std::vector<uint8_t>().swap(data8);
400
58
      data16.push_back(static_cast<char16_t>(code_point));
401
58
      Traits::Advance(it, code_units);
402
58
      index += code_units;
403
58
      ++offset;
404
58
      goto basic;
405
58
    }
406
76
    data32.reserve(text_size);
407
1.10M
    for (const auto& value : data8) {
408
1.10M
      data32.push_back(value);
409
1.10M
    }
410
76
    std::vector<uint8_t>().swap(data8);
411
76
    data32.push_back(code_point);
412
76
    Traits::Advance(it, code_units);
413
76
    index += code_units;
414
76
    ++offset;
415
76
    goto supplemental;
416
134
  }
417
181
  line_offsets.push_back(offset + 1);
418
181
  return std::make_unique<Latin1Source>(
419
181
      std::move(description), std::move(line_offsets), std::move(data8));
420
731
basic:
421
12.1M
  while (index < text_size) {
422
12.1M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
423
12.1M
    if (ABSL_PREDICT_FALSE(code_point ==
424
12.1M
                               internal::kUnicodeReplacementCharacter &&
425
12.1M
                           code_units == 1)) {
426
      // Thats an invalid UTF-8 encoding.
427
20
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
428
20
    }
429
12.1M
    if (code_point == '\n') {
430
7.29M
      line_offsets.push_back(offset + 1);
431
7.29M
    }
432
12.1M
    if (code_point <= 0xffff) {
433
12.1M
      data16.push_back(static_cast<char16_t>(code_point));
434
12.1M
      Traits::Advance(it, code_units);
435
12.1M
      index += code_units;
436
12.1M
      ++offset;
437
12.1M
      continue;
438
12.1M
    }
439
102
    data32.reserve(text_size);
440
1.38M
    for (const auto& value : data16) {
441
1.38M
      data32.push_back(static_cast<char32_t>(value));
442
1.38M
    }
443
102
    std::vector<char16_t>().swap(data16);
444
102
    data32.push_back(code_point);
445
102
    Traits::Advance(it, code_units);
446
102
    index += code_units;
447
102
    ++offset;
448
102
    goto supplemental;
449
12.1M
  }
450
609
  line_offsets.push_back(offset + 1);
451
609
  return std::make_unique<BasicPlaneSource>(
452
609
      std::move(description), std::move(line_offsets), std::move(data16));
453
653
supplemental:
454
33.5M
  while (index < text_size) {
455
33.5M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
456
33.5M
    if (ABSL_PREDICT_FALSE(code_point ==
457
33.5M
                               internal::kUnicodeReplacementCharacter &&
458
33.5M
                           code_units == 1)) {
459
      // Thats an invalid UTF-8 encoding.
460
22
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
461
22
    }
462
33.5M
    if (code_point == '\n') {
463
15.7M
      line_offsets.push_back(offset + 1);
464
15.7M
    }
465
33.5M
    data32.push_back(code_point);
466
33.5M
    Traits::Advance(it, code_units);
467
33.5M
    index += code_units;
468
33.5M
    ++offset;
469
33.5M
  }
470
631
  line_offsets.push_back(offset + 1);
471
631
  return std::make_unique<SupplementalPlaneSource>(
472
631
      std::move(description), std::move(line_offsets), std::move(data32));
473
653
}
source.cc:absl::lts_20250512::StatusOr<std::__1::unique_ptr<cel::Source, std::__1::default_delete<cel::Source> > > cel::common_internal::(anonymous namespace)::NewSourceImpl<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long)
Line
Count
Source
309
7.63k
                                        const size_t text_size) {
310
7.63k
  if (ABSL_PREDICT_FALSE(
311
7.63k
          text_size >
312
7.63k
          static_cast<size_t>(std::numeric_limits<int32_t>::max()))) {
313
0
    return absl::InvalidArgumentError("expression larger than 2GiB limit");
314
0
  }
315
7.63k
  using Traits = SourceTextTraits<T>;
316
7.63k
  size_t index = 0;
317
7.63k
  typename Traits::iterator_type it = Traits::Begin(text);
318
7.63k
  SourcePosition offset = 0;
319
7.63k
  char32_t code_point;
320
7.63k
  size_t code_units;
321
7.63k
  std::vector<uint8_t> data8;
322
7.63k
  std::vector<char16_t> data16;
323
7.63k
  std::vector<char32_t> data32;
324
7.63k
  absl::InlinedVector<SourcePosition, 1> line_offsets;
325
92.2M
  while (index < text_size) {
326
92.2M
    std::tie(code_point, code_units) = cel::internal::Utf8Decode(it);
327
92.2M
    if (ABSL_PREDICT_FALSE(code_point ==
328
92.2M
                               cel::internal::kUnicodeReplacementCharacter &&
329
92.2M
                           code_units == 1)) {
330
      // Thats an invalid UTF-8 encoding.
331
23
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
332
23
    }
333
92.2M
    if (code_point == '\n') {
334
28.6M
      line_offsets.push_back(offset + 1);
335
28.6M
    }
336
92.2M
    if (code_point <= 0x7f) {
337
92.2M
      Traits::Advance(it, code_units);
338
92.2M
      index += code_units;
339
92.2M
      ++offset;
340
92.2M
      continue;
341
92.2M
    }
342
1.47k
    if (code_point <= 0xff) {
343
329
      data8.reserve(text_size);
344
329
      Traits::AppendTo(data8, text, index);
345
329
      data8.push_back(static_cast<uint8_t>(code_point));
346
329
      Traits::Advance(it, code_units);
347
329
      index += code_units;
348
329
      ++offset;
349
329
      goto latin1;
350
329
    }
351
1.14k
    if (code_point <= 0xffff) {
352
673
      data16.reserve(text_size);
353
12.3M
      for (size_t offset = 0; offset < index; offset++) {
354
12.3M
        data16.push_back(static_cast<uint8_t>(text[offset]));
355
12.3M
      }
356
673
      data16.push_back(static_cast<char16_t>(code_point));
357
673
      Traits::Advance(it, code_units);
358
673
      index += code_units;
359
673
      ++offset;
360
673
      goto basic;
361
673
    }
362
475
    data32.reserve(text_size);
363
5.68M
    for (size_t offset = 0; offset < index; offset++) {
364
5.68M
      data32.push_back(static_cast<char32_t>(text[offset]));
365
5.68M
    }
366
475
    data32.push_back(code_point);
367
475
    Traits::Advance(it, code_units);
368
475
    index += code_units;
369
475
    ++offset;
370
475
    goto supplemental;
371
1.14k
  }
372
6.13k
  line_offsets.push_back(offset + 1);
373
6.13k
  return std::make_unique<AsciiSource>(
374
6.13k
      std::move(description), std::move(line_offsets), Traits::ToVector(text));
375
329
latin1:
376
4.72M
  while (index < text_size) {
377
4.72M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
378
4.72M
    if (ABSL_PREDICT_FALSE(code_point ==
379
4.72M
                               internal::kUnicodeReplacementCharacter &&
380
4.72M
                           code_units == 1)) {
381
      // Thats an invalid UTF-8 encoding.
382
14
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
383
14
    }
384
4.72M
    if (code_point == '\n') {
385
3.78M
      line_offsets.push_back(offset + 1);
386
3.78M
    }
387
4.72M
    if (code_point <= 0xff) {
388
4.72M
      data8.push_back(static_cast<uint8_t>(code_point));
389
4.72M
      Traits::Advance(it, code_units);
390
4.72M
      index += code_units;
391
4.72M
      ++offset;
392
4.72M
      continue;
393
4.72M
    }
394
134
    if (code_point <= 0xffff) {
395
58
      data16.reserve(text_size);
396
2.22M
      for (const auto& value : data8) {
397
2.22M
        data16.push_back(value);
398
2.22M
      }
399
58
      std::vector<uint8_t>().swap(data8);
400
58
      data16.push_back(static_cast<char16_t>(code_point));
401
58
      Traits::Advance(it, code_units);
402
58
      index += code_units;
403
58
      ++offset;
404
58
      goto basic;
405
58
    }
406
76
    data32.reserve(text_size);
407
1.10M
    for (const auto& value : data8) {
408
1.10M
      data32.push_back(value);
409
1.10M
    }
410
76
    std::vector<uint8_t>().swap(data8);
411
76
    data32.push_back(code_point);
412
76
    Traits::Advance(it, code_units);
413
76
    index += code_units;
414
76
    ++offset;
415
76
    goto supplemental;
416
134
  }
417
181
  line_offsets.push_back(offset + 1);
418
181
  return std::make_unique<Latin1Source>(
419
181
      std::move(description), std::move(line_offsets), std::move(data8));
420
731
basic:
421
12.1M
  while (index < text_size) {
422
12.1M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
423
12.1M
    if (ABSL_PREDICT_FALSE(code_point ==
424
12.1M
                               internal::kUnicodeReplacementCharacter &&
425
12.1M
                           code_units == 1)) {
426
      // Thats an invalid UTF-8 encoding.
427
20
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
428
20
    }
429
12.1M
    if (code_point == '\n') {
430
7.29M
      line_offsets.push_back(offset + 1);
431
7.29M
    }
432
12.1M
    if (code_point <= 0xffff) {
433
12.1M
      data16.push_back(static_cast<char16_t>(code_point));
434
12.1M
      Traits::Advance(it, code_units);
435
12.1M
      index += code_units;
436
12.1M
      ++offset;
437
12.1M
      continue;
438
12.1M
    }
439
102
    data32.reserve(text_size);
440
1.38M
    for (const auto& value : data16) {
441
1.38M
      data32.push_back(static_cast<char32_t>(value));
442
1.38M
    }
443
102
    std::vector<char16_t>().swap(data16);
444
102
    data32.push_back(code_point);
445
102
    Traits::Advance(it, code_units);
446
102
    index += code_units;
447
102
    ++offset;
448
102
    goto supplemental;
449
12.1M
  }
450
609
  line_offsets.push_back(offset + 1);
451
609
  return std::make_unique<BasicPlaneSource>(
452
609
      std::move(description), std::move(line_offsets), std::move(data16));
453
653
supplemental:
454
33.5M
  while (index < text_size) {
455
33.5M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
456
33.5M
    if (ABSL_PREDICT_FALSE(code_point ==
457
33.5M
                               internal::kUnicodeReplacementCharacter &&
458
33.5M
                           code_units == 1)) {
459
      // Thats an invalid UTF-8 encoding.
460
22
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
461
22
    }
462
33.5M
    if (code_point == '\n') {
463
15.7M
      line_offsets.push_back(offset + 1);
464
15.7M
    }
465
33.5M
    data32.push_back(code_point);
466
33.5M
    Traits::Advance(it, code_units);
467
33.5M
    index += code_units;
468
33.5M
    ++offset;
469
33.5M
  }
470
631
  line_offsets.push_back(offset + 1);
471
631
  return std::make_unique<SupplementalPlaneSource>(
472
631
      std::move(description), std::move(line_offsets), std::move(data32));
473
653
}
Unexecuted instantiation: source.cc:absl::lts_20250512::StatusOr<std::__1::unique_ptr<cel::Source, std::__1::default_delete<cel::Source> > > cel::common_internal::(anonymous namespace)::NewSourceImpl<absl::lts_20250512::Cord>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, absl::lts_20250512::Cord const&, unsigned long)
474
475
}  // namespace
476
477
}  // namespace common_internal
478
479
// Converts an absolute `position` into a line/column location. Returns
// absl::nullopt when FindLine cannot place the position on a line.
absl::optional<SourceLocation> Source::GetLocation(
    SourcePosition position) const {
  const auto found = FindLine(position);
  if (ABSL_PREDICT_FALSE(!found.has_value())) {
    return absl::nullopt;
  }
  const int32_t line = found->first;
  const SourcePosition line_start = found->second;
  // The column is the distance from the start of the containing line.
  return SourceLocation{line, position - line_start};
}
488
489
// Converts a line/column `location` into an absolute position. Returns
// absl::nullopt when the line is below 1, the column is negative, or the line
// has no known start position.
absl::optional<SourcePosition> Source::GetPosition(
    const SourceLocation& location) const {
  const bool well_formed = location.line >= 1 && location.column >= 0;
  if (ABSL_PREDICT_FALSE(!well_formed)) {
    return absl::nullopt;
  }
  const auto line_start = FindLinePosition(location.line);
  if (ABSL_PREDICT_FALSE(!line_start.has_value())) {
    return absl::nullopt;
  }
  return *line_start + location.column;
}
500
501
140k
// Returns the text of the 1-based `line`, or absl::nullopt when the line has
// no known start position or the source is empty.
absl::optional<std::string> Source::Snippet(int32_t line) const {
  const auto text = content();
  const auto line_begin = FindLinePosition(line);
  if (ABSL_PREDICT_FALSE(!line_begin.has_value() || text.empty())) {
    return absl::nullopt;
  }
  const auto next_line_begin = FindLinePosition(line + 1);
  if (!next_line_begin.has_value()) {
    // No following line is known: take everything from the line start on.
    return text.ToString(*line_begin);
  }
  // Stop one position before the next line's start so the separator that
  // ends this line is left out.
  return text.ToString(*line_begin, *next_line_begin - 1);
}
513
514
140k
// Renders the line containing `location` followed by a marker line that
// points at the column, in the form "\n | <line>\n | <dots>^". Returns an
// empty string when no non-empty snippet exists for the line.
std::string Source::DisplayErrorLocation(SourceLocation location) const {
  constexpr char32_t kDot = '.';
  constexpr char32_t kHat = '^';

  // Wide variants are emitted under characters that take more than one UTF-8
  // byte.
  constexpr char32_t kWideDot = 0xff0e;
  constexpr char32_t kWideHat = 0xff3e;

  const absl::optional<std::string> snippet = Snippet(location.line);
  if (!snippet.has_value() || snippet->empty()) {
    return "";
  }

  const std::string line_text = absl::StrReplaceAll(*snippet, {{"\t", " "}});
  std::string result = absl::StrCat("\n | ", line_text, "\n | ");

  // Emit one dot per character that precedes the reported column.
  absl::string_view remaining(line_text);
  size_t width = 0;
  for (int32_t column = 0; column < location.column && !remaining.empty();
       ++column) {
    std::tie(std::ignore, width) = internal::Utf8Decode(remaining);
    remaining.remove_prefix(width);
    internal::Utf8Encode(result, width > 1 ? kWideDot : kDot);
  }

  // The hat takes the width of the character it points at; when the column
  // lies past the end of the line it falls back to the narrow hat.
  width = 0;
  if (!remaining.empty()) {
    std::tie(std::ignore, width) = internal::Utf8Decode(remaining);
  }
  internal::Utf8Encode(result, width > 1 ? kWideHat : kHat);
  return result;
}
554
555
4.43M
// Returns the position at which the 1-based `line` starts, or absl::nullopt
// when `line` is below 1 or beyond the recorded line offsets.
absl::optional<SourcePosition> Source::FindLinePosition(int32_t line) const {
  if (ABSL_PREDICT_FALSE(line < 1)) {
    return absl::nullopt;
  }
  // The first line always starts at position 0 and has no table entry.
  if (line == 1) {
    return SourcePosition{0};
  }
  const auto offsets = line_offsets();
  const auto known_lines = static_cast<int32_t>(offsets.size());
  if (ABSL_PREDICT_FALSE(line > known_lines)) {
    return absl::nullopt;
  }
  // Entry k holds the start of line k + 2.
  return offsets[static_cast<size_t>(line - 2)];
}
568
569
// Finds the line containing `position`.
//
// Returns the 1-based line number paired with the position at which that line
// starts, or absl::nullopt when `position` is negative. A position at or past
// the last recorded offset is attributed to the line after it.
//
// The lookup is a binary search, so it relies on `line_offsets()` being in
// ascending order; NewSourceImpl records offsets in increasing order.
absl::optional<std::pair<int32_t, SourcePosition>> Source::FindLine(
    SourcePosition position) const {
  if (ABSL_PREDICT_FALSE(position < 0)) {
    return absl::nullopt;
  }
  const auto offsets = line_offsets();
  // First recorded line start that lies strictly after `position`; every
  // entry before it belongs to a line that starts at or before `position`.
  const auto next_line_start =
      std::upper_bound(offsets.begin(), offsets.end(), position);
  const auto preceding = static_cast<size_t>(next_line_start - offsets.begin());
  if (preceding == 0) {
    // Still on the first line, which starts at 0 and has no table entry.
    return std::make_pair(int32_t{1}, SourcePosition{0});
  }
  return std::make_pair(static_cast<int32_t>(preceding + 1),
                        offsets[preceding - 1]);
}
587
588
// Creates a Source from UTF-8 `content` held in contiguous memory.
// `description` is stored on the resulting source. Any error produced by
// NewSourceImpl is returned unchanged.
absl::StatusOr<absl_nonnull SourcePtr> NewSource(absl::string_view content,
                                                 std::string description) {
  const size_t byte_count = content.size();
  return common_internal::NewSourceImpl(std::move(description), content,
                                        byte_count);
}
593
594
// Creates a Source from UTF-8 `content` held in an absl::Cord.
// `description` is stored on the resulting source. Any error produced by
// NewSourceImpl is returned unchanged.
absl::StatusOr<absl_nonnull SourcePtr> NewSource(const absl::Cord& content,
                                                 std::string description) {
  const size_t byte_count = content.size();
  return common_internal::NewSourceImpl(std::move(description), content,
                                        byte_count);
}
599
600
}  // namespace cel