Coverage Report

Created: 2025-11-29 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/common/source.cc
Line
Count
Source
1
// Copyright 2023 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "common/source.h"
16
17
#include <algorithm>
18
#include <cstddef>
19
#include <cstdint>
20
#include <limits>
21
#include <memory>
22
#include <string>
23
#include <tuple>
24
#include <utility>
25
#include <vector>
26
27
#include "absl/base/nullability.h"
28
#include "absl/base/optimization.h"
29
#include "absl/container/inlined_vector.h"
30
#include "absl/functional/overload.h"
31
#include "absl/log/absl_check.h"
32
#include "absl/status/status.h"
33
#include "absl/status/statusor.h"
34
#include "absl/strings/cord.h"
35
#include "absl/strings/str_cat.h"
36
#include "absl/strings/str_replace.h"
37
#include "absl/strings/string_view.h"
38
#include "absl/types/optional.h"
39
#include "absl/types/span.h"
40
#include "absl/types/variant.h"
41
#include "internal/unicode.h"
42
#include "internal/utf8.h"
43
44
namespace cel {
45
46
47.3k
// Returns the number of elements in the viewed span, whichever of the
// supported element widths is currently held by `view_`.
SourcePosition SourceContentView::size() const {
  // Every alternative is a span, so one generic visitor covers them all.
  const size_t length = absl::visit(
      [](const auto& span) -> size_t { return span.size(); }, view_);
  return static_cast<SourcePosition>(length);
}
55
56
56.5k
bool SourceContentView::empty() const {
57
56.5k
  return absl::visit(
58
56.5k
      absl::Overload(
59
56.5k
          [](absl::Span<const char> view) { return view.empty(); },
60
56.5k
          [](absl::Span<const uint8_t> view) { return view.empty(); },
61
56.5k
          [](absl::Span<const char16_t> view) { return view.empty(); },
62
56.5k
          [](absl::Span<const char32_t> view) { return view.empty(); }),
63
56.5k
      view_);
64
56.5k
}
65
66
200M
// Returns the element stored at `position`, widened to char32_t.
//
// `position` must satisfy 0 <= position < size(); this is only verified by
// debug checks.
char32_t SourceContentView::at(SourcePosition position) const {
  ABSL_DCHECK_GE(position, 0);
  ABSL_DCHECK_LT(position, size());
  const auto index = static_cast<size_t>(position);
  return absl::visit(
      absl::Overload(
          // `char` may be signed, so go through uint8_t first to avoid
          // sign-extending bytes >= 0x80 when widening.
          [index](absl::Span<const char> bytes) {
            return static_cast<char32_t>(static_cast<uint8_t>(bytes[index]));
          },
          // The remaining alternatives hold unsigned units and widen
          // directly.
          [index](const auto& units) {
            return static_cast<char32_t>(units[index]);
          }),
      view_);
}
89
90
// Returns the elements in the half-open range [begin, end) as a UTF-8 encoded
// std::string.
//
// Requires 0 <= begin <= end <= size(); this is only verified by debug checks.
std::string SourceContentView::ToString(SourcePosition begin,
                                        SourcePosition end) const {
  ABSL_DCHECK_GE(begin, 0);
  ABSL_DCHECK_LE(end, size());
  ABSL_DCHECK_LE(begin, end);
  const auto first = static_cast<size_t>(begin);
  const auto count = static_cast<size_t>(end) - first;

  // Encodes the selected window one unit at a time. `reserve_factor` is the
  // number of bytes reserved per unit up front; the excess capacity is
  // released before returning.
  const auto encode = [first, count](const auto& units,
                                     size_t reserve_factor) {
    const auto window = units.subspan(first, count);
    std::string utf8;
    utf8.reserve(window.size() * reserve_factor);
    for (const auto& unit : window) {
      internal::Utf8Encode(utf8, unit);
    }
    utf8.shrink_to_fit();
    return utf8;
  };

  return absl::visit(
      absl::Overload(
          // `char` storage is copied out byte-for-byte without re-encoding.
          [first, count](absl::Span<const char> bytes) {
            const auto window = bytes.subspan(first, count);
            return std::string(window.data(), window.size());
          },
          [&encode](absl::Span<const uint8_t> units) {
            return encode(units, 2);
          },
          [&encode](absl::Span<const char16_t> units) {
            return encode(units, 3);
          },
          [&encode](absl::Span<const char32_t> units) {
            return encode(units, 4);
          }),
      view_);
}
137
138
0
void SourceContentView::AppendToString(std::string& dest) const {
139
0
  absl::visit(absl::Overload(
140
0
                  [&dest](absl::Span<const char> view) {
141
0
                    dest.append(view.data(), view.size());
142
0
                  },
143
0
                  [&dest](absl::Span<const uint8_t> view) {
144
0
                    for (const auto& code_point : view) {
145
0
                      internal::Utf8Encode(dest, code_point);
146
0
                    }
147
0
                  },
148
0
                  [&dest](absl::Span<const char16_t> view) {
149
0
                    for (const auto& code_point : view) {
150
0
                      internal::Utf8Encode(dest, code_point);
151
0
                    }
152
0
                  },
153
0
                  [&dest](absl::Span<const char32_t> view) {
154
0
                    for (const auto& code_point : view) {
155
0
                      internal::Utf8Encode(dest, code_point);
156
0
                    }
157
0
                  }),
158
0
              view_);
159
0
}
160
161
namespace common_internal {
162
163
class SourceImpl : public Source {
164
 public:
165
  SourceImpl(std::string description,
166
             absl::InlinedVector<SourcePosition, 1> line_offsets)
167
4.20k
      : description_(std::move(description)),
168
4.20k
        line_offsets_(std::move(line_offsets)) {}
169
170
63.9k
  absl::string_view description() const final { return description_; }
171
172
348k
  absl::Span<const SourcePosition> line_offsets() const final {
173
348k
    return absl::MakeConstSpan(line_offsets_);
174
348k
  }
175
176
 private:
177
  const std::string description_;
178
  const absl::InlinedVector<SourcePosition, 1> line_offsets_;
179
};
180
181
namespace {
182
183
class AsciiSource final : public SourceImpl {
184
 public:
185
  AsciiSource(std::string description,
186
              absl::InlinedVector<SourcePosition, 1> line_offsets,
187
              std::vector<char> text)
188
3.47k
      : SourceImpl(std::move(description), std::move(line_offsets)),
189
3.47k
        text_(std::move(text)) {}
190
191
40.4k
  ContentView content() const override {
192
40.4k
    return MakeContentView(absl::MakeConstSpan(text_));
193
40.4k
  }
194
195
 private:
196
  const std::vector<char> text_;
197
};
198
199
class Latin1Source final : public SourceImpl {
200
 public:
201
  Latin1Source(std::string description,
202
               absl::InlinedVector<SourcePosition, 1> line_offsets,
203
               std::vector<uint8_t> text)
204
105
      : SourceImpl(std::move(description), std::move(line_offsets)),
205
105
        text_(std::move(text)) {}
206
207
3.48k
  ContentView content() const override {
208
3.48k
    return MakeContentView(absl::MakeConstSpan(text_));
209
3.48k
  }
210
211
 private:
212
  const std::vector<uint8_t> text_;
213
};
214
215
class BasicPlaneSource final : public SourceImpl {
216
 public:
217
  BasicPlaneSource(std::string description,
218
                   absl::InlinedVector<SourcePosition, 1> line_offsets,
219
                   std::vector<char16_t> text)
220
309
      : SourceImpl(std::move(description), std::move(line_offsets)),
221
309
        text_(std::move(text)) {}
222
223
7.67k
  ContentView content() const override {
224
7.67k
    return MakeContentView(absl::MakeConstSpan(text_));
225
7.67k
  }
226
227
 private:
228
  const std::vector<char16_t> text_;
229
};
230
231
class SupplementalPlaneSource final : public SourceImpl {
232
 public:
233
  SupplementalPlaneSource(std::string description,
234
                          absl::InlinedVector<SourcePosition, 1> line_offsets,
235
                          std::vector<char32_t> text)
236
316
      : SourceImpl(std::move(description), std::move(line_offsets)),
237
316
        text_(std::move(text)) {}
238
239
11.0k
  ContentView content() const override {
240
11.0k
    return MakeContentView(absl::MakeConstSpan(text_));
241
11.0k
  }
242
243
 private:
244
  const std::vector<char32_t> text_;
245
};
246
247
template <typename T>
248
struct SourceTextTraits;
249
250
template <>
251
struct SourceTextTraits<absl::string_view> {
252
  using iterator_type = absl::string_view;
253
254
4.23k
  static iterator_type Begin(absl::string_view text) { return text; }
255
256
57.3M
  static void Advance(iterator_type& it, size_t n) { it.remove_prefix(n); }
257
258
  static void AppendTo(std::vector<uint8_t>& out, absl::string_view text,
259
187
                       size_t n) {
260
187
    const auto* in = reinterpret_cast<const uint8_t*>(text.data());
261
187
    out.insert(out.end(), in, in + n);
262
187
  }
263
264
3.47k
  static std::vector<char> ToVector(absl::string_view in) {
265
3.47k
    std::vector<char> out;
266
3.47k
    out.reserve(in.size());
267
3.47k
    out.insert(out.end(), in.begin(), in.end());
268
3.47k
    return out;
269
3.47k
  }
270
};
271
272
template <>
273
struct SourceTextTraits<absl::Cord> {
274
  using iterator_type = absl::Cord::CharIterator;
275
276
0
  static iterator_type Begin(const absl::Cord& text) {
277
0
    return text.char_begin();
278
0
  }
279
280
0
  static void Advance(iterator_type& it, size_t n) {
281
0
    absl::Cord::Advance(&it, n);
282
0
  }
283
284
  static void AppendTo(std::vector<uint8_t>& out, const absl::Cord& text,
285
0
                       size_t n) {
286
0
    auto it = text.char_begin();
287
0
    while (n > 0) {
288
0
      auto str = absl::Cord::ChunkRemaining(it);
289
0
      size_t to_append = std::min(n, str.size());
290
0
      const auto* in = reinterpret_cast<const uint8_t*>(str.data());
291
0
      out.insert(out.end(), in, in + to_append);
292
0
      n -= to_append;
293
0
      absl::Cord::Advance(&it, to_append);
294
0
    }
295
0
  }
296
297
0
  static std::vector<char> ToVector(const absl::Cord& in) {
298
0
    std::vector<char> out;
299
0
    out.reserve(in.size());
300
0
    for (const auto& chunk : in.Chunks()) {
301
0
      out.insert(out.end(), chunk.begin(), chunk.end());
302
0
    }
303
0
    return out;
304
0
  }
305
};
306
307
template <typename T>
308
absl::StatusOr<SourcePtr> NewSourceImpl(std::string description, const T& text,
309
4.23k
                                        const size_t text_size) {
310
4.23k
  if (ABSL_PREDICT_FALSE(
311
4.23k
          text_size >
312
4.23k
          static_cast<size_t>(std::numeric_limits<int32_t>::max()))) {
313
0
    return absl::InvalidArgumentError("expression larger than 2GiB limit");
314
0
  }
315
4.23k
  using Traits = SourceTextTraits<T>;
316
4.23k
  size_t index = 0;
317
4.23k
  typename Traits::iterator_type it = Traits::Begin(text);
318
4.23k
  SourcePosition offset = 0;
319
4.23k
  char32_t code_point;
320
4.23k
  size_t code_units;
321
4.23k
  std::vector<uint8_t> data8;
322
4.23k
  std::vector<char16_t> data16;
323
4.23k
  std::vector<char32_t> data32;
324
4.23k
  absl::InlinedVector<SourcePosition, 1> line_offsets;
325
39.6M
  while (index < text_size) {
326
39.6M
    std::tie(code_point, code_units) = cel::internal::Utf8Decode(it);
327
39.6M
    if (ABSL_PREDICT_FALSE(code_point ==
328
39.6M
                               cel::internal::kUnicodeReplacementCharacter &&
329
39.6M
                           code_units == 1)) {
330
      // Thats an invalid UTF-8 encoding.
331
14
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
332
14
    }
333
39.6M
    if (code_point == '\n') {
334
18.7M
      line_offsets.push_back(offset + 1);
335
18.7M
    }
336
39.6M
    if (code_point <= 0x7f) {
337
39.6M
      Traits::Advance(it, code_units);
338
39.6M
      index += code_units;
339
39.6M
      ++offset;
340
39.6M
      continue;
341
39.6M
    }
342
748
    if (code_point <= 0xff) {
343
187
      data8.reserve(text_size);
344
187
      Traits::AppendTo(data8, text, index);
345
187
      data8.push_back(static_cast<uint8_t>(code_point));
346
187
      Traits::Advance(it, code_units);
347
187
      index += code_units;
348
187
      ++offset;
349
187
      goto latin1;
350
187
    }
351
561
    if (code_point <= 0xffff) {
352
351
      data16.reserve(text_size);
353
1.94M
      for (size_t offset = 0; offset < index; offset++) {
354
1.94M
        data16.push_back(static_cast<uint8_t>(text[offset]));
355
1.94M
      }
356
351
      data16.push_back(static_cast<char16_t>(code_point));
357
351
      Traits::Advance(it, code_units);
358
351
      index += code_units;
359
351
      ++offset;
360
351
      goto basic;
361
351
    }
362
210
    data32.reserve(text_size);
363
1.88M
    for (size_t offset = 0; offset < index; offset++) {
364
1.88M
      data32.push_back(static_cast<char32_t>(text[offset]));
365
1.88M
    }
366
210
    data32.push_back(code_point);
367
210
    Traits::Advance(it, code_units);
368
210
    index += code_units;
369
210
    ++offset;
370
210
    goto supplemental;
371
561
  }
372
3.47k
  line_offsets.push_back(offset + 1);
373
3.47k
  return std::make_unique<AsciiSource>(
374
3.47k
      std::move(description), std::move(line_offsets), Traits::ToVector(text));
375
187
latin1:
376
4.06M
  while (index < text_size) {
377
4.06M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
378
4.06M
    if (ABSL_PREDICT_FALSE(code_point ==
379
4.06M
                               internal::kUnicodeReplacementCharacter &&
380
4.06M
                           code_units == 1)) {
381
      // Thats an invalid UTF-8 encoding.
382
4
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
383
4
    }
384
4.06M
    if (code_point == '\n') {
385
3.30M
      line_offsets.push_back(offset + 1);
386
3.30M
    }
387
4.06M
    if (code_point <= 0xff) {
388
4.06M
      data8.push_back(static_cast<uint8_t>(code_point));
389
4.06M
      Traits::Advance(it, code_units);
390
4.06M
      index += code_units;
391
4.06M
      ++offset;
392
4.06M
      continue;
393
4.06M
    }
394
78
    if (code_point <= 0xffff) {
395
36
      data16.reserve(text_size);
396
1.23M
      for (const auto& value : data8) {
397
1.23M
        data16.push_back(value);
398
1.23M
      }
399
36
      std::vector<uint8_t>().swap(data8);
400
36
      data16.push_back(static_cast<char16_t>(code_point));
401
36
      Traits::Advance(it, code_units);
402
36
      index += code_units;
403
36
      ++offset;
404
36
      goto basic;
405
36
    }
406
42
    data32.reserve(text_size);
407
895k
    for (const auto& value : data8) {
408
895k
      data32.push_back(value);
409
895k
    }
410
42
    std::vector<uint8_t>().swap(data8);
411
42
    data32.push_back(code_point);
412
42
    Traits::Advance(it, code_units);
413
42
    index += code_units;
414
42
    ++offset;
415
42
    goto supplemental;
416
78
  }
417
105
  line_offsets.push_back(offset + 1);
418
105
  return std::make_unique<Latin1Source>(
419
105
      std::move(description), std::move(line_offsets), std::move(data8));
420
387
basic:
421
4.33M
  while (index < text_size) {
422
4.33M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
423
4.33M
    if (ABSL_PREDICT_FALSE(code_point ==
424
4.33M
                               internal::kUnicodeReplacementCharacter &&
425
4.33M
                           code_units == 1)) {
426
      // Thats an invalid UTF-8 encoding.
427
9
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
428
9
    }
429
4.33M
    if (code_point == '\n') {
430
1.97M
      line_offsets.push_back(offset + 1);
431
1.97M
    }
432
4.33M
    if (code_point <= 0xffff) {
433
4.33M
      data16.push_back(static_cast<char16_t>(code_point));
434
4.33M
      Traits::Advance(it, code_units);
435
4.33M
      index += code_units;
436
4.33M
      ++offset;
437
4.33M
      continue;
438
4.33M
    }
439
69
    data32.reserve(text_size);
440
824k
    for (const auto& value : data16) {
441
824k
      data32.push_back(static_cast<char32_t>(value));
442
824k
    }
443
69
    std::vector<char16_t>().swap(data16);
444
69
    data32.push_back(code_point);
445
69
    Traits::Advance(it, code_units);
446
69
    index += code_units;
447
69
    ++offset;
448
69
    goto supplemental;
449
4.33M
  }
450
309
  line_offsets.push_back(offset + 1);
451
309
  return std::make_unique<BasicPlaneSource>(
452
309
      std::move(description), std::move(line_offsets), std::move(data16));
453
321
supplemental:
454
9.29M
  while (index < text_size) {
455
9.29M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
456
9.29M
    if (ABSL_PREDICT_FALSE(code_point ==
457
9.29M
                               internal::kUnicodeReplacementCharacter &&
458
9.29M
                           code_units == 1)) {
459
      // Thats an invalid UTF-8 encoding.
460
5
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
461
5
    }
462
9.29M
    if (code_point == '\n') {
463
5.57M
      line_offsets.push_back(offset + 1);
464
5.57M
    }
465
9.29M
    data32.push_back(code_point);
466
9.29M
    Traits::Advance(it, code_units);
467
9.29M
    index += code_units;
468
9.29M
    ++offset;
469
9.29M
  }
470
316
  line_offsets.push_back(offset + 1);
471
316
  return std::make_unique<SupplementalPlaneSource>(
472
316
      std::move(description), std::move(line_offsets), std::move(data32));
473
321
}
source.cc:absl::lts_20250512::StatusOr<std::__1::unique_ptr<cel::Source, std::__1::default_delete<cel::Source> > > cel::common_internal::(anonymous namespace)::NewSourceImpl<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long)
Line
Count
Source
309
4.23k
                                        const size_t text_size) {
310
4.23k
  if (ABSL_PREDICT_FALSE(
311
4.23k
          text_size >
312
4.23k
          static_cast<size_t>(std::numeric_limits<int32_t>::max()))) {
313
0
    return absl::InvalidArgumentError("expression larger than 2GiB limit");
314
0
  }
315
4.23k
  using Traits = SourceTextTraits<T>;
316
4.23k
  size_t index = 0;
317
4.23k
  typename Traits::iterator_type it = Traits::Begin(text);
318
4.23k
  SourcePosition offset = 0;
319
4.23k
  char32_t code_point;
320
4.23k
  size_t code_units;
321
4.23k
  std::vector<uint8_t> data8;
322
4.23k
  std::vector<char16_t> data16;
323
4.23k
  std::vector<char32_t> data32;
324
4.23k
  absl::InlinedVector<SourcePosition, 1> line_offsets;
325
39.6M
  while (index < text_size) {
326
39.6M
    std::tie(code_point, code_units) = cel::internal::Utf8Decode(it);
327
39.6M
    if (ABSL_PREDICT_FALSE(code_point ==
328
39.6M
                               cel::internal::kUnicodeReplacementCharacter &&
329
39.6M
                           code_units == 1)) {
330
      // Thats an invalid UTF-8 encoding.
331
14
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
332
14
    }
333
39.6M
    if (code_point == '\n') {
334
18.7M
      line_offsets.push_back(offset + 1);
335
18.7M
    }
336
39.6M
    if (code_point <= 0x7f) {
337
39.6M
      Traits::Advance(it, code_units);
338
39.6M
      index += code_units;
339
39.6M
      ++offset;
340
39.6M
      continue;
341
39.6M
    }
342
748
    if (code_point <= 0xff) {
343
187
      data8.reserve(text_size);
344
187
      Traits::AppendTo(data8, text, index);
345
187
      data8.push_back(static_cast<uint8_t>(code_point));
346
187
      Traits::Advance(it, code_units);
347
187
      index += code_units;
348
187
      ++offset;
349
187
      goto latin1;
350
187
    }
351
561
    if (code_point <= 0xffff) {
352
351
      data16.reserve(text_size);
353
1.94M
      for (size_t offset = 0; offset < index; offset++) {
354
1.94M
        data16.push_back(static_cast<uint8_t>(text[offset]));
355
1.94M
      }
356
351
      data16.push_back(static_cast<char16_t>(code_point));
357
351
      Traits::Advance(it, code_units);
358
351
      index += code_units;
359
351
      ++offset;
360
351
      goto basic;
361
351
    }
362
210
    data32.reserve(text_size);
363
1.88M
    for (size_t offset = 0; offset < index; offset++) {
364
1.88M
      data32.push_back(static_cast<char32_t>(text[offset]));
365
1.88M
    }
366
210
    data32.push_back(code_point);
367
210
    Traits::Advance(it, code_units);
368
210
    index += code_units;
369
210
    ++offset;
370
210
    goto supplemental;
371
561
  }
372
3.47k
  line_offsets.push_back(offset + 1);
373
3.47k
  return std::make_unique<AsciiSource>(
374
3.47k
      std::move(description), std::move(line_offsets), Traits::ToVector(text));
375
187
latin1:
376
4.06M
  while (index < text_size) {
377
4.06M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
378
4.06M
    if (ABSL_PREDICT_FALSE(code_point ==
379
4.06M
                               internal::kUnicodeReplacementCharacter &&
380
4.06M
                           code_units == 1)) {
381
      // Thats an invalid UTF-8 encoding.
382
4
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
383
4
    }
384
4.06M
    if (code_point == '\n') {
385
3.30M
      line_offsets.push_back(offset + 1);
386
3.30M
    }
387
4.06M
    if (code_point <= 0xff) {
388
4.06M
      data8.push_back(static_cast<uint8_t>(code_point));
389
4.06M
      Traits::Advance(it, code_units);
390
4.06M
      index += code_units;
391
4.06M
      ++offset;
392
4.06M
      continue;
393
4.06M
    }
394
78
    if (code_point <= 0xffff) {
395
36
      data16.reserve(text_size);
396
1.23M
      for (const auto& value : data8) {
397
1.23M
        data16.push_back(value);
398
1.23M
      }
399
36
      std::vector<uint8_t>().swap(data8);
400
36
      data16.push_back(static_cast<char16_t>(code_point));
401
36
      Traits::Advance(it, code_units);
402
36
      index += code_units;
403
36
      ++offset;
404
36
      goto basic;
405
36
    }
406
42
    data32.reserve(text_size);
407
895k
    for (const auto& value : data8) {
408
895k
      data32.push_back(value);
409
895k
    }
410
42
    std::vector<uint8_t>().swap(data8);
411
42
    data32.push_back(code_point);
412
42
    Traits::Advance(it, code_units);
413
42
    index += code_units;
414
42
    ++offset;
415
42
    goto supplemental;
416
78
  }
417
105
  line_offsets.push_back(offset + 1);
418
105
  return std::make_unique<Latin1Source>(
419
105
      std::move(description), std::move(line_offsets), std::move(data8));
420
387
basic:
421
4.33M
  while (index < text_size) {
422
4.33M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
423
4.33M
    if (ABSL_PREDICT_FALSE(code_point ==
424
4.33M
                               internal::kUnicodeReplacementCharacter &&
425
4.33M
                           code_units == 1)) {
426
      // Thats an invalid UTF-8 encoding.
427
9
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
428
9
    }
429
4.33M
    if (code_point == '\n') {
430
1.97M
      line_offsets.push_back(offset + 1);
431
1.97M
    }
432
4.33M
    if (code_point <= 0xffff) {
433
4.33M
      data16.push_back(static_cast<char16_t>(code_point));
434
4.33M
      Traits::Advance(it, code_units);
435
4.33M
      index += code_units;
436
4.33M
      ++offset;
437
4.33M
      continue;
438
4.33M
    }
439
69
    data32.reserve(text_size);
440
824k
    for (const auto& value : data16) {
441
824k
      data32.push_back(static_cast<char32_t>(value));
442
824k
    }
443
69
    std::vector<char16_t>().swap(data16);
444
69
    data32.push_back(code_point);
445
69
    Traits::Advance(it, code_units);
446
69
    index += code_units;
447
69
    ++offset;
448
69
    goto supplemental;
449
4.33M
  }
450
309
  line_offsets.push_back(offset + 1);
451
309
  return std::make_unique<BasicPlaneSource>(
452
309
      std::move(description), std::move(line_offsets), std::move(data16));
453
321
supplemental:
454
9.29M
  while (index < text_size) {
455
9.29M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
456
9.29M
    if (ABSL_PREDICT_FALSE(code_point ==
457
9.29M
                               internal::kUnicodeReplacementCharacter &&
458
9.29M
                           code_units == 1)) {
459
      // Thats an invalid UTF-8 encoding.
460
5
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
461
5
    }
462
9.29M
    if (code_point == '\n') {
463
5.57M
      line_offsets.push_back(offset + 1);
464
5.57M
    }
465
9.29M
    data32.push_back(code_point);
466
9.29M
    Traits::Advance(it, code_units);
467
9.29M
    index += code_units;
468
9.29M
    ++offset;
469
9.29M
  }
470
316
  line_offsets.push_back(offset + 1);
471
316
  return std::make_unique<SupplementalPlaneSource>(
472
316
      std::move(description), std::move(line_offsets), std::move(data32));
473
321
}
Unexecuted instantiation: source.cc:absl::lts_20250512::StatusOr<std::__1::unique_ptr<cel::Source, std::__1::default_delete<cel::Source> > > cel::common_internal::(anonymous namespace)::NewSourceImpl<absl::lts_20250512::Cord>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, absl::lts_20250512::Cord const&, unsigned long)
474
475
}  // namespace
476
477
}  // namespace common_internal
478
479
// Translates an absolute `position` into a SourceLocation whose line comes
// from FindLine and whose column is the distance from that line's starting
// position. Returns nullopt when FindLine cannot place the position.
absl::optional<SourceLocation> Source::GetLocation(
    SourcePosition position) const {
  const auto found = FindLine(position);
  if (ABSL_PREDICT_FALSE(!found.has_value())) {
    return absl::nullopt;
  }
  const auto& [line, line_start] = *found;
  return SourceLocation{line, position - line_start};
}
488
489
// Translates `location` into an absolute position: the starting position of
// `location.line` plus `location.column`. Returns nullopt when the line is
// below 1, the column is negative, or the line is not known to
// FindLinePosition.
absl::optional<SourcePosition> Source::GetPosition(
    const SourceLocation& location) const {
  const bool malformed = location.line < 1 || location.column < 0;
  if (ABSL_PREDICT_FALSE(malformed)) {
    return absl::nullopt;
  }
  const absl::optional<SourcePosition> line_start =
      FindLinePosition(location.line);
  if (ABSL_PREDICT_FALSE(!line_start.has_value())) {
    return absl::nullopt;
  }
  return *line_start + location.column;
}
500
501
58.4k
// Returns the text of the 1-based `line` as UTF-8. Returns nullopt when the
// line is unknown to FindLinePosition or the content is empty.
absl::optional<std::string> Source::Snippet(int32_t line) const {
  const auto text = this->content();
  const auto line_start = FindLinePosition(line);
  if (ABSL_PREDICT_FALSE(!line_start.has_value() || text.empty())) {
    return absl::nullopt;
  }
  const auto next_line_start = FindLinePosition(line + 1);
  if (!next_line_start.has_value()) {
    // No following line is recorded: take everything from the line start.
    return text.ToString(*line_start);
  }
  // Stop one position short of the next line's start so the line's trailing
  // newline is left out.
  return text.ToString(*line_start, *next_line_start - 1);
}
513
514
58.4k
// Renders the line referenced by `location` followed by a marker line that
// points at `location.column`, in the form:
//
//   "\n | <line text>\n | <dots up to the column><hat>"
//
// Tabs in the line text are replaced by single spaces. Characters that take
// more than one UTF-8 byte get the full-width dot/hat in the marker line.
// Returns an empty string when the line has no snippet or the snippet is
// empty.
std::string Source::DisplayErrorLocation(SourceLocation location) const {
  constexpr char32_t kDot = '.';
  constexpr char32_t kHat = '^';

  constexpr char32_t kWideDot = 0xff0e;
  constexpr char32_t kWideHat = 0xff3e;

  const absl::optional<std::string> snippet = Snippet(location.line);
  if (!snippet.has_value() || snippet->empty()) {
    return "";
  }

  const std::string line_text = absl::StrReplaceAll(*snippet, {{"\t", " "}});
  std::string result = absl::StrCat("\n | ", line_text, "\n | ");

  // Emit one dot per character that precedes the target column.
  absl::string_view remaining(line_text);
  int32_t column = 0;
  while (column < location.column && !remaining.empty()) {
    const size_t width = std::get<1>(internal::Utf8Decode(remaining));
    remaining.remove_prefix(width);
    internal::Utf8Encode(result, width > 1 ? kWideDot : kDot);
    ++column;
  }

  // The hat is sized after the character at the column, if there is one.
  size_t target_width = 0;
  if (!remaining.empty()) {
    target_width = std::get<1>(internal::Utf8Decode(remaining));
  }
  internal::Utf8Encode(result, target_width > 1 ? kWideHat : kHat);
  return result;
}
554
555
1.89M
// Returns the position at which the 1-based `line` starts. Line 1 always
// starts at position 0; later lines are looked up in line_offsets(), where
// entry k holds the start of line k + 2. Returns nullopt when `line` is below
// 1 or greater than the number of entries in line_offsets().
absl::optional<SourcePosition> Source::FindLinePosition(int32_t line) const {
  if (ABSL_PREDICT_FALSE(line < 1)) {
    return absl::nullopt;
  }
  if (line == 1) {
    return SourcePosition{0};
  }
  const auto offsets = this->line_offsets();
  const auto known_lines = static_cast<int32_t>(offsets.size());
  if (ABSL_PREDICT_FALSE(line > known_lines)) {
    return absl::nullopt;
  }
  return offsets[static_cast<size_t>(line - 2)];
}
568
569
// Finds the line containing `position`.
//
// Returns the 1-based line number paired with the position at which that line
// starts, or nullopt when `position` is negative. Line 1 starts at position 0;
// line k (k >= 2) starts at line_offsets()[k - 2].
//
// The lookup is a binary search, so it relies on line_offsets() being sorted
// in ascending order. The tables built by NewSourceImpl in this file satisfy
// that, because each entry is derived from a monotonically increasing
// position counter.
absl::optional<std::pair<int32_t, SourcePosition>> Source::FindLine(
    SourcePosition position) const {
  if (ABSL_PREDICT_FALSE(position < 0)) {
    return absl::nullopt;
  }
  const auto line_offsets = this->line_offsets();
  // The first offset strictly greater than `position` is the start of the
  // line after the one being looked for, so the number of offsets in front of
  // it is the number of line starts at or before `position`.
  const auto next_line_start =
      std::upper_bound(line_offsets.begin(), line_offsets.end(), position);
  const auto starts_at_or_before =
      static_cast<size_t>(next_line_start - line_offsets.begin());
  if (starts_at_or_before == 0) {
    return std::make_pair(int32_t{1}, SourcePosition{0});
  }
  return std::make_pair(static_cast<int32_t>(starts_at_or_before + 1),
                        line_offsets[starts_at_or_before - 1]);
}
587
588
// Creates a Source from contiguous UTF-8 `content`, labelled with
// `description`. Any error from NewSourceImpl is returned unchanged.
absl::StatusOr<absl_nonnull SourcePtr> NewSource(absl::string_view content,
                                                 std::string description) {
  const size_t byte_count = content.size();
  return common_internal::NewSourceImpl(std::move(description), content,
                                        byte_count);
}
593
594
// Creates a Source from UTF-8 `content` held in an absl::Cord, labelled with
// `description`. Any error from NewSourceImpl is returned unchanged.
absl::StatusOr<absl_nonnull SourcePtr> NewSource(const absl::Cord& content,
                                                 std::string description) {
  const size_t byte_count = content.size();
  return common_internal::NewSourceImpl(std::move(description), content,
                                        byte_count);
}
599
600
}  // namespace cel