Coverage Report

Created: 2026-01-12 07:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/common/source.cc
Line
Count
Source
1
// Copyright 2023 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "common/source.h"
16
17
#include <algorithm>
18
#include <cstddef>
19
#include <cstdint>
20
#include <limits>
21
#include <memory>
22
#include <string>
23
#include <tuple>
24
#include <utility>
25
#include <vector>
26
27
#include "absl/base/nullability.h"
28
#include "absl/base/optimization.h"
29
#include "absl/container/inlined_vector.h"
30
#include "absl/functional/overload.h"
31
#include "absl/log/absl_check.h"
32
#include "absl/status/status.h"
33
#include "absl/status/statusor.h"
34
#include "absl/strings/cord.h"
35
#include "absl/strings/str_cat.h"
36
#include "absl/strings/str_replace.h"
37
#include "absl/strings/string_view.h"
38
#include "absl/types/optional.h"
39
#include "absl/types/span.h"
40
#include "absl/types/variant.h"
41
#include "internal/unicode.h"
42
#include "internal/utf8.h"
43
44
namespace cel {
45
46
86.9k
// Returns the number of elements in the active span of `view_`, converted to
// SourcePosition. All four span alternatives are handled the same way.
SourcePosition SourceContentView::size() const {
  const auto element_count = [](const auto& units) -> size_t {
    return units.size();
  };
  return static_cast<SourcePosition>(absl::visit(element_count, view_));
}
55
56
111k
bool SourceContentView::empty() const {
57
111k
  return absl::visit(
58
111k
      absl::Overload(
59
111k
          [](absl::Span<const char> view) { return view.empty(); },
60
111k
          [](absl::Span<const uint8_t> view) { return view.empty(); },
61
111k
          [](absl::Span<const char16_t> view) { return view.empty(); },
62
111k
          [](absl::Span<const char32_t> view) { return view.empty(); }),
63
111k
      view_);
64
111k
}
65
66
416M
// Returns the element at `position`, widened to char32_t.
//
// `position` must lie in [0, size()); this is only verified by debug checks.
char32_t SourceContentView::at(SourcePosition position) const {
  ABSL_DCHECK_GE(position, 0);
  ABSL_DCHECK_LT(position, size());
  const size_t index = static_cast<size_t>(position);
  return absl::visit(
      absl::Overload(
          [index](absl::Span<const char> chars) -> char32_t {
            // Route through uint8_t first so that a signed `char` is never
            // sign-extended while widening.
            const uint8_t byte = static_cast<uint8_t>(chars[index]);
            return static_cast<char32_t>(byte);
          },
          [index](absl::Span<const uint8_t> bytes) -> char32_t {
            return static_cast<char32_t>(bytes[index]);
          },
          [index](absl::Span<const char16_t> units) -> char32_t {
            return static_cast<char32_t>(units[index]);
          },
          [index](absl::Span<const char32_t> units) -> char32_t {
            return static_cast<char32_t>(units[index]);
          }),
      view_);
}
89
90
// Returns the elements in [begin, end) as a std::string.
//
// A `char` view is copied verbatim. The uint8_t, char16_t and char32_t views
// are re-encoded element by element through internal::Utf8Encode.
//
// Requires 0 <= begin <= end <= size(); this is only verified by debug checks.
std::string SourceContentView::ToString(SourcePosition begin,
                                        SourcePosition end) const {
  ABSL_DCHECK_GE(begin, 0);
  ABSL_DCHECK_LE(end, size());
  ABSL_DCHECK_LE(begin, end);
  const size_t first = static_cast<size_t>(begin);
  const size_t length = static_cast<size_t>(end) - first;

  // Shared encoding path for the non-`char` views. `reserve_factor` is the
  // per-element reservation multiplier applied before encoding; the surplus
  // is released again once the final length is known.
  const auto transcode = [](const auto& units, size_t reserve_factor) {
    std::string utf8;
    utf8.reserve(units.size() * reserve_factor);
    for (const auto& unit : units) {
      internal::Utf8Encode(utf8, unit);
    }
    utf8.shrink_to_fit();
    return utf8;
  };

  return absl::visit(
      absl::Overload(
          [first, length](absl::Span<const char> chars) {
            const auto slice = chars.subspan(first, length);
            return std::string(slice.data(), slice.size());
          },
          [&transcode, first, length](absl::Span<const uint8_t> bytes) {
            return transcode(bytes.subspan(first, length), 2);
          },
          [&transcode, first, length](absl::Span<const char16_t> units) {
            return transcode(units.subspan(first, length), 3);
          },
          [&transcode, first, length](absl::Span<const char32_t> units) {
            return transcode(units.subspan(first, length), 4);
          }),
      view_);
}
137
138
0
void SourceContentView::AppendToString(std::string& dest) const {
139
0
  absl::visit(absl::Overload(
140
0
                  [&dest](absl::Span<const char> view) {
141
0
                    dest.append(view.data(), view.size());
142
0
                  },
143
0
                  [&dest](absl::Span<const uint8_t> view) {
144
0
                    for (const auto& code_point : view) {
145
0
                      internal::Utf8Encode(dest, code_point);
146
0
                    }
147
0
                  },
148
0
                  [&dest](absl::Span<const char16_t> view) {
149
0
                    for (const auto& code_point : view) {
150
0
                      internal::Utf8Encode(dest, code_point);
151
0
                    }
152
0
                  },
153
0
                  [&dest](absl::Span<const char32_t> view) {
154
0
                    for (const auto& code_point : view) {
155
0
                      internal::Utf8Encode(dest, code_point);
156
0
                    }
157
0
                  }),
158
0
              view_);
159
0
}
160
161
namespace common_internal {
162
163
class SourceImpl : public Source {
164
 public:
165
  SourceImpl(std::string description,
166
             absl::InlinedVector<SourcePosition, 1> line_offsets)
167
6.47k
      : description_(std::move(description)),
168
6.47k
        line_offsets_(std::move(line_offsets)) {}
169
170
121k
  absl::string_view description() const final { return description_; }
171
172
497k
  absl::Span<const SourcePosition> line_offsets() const final {
173
497k
    return absl::MakeConstSpan(line_offsets_);
174
497k
  }
175
176
 private:
177
  const std::string description_;
178
  const absl::InlinedVector<SourcePosition, 1> line_offsets_;
179
};
180
181
namespace {
182
183
class AsciiSource final : public SourceImpl {
184
 public:
185
  AsciiSource(std::string description,
186
              absl::InlinedVector<SourcePosition, 1> line_offsets,
187
              std::vector<char> text)
188
5.26k
      : SourceImpl(std::move(description), std::move(line_offsets)),
189
5.26k
        text_(std::move(text)) {}
190
191
76.9k
  ContentView content() const override {
192
76.9k
    return MakeContentView(absl::MakeConstSpan(text_));
193
76.9k
  }
194
195
 private:
196
  const std::vector<char> text_;
197
};
198
199
class Latin1Source final : public SourceImpl {
200
 public:
201
  Latin1Source(std::string description,
202
               absl::InlinedVector<SourcePosition, 1> line_offsets,
203
               std::vector<uint8_t> text)
204
166
      : SourceImpl(std::move(description), std::move(line_offsets)),
205
166
        text_(std::move(text)) {}
206
207
5.13k
  ContentView content() const override {
208
5.13k
    return MakeContentView(absl::MakeConstSpan(text_));
209
5.13k
  }
210
211
 private:
212
  const std::vector<uint8_t> text_;
213
};
214
215
class BasicPlaneSource final : public SourceImpl {
216
 public:
217
  BasicPlaneSource(std::string description,
218
                   absl::InlinedVector<SourcePosition, 1> line_offsets,
219
                   std::vector<char16_t> text)
220
506
      : SourceImpl(std::move(description), std::move(line_offsets)),
221
506
        text_(std::move(text)) {}
222
223
17.9k
  ContentView content() const override {
224
17.9k
    return MakeContentView(absl::MakeConstSpan(text_));
225
17.9k
  }
226
227
 private:
228
  const std::vector<char16_t> text_;
229
};
230
231
class SupplementalPlaneSource final : public SourceImpl {
232
 public:
233
  SupplementalPlaneSource(std::string description,
234
                          absl::InlinedVector<SourcePosition, 1> line_offsets,
235
                          std::vector<char32_t> text)
236
538
      : SourceImpl(std::move(description), std::move(line_offsets)),
237
538
        text_(std::move(text)) {}
238
239
19.3k
  ContentView content() const override {
240
19.3k
    return MakeContentView(absl::MakeConstSpan(text_));
241
19.3k
  }
242
243
 private:
244
  const std::vector<char32_t> text_;
245
};
246
247
// Adapter describing how to walk and copy a text container type. Only the
// explicit specializations are defined; the primary template is left
// incomplete on purpose.
template <typename Text>
struct SourceTextTraits;
249
250
template <>
251
struct SourceTextTraits<absl::string_view> {
252
  using iterator_type = absl::string_view;
253
254
6.54k
  static iterator_type Begin(absl::string_view text) { return text; }
255
256
162M
  static void Advance(iterator_type& it, size_t n) { it.remove_prefix(n); }
257
258
  static void AppendTo(std::vector<uint8_t>& out, absl::string_view text,
259
303
                       size_t n) {
260
303
    const auto* in = reinterpret_cast<const uint8_t*>(text.data());
261
303
    out.insert(out.end(), in, in + n);
262
303
  }
263
264
5.26k
  static std::vector<char> ToVector(absl::string_view in) {
265
5.26k
    std::vector<char> out;
266
5.26k
    out.reserve(in.size());
267
5.26k
    out.insert(out.end(), in.begin(), in.end());
268
5.26k
    return out;
269
5.26k
  }
270
};
271
272
template <>
273
struct SourceTextTraits<absl::Cord> {
274
  using iterator_type = absl::Cord::CharIterator;
275
276
0
  static iterator_type Begin(const absl::Cord& text) {
277
0
    return text.char_begin();
278
0
  }
279
280
0
  static void Advance(iterator_type& it, size_t n) {
281
0
    absl::Cord::Advance(&it, n);
282
0
  }
283
284
  static void AppendTo(std::vector<uint8_t>& out, const absl::Cord& text,
285
0
                       size_t n) {
286
0
    auto it = text.char_begin();
287
0
    while (n > 0) {
288
0
      auto str = absl::Cord::ChunkRemaining(it);
289
0
      size_t to_append = std::min(n, str.size());
290
0
      const auto* in = reinterpret_cast<const uint8_t*>(str.data());
291
0
      out.insert(out.end(), in, in + to_append);
292
0
      n -= to_append;
293
0
      absl::Cord::Advance(&it, to_append);
294
0
    }
295
0
  }
296
297
0
  static std::vector<char> ToVector(const absl::Cord& in) {
298
0
    std::vector<char> out;
299
0
    out.reserve(in.size());
300
0
    for (const auto& chunk : in.Chunks()) {
301
0
      out.insert(out.end(), chunk.begin(), chunk.end());
302
0
    }
303
0
    return out;
304
0
  }
305
};
306
307
template <typename T>
308
absl::StatusOr<SourcePtr> NewSourceImpl(std::string description, const T& text,
309
6.54k
                                        const size_t text_size) {
310
6.54k
  if (ABSL_PREDICT_FALSE(
311
6.54k
          text_size >
312
6.54k
          static_cast<size_t>(std::numeric_limits<int32_t>::max()))) {
313
0
    return absl::InvalidArgumentError("expression larger than 2GiB limit");
314
0
  }
315
6.54k
  using Traits = SourceTextTraits<T>;
316
6.54k
  size_t index = 0;
317
6.54k
  typename Traits::iterator_type it = Traits::Begin(text);
318
6.54k
  SourcePosition offset = 0;
319
6.54k
  char32_t code_point;
320
6.54k
  size_t code_units;
321
6.54k
  std::vector<uint8_t> data8;
322
6.54k
  std::vector<char16_t> data16;
323
6.54k
  std::vector<char32_t> data32;
324
6.54k
  absl::InlinedVector<SourcePosition, 1> line_offsets;
325
110M
  while (index < text_size) {
326
110M
    std::tie(code_point, code_units) = cel::internal::Utf8Decode(it);
327
110M
    if (ABSL_PREDICT_FALSE(code_point ==
328
110M
                               cel::internal::kUnicodeReplacementCharacter &&
329
110M
                           code_units == 1)) {
330
      // Thats an invalid UTF-8 encoding.
331
27
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
332
27
    }
333
110M
    if (code_point == '\n') {
334
32.8M
      line_offsets.push_back(offset + 1);
335
32.8M
    }
336
110M
    if (code_point <= 0x7f) {
337
110M
      Traits::Advance(it, code_units);
338
110M
      index += code_units;
339
110M
      ++offset;
340
110M
      continue;
341
110M
    }
342
1.24k
    if (code_point <= 0xff) {
343
303
      data8.reserve(text_size);
344
303
      Traits::AppendTo(data8, text, index);
345
303
      data8.push_back(static_cast<uint8_t>(code_point));
346
303
      Traits::Advance(it, code_units);
347
303
      index += code_units;
348
303
      ++offset;
349
303
      goto latin1;
350
303
    }
351
945
    if (code_point <= 0xffff) {
352
572
      data16.reserve(text_size);
353
11.4M
      for (size_t offset = 0; offset < index; offset++) {
354
11.4M
        data16.push_back(static_cast<uint8_t>(text[offset]));
355
11.4M
      }
356
572
      data16.push_back(static_cast<char16_t>(code_point));
357
572
      Traits::Advance(it, code_units);
358
572
      index += code_units;
359
572
      ++offset;
360
572
      goto basic;
361
572
    }
362
373
    data32.reserve(text_size);
363
4.21M
    for (size_t offset = 0; offset < index; offset++) {
364
4.20M
      data32.push_back(static_cast<char32_t>(text[offset]));
365
4.20M
    }
366
373
    data32.push_back(code_point);
367
373
    Traits::Advance(it, code_units);
368
373
    index += code_units;
369
373
    ++offset;
370
373
    goto supplemental;
371
945
  }
372
5.26k
  line_offsets.push_back(offset + 1);
373
5.26k
  return std::make_unique<AsciiSource>(
374
5.26k
      std::move(description), std::move(line_offsets), Traits::ToVector(text));
375
303
latin1:
376
5.66M
  while (index < text_size) {
377
5.66M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
378
5.66M
    if (ABSL_PREDICT_FALSE(code_point ==
379
5.66M
                               internal::kUnicodeReplacementCharacter &&
380
5.66M
                           code_units == 1)) {
381
      // Thats an invalid UTF-8 encoding.
382
8
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
383
8
    }
384
5.66M
    if (code_point == '\n') {
385
3.77M
      line_offsets.push_back(offset + 1);
386
3.77M
    }
387
5.66M
    if (code_point <= 0xff) {
388
5.66M
      data8.push_back(static_cast<uint8_t>(code_point));
389
5.66M
      Traits::Advance(it, code_units);
390
5.66M
      index += code_units;
391
5.66M
      ++offset;
392
5.66M
      continue;
393
5.66M
    }
394
129
    if (code_point <= 0xffff) {
395
51
      data16.reserve(text_size);
396
1.37M
      for (const auto& value : data8) {
397
1.37M
        data16.push_back(value);
398
1.37M
      }
399
51
      std::vector<uint8_t>().swap(data8);
400
51
      data16.push_back(static_cast<char16_t>(code_point));
401
51
      Traits::Advance(it, code_units);
402
51
      index += code_units;
403
51
      ++offset;
404
51
      goto basic;
405
51
    }
406
78
    data32.reserve(text_size);
407
1.66M
    for (const auto& value : data8) {
408
1.66M
      data32.push_back(value);
409
1.66M
    }
410
78
    std::vector<uint8_t>().swap(data8);
411
78
    data32.push_back(code_point);
412
78
    Traits::Advance(it, code_units);
413
78
    index += code_units;
414
78
    ++offset;
415
78
    goto supplemental;
416
129
  }
417
166
  line_offsets.push_back(offset + 1);
418
166
  return std::make_unique<Latin1Source>(
419
166
      std::move(description), std::move(line_offsets), std::move(data8));
420
623
basic:
421
14.1M
  while (index < text_size) {
422
14.1M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
423
14.1M
    if (ABSL_PREDICT_FALSE(code_point ==
424
14.1M
                               internal::kUnicodeReplacementCharacter &&
425
14.1M
                           code_units == 1)) {
426
      // Thats an invalid UTF-8 encoding.
427
16
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
428
16
    }
429
14.1M
    if (code_point == '\n') {
430
9.24M
      line_offsets.push_back(offset + 1);
431
9.24M
    }
432
14.1M
    if (code_point <= 0xffff) {
433
14.1M
      data16.push_back(static_cast<char16_t>(code_point));
434
14.1M
      Traits::Advance(it, code_units);
435
14.1M
      index += code_units;
436
14.1M
      ++offset;
437
14.1M
      continue;
438
14.1M
    }
439
101
    data32.reserve(text_size);
440
1.51M
    for (const auto& value : data16) {
441
1.51M
      data32.push_back(static_cast<char32_t>(value));
442
1.51M
    }
443
101
    std::vector<char16_t>().swap(data16);
444
101
    data32.push_back(code_point);
445
101
    Traits::Advance(it, code_units);
446
101
    index += code_units;
447
101
    ++offset;
448
101
    goto supplemental;
449
14.1M
  }
450
506
  line_offsets.push_back(offset + 1);
451
506
  return std::make_unique<BasicPlaneSource>(
452
506
      std::move(description), std::move(line_offsets), std::move(data16));
453
552
supplemental:
454
32.5M
  while (index < text_size) {
455
32.5M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
456
32.5M
    if (ABSL_PREDICT_FALSE(code_point ==
457
32.5M
                               internal::kUnicodeReplacementCharacter &&
458
32.5M
                           code_units == 1)) {
459
      // Thats an invalid UTF-8 encoding.
460
14
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
461
14
    }
462
32.5M
    if (code_point == '\n') {
463
14.3M
      line_offsets.push_back(offset + 1);
464
14.3M
    }
465
32.5M
    data32.push_back(code_point);
466
32.5M
    Traits::Advance(it, code_units);
467
32.5M
    index += code_units;
468
32.5M
    ++offset;
469
32.5M
  }
470
538
  line_offsets.push_back(offset + 1);
471
538
  return std::make_unique<SupplementalPlaneSource>(
472
538
      std::move(description), std::move(line_offsets), std::move(data32));
473
552
}
source.cc:absl::lts_20250512::StatusOr<std::__1::unique_ptr<cel::Source, std::__1::default_delete<cel::Source> > > cel::common_internal::(anonymous namespace)::NewSourceImpl<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long)
Line
Count
Source
309
6.54k
                                        const size_t text_size) {
310
6.54k
  if (ABSL_PREDICT_FALSE(
311
6.54k
          text_size >
312
6.54k
          static_cast<size_t>(std::numeric_limits<int32_t>::max()))) {
313
0
    return absl::InvalidArgumentError("expression larger than 2GiB limit");
314
0
  }
315
6.54k
  using Traits = SourceTextTraits<T>;
316
6.54k
  size_t index = 0;
317
6.54k
  typename Traits::iterator_type it = Traits::Begin(text);
318
6.54k
  SourcePosition offset = 0;
319
6.54k
  char32_t code_point;
320
6.54k
  size_t code_units;
321
6.54k
  std::vector<uint8_t> data8;
322
6.54k
  std::vector<char16_t> data16;
323
6.54k
  std::vector<char32_t> data32;
324
6.54k
  absl::InlinedVector<SourcePosition, 1> line_offsets;
325
110M
  while (index < text_size) {
326
110M
    std::tie(code_point, code_units) = cel::internal::Utf8Decode(it);
327
110M
    if (ABSL_PREDICT_FALSE(code_point ==
328
110M
                               cel::internal::kUnicodeReplacementCharacter &&
329
110M
                           code_units == 1)) {
330
      // Thats an invalid UTF-8 encoding.
331
27
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
332
27
    }
333
110M
    if (code_point == '\n') {
334
32.8M
      line_offsets.push_back(offset + 1);
335
32.8M
    }
336
110M
    if (code_point <= 0x7f) {
337
110M
      Traits::Advance(it, code_units);
338
110M
      index += code_units;
339
110M
      ++offset;
340
110M
      continue;
341
110M
    }
342
1.24k
    if (code_point <= 0xff) {
343
303
      data8.reserve(text_size);
344
303
      Traits::AppendTo(data8, text, index);
345
303
      data8.push_back(static_cast<uint8_t>(code_point));
346
303
      Traits::Advance(it, code_units);
347
303
      index += code_units;
348
303
      ++offset;
349
303
      goto latin1;
350
303
    }
351
945
    if (code_point <= 0xffff) {
352
572
      data16.reserve(text_size);
353
11.4M
      for (size_t offset = 0; offset < index; offset++) {
354
11.4M
        data16.push_back(static_cast<uint8_t>(text[offset]));
355
11.4M
      }
356
572
      data16.push_back(static_cast<char16_t>(code_point));
357
572
      Traits::Advance(it, code_units);
358
572
      index += code_units;
359
572
      ++offset;
360
572
      goto basic;
361
572
    }
362
373
    data32.reserve(text_size);
363
4.21M
    for (size_t offset = 0; offset < index; offset++) {
364
4.20M
      data32.push_back(static_cast<char32_t>(text[offset]));
365
4.20M
    }
366
373
    data32.push_back(code_point);
367
373
    Traits::Advance(it, code_units);
368
373
    index += code_units;
369
373
    ++offset;
370
373
    goto supplemental;
371
945
  }
372
5.26k
  line_offsets.push_back(offset + 1);
373
5.26k
  return std::make_unique<AsciiSource>(
374
5.26k
      std::move(description), std::move(line_offsets), Traits::ToVector(text));
375
303
latin1:
376
5.66M
  while (index < text_size) {
377
5.66M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
378
5.66M
    if (ABSL_PREDICT_FALSE(code_point ==
379
5.66M
                               internal::kUnicodeReplacementCharacter &&
380
5.66M
                           code_units == 1)) {
381
      // Thats an invalid UTF-8 encoding.
382
8
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
383
8
    }
384
5.66M
    if (code_point == '\n') {
385
3.77M
      line_offsets.push_back(offset + 1);
386
3.77M
    }
387
5.66M
    if (code_point <= 0xff) {
388
5.66M
      data8.push_back(static_cast<uint8_t>(code_point));
389
5.66M
      Traits::Advance(it, code_units);
390
5.66M
      index += code_units;
391
5.66M
      ++offset;
392
5.66M
      continue;
393
5.66M
    }
394
129
    if (code_point <= 0xffff) {
395
51
      data16.reserve(text_size);
396
1.37M
      for (const auto& value : data8) {
397
1.37M
        data16.push_back(value);
398
1.37M
      }
399
51
      std::vector<uint8_t>().swap(data8);
400
51
      data16.push_back(static_cast<char16_t>(code_point));
401
51
      Traits::Advance(it, code_units);
402
51
      index += code_units;
403
51
      ++offset;
404
51
      goto basic;
405
51
    }
406
78
    data32.reserve(text_size);
407
1.66M
    for (const auto& value : data8) {
408
1.66M
      data32.push_back(value);
409
1.66M
    }
410
78
    std::vector<uint8_t>().swap(data8);
411
78
    data32.push_back(code_point);
412
78
    Traits::Advance(it, code_units);
413
78
    index += code_units;
414
78
    ++offset;
415
78
    goto supplemental;
416
129
  }
417
166
  line_offsets.push_back(offset + 1);
418
166
  return std::make_unique<Latin1Source>(
419
166
      std::move(description), std::move(line_offsets), std::move(data8));
420
623
basic:
421
14.1M
  while (index < text_size) {
422
14.1M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
423
14.1M
    if (ABSL_PREDICT_FALSE(code_point ==
424
14.1M
                               internal::kUnicodeReplacementCharacter &&
425
14.1M
                           code_units == 1)) {
426
      // Thats an invalid UTF-8 encoding.
427
16
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
428
16
    }
429
14.1M
    if (code_point == '\n') {
430
9.24M
      line_offsets.push_back(offset + 1);
431
9.24M
    }
432
14.1M
    if (code_point <= 0xffff) {
433
14.1M
      data16.push_back(static_cast<char16_t>(code_point));
434
14.1M
      Traits::Advance(it, code_units);
435
14.1M
      index += code_units;
436
14.1M
      ++offset;
437
14.1M
      continue;
438
14.1M
    }
439
101
    data32.reserve(text_size);
440
1.51M
    for (const auto& value : data16) {
441
1.51M
      data32.push_back(static_cast<char32_t>(value));
442
1.51M
    }
443
101
    std::vector<char16_t>().swap(data16);
444
101
    data32.push_back(code_point);
445
101
    Traits::Advance(it, code_units);
446
101
    index += code_units;
447
101
    ++offset;
448
101
    goto supplemental;
449
14.1M
  }
450
506
  line_offsets.push_back(offset + 1);
451
506
  return std::make_unique<BasicPlaneSource>(
452
506
      std::move(description), std::move(line_offsets), std::move(data16));
453
552
supplemental:
454
32.5M
  while (index < text_size) {
455
32.5M
    std::tie(code_point, code_units) = internal::Utf8Decode(it);
456
32.5M
    if (ABSL_PREDICT_FALSE(code_point ==
457
32.5M
                               internal::kUnicodeReplacementCharacter &&
458
32.5M
                           code_units == 1)) {
459
      // Thats an invalid UTF-8 encoding.
460
14
      return absl::InvalidArgumentError("cannot parse malformed UTF-8 input");
461
14
    }
462
32.5M
    if (code_point == '\n') {
463
14.3M
      line_offsets.push_back(offset + 1);
464
14.3M
    }
465
32.5M
    data32.push_back(code_point);
466
32.5M
    Traits::Advance(it, code_units);
467
32.5M
    index += code_units;
468
32.5M
    ++offset;
469
32.5M
  }
470
538
  line_offsets.push_back(offset + 1);
471
538
  return std::make_unique<SupplementalPlaneSource>(
472
538
      std::move(description), std::move(line_offsets), std::move(data32));
473
552
}
Unexecuted instantiation: source.cc:absl::lts_20250512::StatusOr<std::__1::unique_ptr<cel::Source, std::__1::default_delete<cel::Source> > > cel::common_internal::(anonymous namespace)::NewSourceImpl<absl::lts_20250512::Cord>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, absl::lts_20250512::Cord const&, unsigned long)
474
475
}  // namespace
476
477
}  // namespace common_internal
478
479
// Converts an absolute `position` into a line/column SourceLocation.
//
// The column is the distance from the start offset FindLine reports for the
// containing line. Returns nullopt when FindLine cannot place `position`.
absl::optional<SourceLocation> Source::GetLocation(
    SourcePosition position) const {
  const auto line_and_start = FindLine(position);
  if (ABSL_PREDICT_FALSE(!line_and_start.has_value())) {
    return absl::nullopt;
  }
  return SourceLocation{line_and_start->first,
                        position - line_and_start->second};
}
488
489
// Converts a line/column `location` into an absolute position by adding the
// column to the start offset of the line.
//
// Returns nullopt when the line is below 1, the column is negative, or the
// line is not known to FindLinePosition.
absl::optional<SourcePosition> Source::GetPosition(
    const SourceLocation& location) const {
  if (ABSL_PREDICT_FALSE(location.line < 1 || location.column < 0)) {
    return absl::nullopt;
  }
  const auto line_start = FindLinePosition(location.line);
  if (ABSL_PREDICT_FALSE(!line_start.has_value())) {
    return absl::nullopt;
  }
  return *line_start + location.column;
}
500
501
112k
// Returns the text of the 1-based `line`, or nullopt when the line is unknown
// or the source is empty.
//
// When a following line exists, the snippet stops one element before that
// line's start offset, which excludes the separating newline. Otherwise it
// runs to the end of the content.
absl::optional<std::string> Source::Snippet(int32_t line) const {
  const auto text = this->content();
  const auto line_begin = FindLinePosition(line);
  if (ABSL_PREDICT_FALSE(!line_begin.has_value() || text.empty())) {
    return absl::nullopt;
  }
  const auto next_line_begin = FindLinePosition(line + 1);
  if (!next_line_begin.has_value()) {
    return text.ToString(*line_begin);
  }
  return text.ToString(*line_begin, *next_line_begin - 1);
}
513
514
112k
// Renders the line containing `location` followed by a marker line that
// points at the column, in the form
//
//   "\n | <snippet>\n | ....^"
//
// Tabs in the snippet are replaced by single spaces. Each character before
// the column contributes a '.', or the full-width dot U+FF0E when it takes
// more than one UTF-8 byte; the caret likewise becomes the full-width U+FF3E
// when the character under it is multi-byte. Returns an empty string when no
// non-empty snippet is available for the line.
std::string Source::DisplayErrorLocation(SourceLocation location) const {
  constexpr char32_t kDot = '.';
  constexpr char32_t kHat = '^';

  constexpr char32_t kWideDot = 0xff0e;
  constexpr char32_t kWideHat = 0xff3e;

  absl::optional<std::string> snippet = Snippet(location.line);
  if (!snippet.has_value() || snippet->empty()) {
    return "";
  }

  *snippet = absl::StrReplaceAll(*snippet, {{"\t", " "}});
  std::string result = absl::StrCat("\n | ", *snippet, "\n | ");

  // Walk the snippet one character at a time up to the target column.
  absl::string_view remaining(*snippet);
  std::string index_line;
  int32_t column = 0;
  while (column < location.column && !remaining.empty()) {
    size_t width;
    std::tie(std::ignore, width) = internal::Utf8Decode(remaining);
    remaining.remove_prefix(width);
    internal::Utf8Encode(index_line, width > 1 ? kWideDot : kDot);
    ++column;
  }

  // Width of the character under the caret; stays 0 when the column lies
  // past the end of the snippet.
  size_t caret_width = 0;
  if (!remaining.empty()) {
    std::tie(std::ignore, caret_width) = internal::Utf8Decode(remaining);
  }
  internal::Utf8Encode(index_line, caret_width > 1 ? kWideHat : kHat);

  result.append(index_line);
  return result;
}
554
555
4.22M
// Returns the offset at which the 1-based `line` starts.
//
// Line 1 always starts at offset 0. For later lines the start is entry
// `line - 2` of the line-offset table. Returns nullopt for `line < 1` or when
// `line` exceeds the number of table entries.
absl::optional<SourcePosition> Source::FindLinePosition(int32_t line) const {
  if (ABSL_PREDICT_FALSE(line < 1)) {
    return absl::nullopt;
  }
  if (line == 1) {
    return SourcePosition{0};
  }
  const auto offsets = this->line_offsets();
  if (ABSL_PREDICT_FALSE(line > static_cast<int32_t>(offsets.size()))) {
    return absl::nullopt;
  }
  return offsets[static_cast<size_t>(line - 2)];
}
568
569
// Finds the line containing `position`.
//
// Returns the 1-based line number paired with the offset at which that line
// starts, or nullopt when `position` is negative.
//
// The line number is one more than the count of table entries that are
// <= `position`. The table is searched with a binary search instead of a
// linear scan, so a lookup costs O(log n) in the number of lines.
// NOTE(review): this relies on line_offsets() being in ascending order, which
// holds for the tables built in this file; confirm for any other Source
// implementation.
absl::optional<std::pair<int32_t, SourcePosition>> Source::FindLine(
    SourcePosition position) const {
  if (ABSL_PREDICT_FALSE(position < 0)) {
    return absl::nullopt;
  }
  const auto line_offsets = this->line_offsets();
  // First entry strictly greater than `position`, i.e. the start of the line
  // after the one being looked for.
  const auto next_line_start =
      std::upper_bound(line_offsets.begin(), line_offsets.end(), position);
  const size_t preceding_starts =
      static_cast<size_t>(next_line_start - line_offsets.begin());
  if (preceding_starts == 0) {
    // The first line has no table entry; it always starts at offset 0.
    return std::make_pair(int32_t{1}, SourcePosition{0});
  }
  return std::make_pair(static_cast<int32_t>(preceding_starts + 1),
                        line_offsets[preceding_starts - 1]);
}
587
588
// Creates a Source from contiguous UTF-8 `content`, labelled with
// `description`. Errors from NewSourceImpl are returned unchanged.
absl::StatusOr<absl_nonnull SourcePtr> NewSource(absl::string_view content,
                                                 std::string description) {
  const size_t content_size = content.size();
  return common_internal::NewSourceImpl(std::move(description), content,
                                        content_size);
}
593
594
// Creates a Source from UTF-8 `content` held in an absl::Cord, labelled with
// `description`. Errors from NewSourceImpl are returned unchanged.
absl::StatusOr<absl_nonnull SourcePtr> NewSource(const absl::Cord& content,
                                                 std::string description) {
  const size_t content_size = content.size();
  return common_internal::NewSourceImpl(std::move(description), content,
                                        content_size);
}
599
600
}  // namespace cel