/proc/self/cwd/common/source.cc
Line | Count | Source |
1 | | // Copyright 2023 Google LLC |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "common/source.h" |
16 | | |
17 | | #include <algorithm> |
18 | | #include <cstddef> |
19 | | #include <cstdint> |
20 | | #include <limits> |
21 | | #include <memory> |
22 | | #include <string> |
23 | | #include <tuple> |
24 | | #include <utility> |
25 | | #include <vector> |
26 | | |
27 | | #include "absl/base/nullability.h" |
28 | | #include "absl/base/optimization.h" |
29 | | #include "absl/container/inlined_vector.h" |
30 | | #include "absl/functional/overload.h" |
31 | | #include "absl/log/absl_check.h" |
32 | | #include "absl/status/status.h" |
33 | | #include "absl/status/statusor.h" |
34 | | #include "absl/strings/cord.h" |
35 | | #include "absl/strings/str_cat.h" |
36 | | #include "absl/strings/str_replace.h" |
37 | | #include "absl/strings/string_view.h" |
38 | | #include "absl/types/optional.h" |
39 | | #include "absl/types/span.h" |
40 | | #include "absl/types/variant.h" |
41 | | #include "internal/unicode.h" |
42 | | #include "internal/utf8.h" |
43 | | |
44 | | namespace cel { |
45 | | |
46 | 112k | SourcePosition SourceContentView::size() const { |
47 | 112k | return static_cast<SourcePosition>(absl::visit( |
48 | 112k | absl::Overload( |
49 | 112k | [](absl::Span<const char> view) { return view.size(); }, |
50 | 112k | [](absl::Span<const uint8_t> view) { return view.size(); }, |
51 | 112k | [](absl::Span<const char16_t> view) { return view.size(); }, |
52 | 112k | [](absl::Span<const char32_t> view) { return view.size(); }), |
53 | 112k | view_)); |
54 | 112k | } |
55 | | |
56 | 138k | bool SourceContentView::empty() const { |
57 | 138k | return absl::visit( |
58 | 138k | absl::Overload( |
59 | 138k | [](absl::Span<const char> view) { return view.empty(); }, |
60 | 138k | [](absl::Span<const uint8_t> view) { return view.empty(); }, |
61 | 138k | [](absl::Span<const char16_t> view) { return view.empty(); }, |
62 | 138k | [](absl::Span<const char32_t> view) { return view.empty(); }), |
63 | 138k | view_); |
64 | 138k | } |
65 | | |
66 | 403M | char32_t SourceContentView::at(SourcePosition position) const { |
67 | 403M | ABSL_DCHECK_GE(position, 0); |
68 | 403M | ABSL_DCHECK_LT(position, size()); |
69 | 403M | return absl::visit( |
70 | 403M | absl::Overload( |
71 | 403M | [position = |
72 | 403M | static_cast<size_t>(position)](absl::Span<const char> view) { |
73 | 202M | return static_cast<char32_t>(static_cast<uint8_t>(view[position])); |
74 | 202M | }, |
75 | 403M | [position = |
76 | 403M | static_cast<size_t>(position)](absl::Span<const uint8_t> view) { |
77 | 7.77M | return static_cast<char32_t>(view[position]); |
78 | 7.77M | }, |
79 | 403M | [position = |
80 | 403M | static_cast<size_t>(position)](absl::Span<const char16_t> view) { |
81 | 51.3M | return static_cast<char32_t>(view[position]); |
82 | 51.3M | }, |
83 | 403M | [position = |
84 | 403M | static_cast<size_t>(position)](absl::Span<const char32_t> view) { |
85 | 141M | return static_cast<char32_t>(view[position]); |
86 | 141M | }), |
87 | 403M | view_); |
88 | 403M | } |
89 | | |
90 | | std::string SourceContentView::ToString(SourcePosition begin, |
91 | 13.3M | SourcePosition end) const { |
92 | 13.3M | ABSL_DCHECK_GE(begin, 0); |
93 | 13.3M | ABSL_DCHECK_LE(end, size()); |
94 | 13.3M | ABSL_DCHECK_LE(begin, end); |
95 | 13.3M | return absl::visit( |
96 | 13.3M | absl::Overload( |
97 | 13.3M | [begin = static_cast<size_t>(begin), |
98 | 13.3M | end = static_cast<size_t>(end)](absl::Span<const char> view) { |
99 | 10.7M | view = view.subspan(begin, end - begin); |
100 | 10.7M | return std::string(view.data(), view.size()); |
101 | 10.7M | }, |
102 | 13.3M | [begin = static_cast<size_t>(begin), |
103 | 13.3M | end = static_cast<size_t>(end)](absl::Span<const uint8_t> view) { |
104 | 254k | view = view.subspan(begin, end - begin); |
105 | 254k | std::string result; |
106 | 254k | result.reserve(view.size() * 2); |
107 | 26.6M | for (const auto& code_point : view) { |
108 | 26.6M | internal::Utf8Encode(result, code_point); |
109 | 26.6M | } |
110 | 254k | result.shrink_to_fit(); |
111 | 254k | return result; |
112 | 254k | }, |
113 | 13.3M | [begin = static_cast<size_t>(begin), |
114 | 13.3M | end = static_cast<size_t>(end)](absl::Span<const char16_t> view) { |
115 | 1.35M | view = view.subspan(begin, end - begin); |
116 | 1.35M | std::string result; |
117 | 1.35M | result.reserve(view.size() * 3); |
118 | 375M | for (const auto& code_point : view) { |
119 | 375M | internal::Utf8Encode(result, code_point); |
120 | 375M | } |
121 | 1.35M | result.shrink_to_fit(); |
122 | 1.35M | return result; |
123 | 1.35M | }, |
124 | 13.3M | [begin = static_cast<size_t>(begin), |
125 | 13.3M | end = static_cast<size_t>(end)](absl::Span<const char32_t> view) { |
126 | 1.00M | view = view.subspan(begin, end - begin); |
127 | 1.00M | std::string result; |
128 | 1.00M | result.reserve(view.size() * 4); |
129 | 479M | for (const auto& code_point : view) { |
130 | 479M | internal::Utf8Encode(result, code_point); |
131 | 479M | } |
132 | 1.00M | result.shrink_to_fit(); |
133 | 1.00M | return result; |
134 | 1.00M | }), |
135 | 13.3M | view_); |
136 | 13.3M | } |
137 | | |
138 | 0 | void SourceContentView::AppendToString(std::string& dest) const { |
139 | 0 | absl::visit(absl::Overload( |
140 | 0 | [&dest](absl::Span<const char> view) { |
141 | 0 | dest.append(view.data(), view.size()); |
142 | 0 | }, |
143 | 0 | [&dest](absl::Span<const uint8_t> view) { |
144 | 0 | for (const auto& code_point : view) { |
145 | 0 | internal::Utf8Encode(dest, code_point); |
146 | 0 | } |
147 | 0 | }, |
148 | 0 | [&dest](absl::Span<const char16_t> view) { |
149 | 0 | for (const auto& code_point : view) { |
150 | 0 | internal::Utf8Encode(dest, code_point); |
151 | 0 | } |
152 | 0 | }, |
153 | 0 | [&dest](absl::Span<const char32_t> view) { |
154 | 0 | for (const auto& code_point : view) { |
155 | 0 | internal::Utf8Encode(dest, code_point); |
156 | 0 | } |
157 | 0 | }), |
158 | 0 | view_); |
159 | 0 | } |
160 | | |
161 | | namespace common_internal { |
162 | | |
163 | | class SourceImpl : public Source { |
164 | | public: |
165 | | SourceImpl(std::string description, |
166 | | absl::InlinedVector<SourcePosition, 1> line_offsets) |
167 | 7.55k | : description_(std::move(description)), |
168 | 7.55k | line_offsets_(std::move(line_offsets)) {} |
169 | | |
170 | 150k | absl::string_view description() const final { return description_; } |
171 | | |
172 | 640k | absl::Span<const SourcePosition> line_offsets() const final { |
173 | 640k | return absl::MakeConstSpan(line_offsets_); |
174 | 640k | } |
175 | | |
176 | | private: |
177 | | const std::string description_; |
178 | | const absl::InlinedVector<SourcePosition, 1> line_offsets_; |
179 | | }; |
180 | | |
181 | | namespace { |
182 | | |
183 | | class AsciiSource final : public SourceImpl { |
184 | | public: |
185 | | AsciiSource(std::string description, |
186 | | absl::InlinedVector<SourcePosition, 1> line_offsets, |
187 | | std::vector<char> text) |
188 | 6.13k | : SourceImpl(std::move(description), std::move(line_offsets)), |
189 | 6.13k | text_(std::move(text)) {} |
190 | | |
191 | 98.3k | ContentView content() const override { |
192 | 98.3k | return MakeContentView(absl::MakeConstSpan(text_)); |
193 | 98.3k | } |
194 | | |
195 | | private: |
196 | | const std::vector<char> text_; |
197 | | }; |
198 | | |
199 | | class Latin1Source final : public SourceImpl { |
200 | | public: |
201 | | Latin1Source(std::string description, |
202 | | absl::InlinedVector<SourcePosition, 1> line_offsets, |
203 | | std::vector<uint8_t> text) |
204 | 181 | : SourceImpl(std::move(description), std::move(line_offsets)), |
205 | 181 | text_(std::move(text)) {} |
206 | | |
207 | 4.34k | ContentView content() const override { |
208 | 4.34k | return MakeContentView(absl::MakeConstSpan(text_)); |
209 | 4.34k | } |
210 | | |
211 | | private: |
212 | | const std::vector<uint8_t> text_; |
213 | | }; |
214 | | |
215 | | class BasicPlaneSource final : public SourceImpl { |
216 | | public: |
217 | | BasicPlaneSource(std::string description, |
218 | | absl::InlinedVector<SourcePosition, 1> line_offsets, |
219 | | std::vector<char16_t> text) |
220 | 609 | : SourceImpl(std::move(description), std::move(line_offsets)), |
221 | 609 | text_(std::move(text)) {} |
222 | | |
223 | 22.1k | ContentView content() const override { |
224 | 22.1k | return MakeContentView(absl::MakeConstSpan(text_)); |
225 | 22.1k | } |
226 | | |
227 | | private: |
228 | | const std::vector<char16_t> text_; |
229 | | }; |
230 | | |
231 | | class SupplementalPlaneSource final : public SourceImpl { |
232 | | public: |
233 | | SupplementalPlaneSource(std::string description, |
234 | | absl::InlinedVector<SourcePosition, 1> line_offsets, |
235 | | std::vector<char32_t> text) |
236 | 631 | : SourceImpl(std::move(description), std::move(line_offsets)), |
237 | 631 | text_(std::move(text)) {} |
238 | | |
239 | 23.5k | ContentView content() const override { |
240 | 23.5k | return MakeContentView(absl::MakeConstSpan(text_)); |
241 | 23.5k | } |
242 | | |
243 | | private: |
244 | | const std::vector<char32_t> text_; |
245 | | }; |
246 | | |
247 | | template <typename T> |
248 | | struct SourceTextTraits; |
249 | | |
250 | | template <> |
251 | | struct SourceTextTraits<absl::string_view> { |
252 | | using iterator_type = absl::string_view; |
253 | | |
254 | 7.63k | static iterator_type Begin(absl::string_view text) { return text; } |
255 | | |
256 | 142M | static void Advance(iterator_type& it, size_t n) { it.remove_prefix(n); } |
257 | | |
258 | | static void AppendTo(std::vector<uint8_t>& out, absl::string_view text, |
259 | 329 | size_t n) { |
260 | 329 | const auto* in = reinterpret_cast<const uint8_t*>(text.data()); |
261 | 329 | out.insert(out.end(), in, in + n); |
262 | 329 | } |
263 | | |
264 | 6.13k | static std::vector<char> ToVector(absl::string_view in) { |
265 | 6.13k | std::vector<char> out; |
266 | 6.13k | out.reserve(in.size()); |
267 | 6.13k | out.insert(out.end(), in.begin(), in.end()); |
268 | 6.13k | return out; |
269 | 6.13k | } |
270 | | }; |
271 | | |
272 | | template <> |
273 | | struct SourceTextTraits<absl::Cord> { |
274 | | using iterator_type = absl::Cord::CharIterator; |
275 | | |
276 | 0 | static iterator_type Begin(const absl::Cord& text) { |
277 | 0 | return text.char_begin(); |
278 | 0 | } |
279 | | |
280 | 0 | static void Advance(iterator_type& it, size_t n) { |
281 | 0 | absl::Cord::Advance(&it, n); |
282 | 0 | } |
283 | | |
284 | | static void AppendTo(std::vector<uint8_t>& out, const absl::Cord& text, |
285 | 0 | size_t n) { |
286 | 0 | auto it = text.char_begin(); |
287 | 0 | while (n > 0) { |
288 | 0 | auto str = absl::Cord::ChunkRemaining(it); |
289 | 0 | size_t to_append = std::min(n, str.size()); |
290 | 0 | const auto* in = reinterpret_cast<const uint8_t*>(str.data()); |
291 | 0 | out.insert(out.end(), in, in + to_append); |
292 | 0 | n -= to_append; |
293 | 0 | absl::Cord::Advance(&it, to_append); |
294 | 0 | } |
295 | 0 | } |
296 | | |
297 | 0 | static std::vector<char> ToVector(const absl::Cord& in) { |
298 | 0 | std::vector<char> out; |
299 | 0 | out.reserve(in.size()); |
300 | 0 | for (const auto& chunk : in.Chunks()) { |
301 | 0 | out.insert(out.end(), chunk.begin(), chunk.end()); |
302 | 0 | } |
303 | 0 | return out; |
304 | 0 | } |
305 | | }; |
306 | | |
307 | | template <typename T> |
308 | | absl::StatusOr<SourcePtr> NewSourceImpl(std::string description, const T& text, |
309 | 7.63k | const size_t text_size) { |
310 | 7.63k | if (ABSL_PREDICT_FALSE( |
311 | 7.63k | text_size > |
312 | 7.63k | static_cast<size_t>(std::numeric_limits<int32_t>::max()))) { |
313 | 0 | return absl::InvalidArgumentError("expression larger than 2GiB limit"); |
314 | 0 | } |
315 | 7.63k | using Traits = SourceTextTraits<T>; |
316 | 7.63k | size_t index = 0; |
317 | 7.63k | typename Traits::iterator_type it = Traits::Begin(text); |
318 | 7.63k | SourcePosition offset = 0; |
319 | 7.63k | char32_t code_point; |
320 | 7.63k | size_t code_units; |
321 | 7.63k | std::vector<uint8_t> data8; |
322 | 7.63k | std::vector<char16_t> data16; |
323 | 7.63k | std::vector<char32_t> data32; |
324 | 7.63k | absl::InlinedVector<SourcePosition, 1> line_offsets; |
325 | 92.2M | while (index < text_size) { |
326 | 92.2M | std::tie(code_point, code_units) = cel::internal::Utf8Decode(it); |
327 | 92.2M | if (ABSL_PREDICT_FALSE(code_point == |
328 | 92.2M | cel::internal::kUnicodeReplacementCharacter && |
329 | 92.2M | code_units == 1)) { |
330 | | // Thats an invalid UTF-8 encoding. |
331 | 23 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); |
332 | 23 | } |
333 | 92.2M | if (code_point == '\n') { |
334 | 28.6M | line_offsets.push_back(offset + 1); |
335 | 28.6M | } |
336 | 92.2M | if (code_point <= 0x7f) { |
337 | 92.2M | Traits::Advance(it, code_units); |
338 | 92.2M | index += code_units; |
339 | 92.2M | ++offset; |
340 | 92.2M | continue; |
341 | 92.2M | } |
342 | 1.47k | if (code_point <= 0xff) { |
343 | 329 | data8.reserve(text_size); |
344 | 329 | Traits::AppendTo(data8, text, index); |
345 | 329 | data8.push_back(static_cast<uint8_t>(code_point)); |
346 | 329 | Traits::Advance(it, code_units); |
347 | 329 | index += code_units; |
348 | 329 | ++offset; |
349 | 329 | goto latin1; |
350 | 329 | } |
351 | 1.14k | if (code_point <= 0xffff) { |
352 | 673 | data16.reserve(text_size); |
353 | 12.3M | for (size_t offset = 0; offset < index; offset++) { |
354 | 12.3M | data16.push_back(static_cast<uint8_t>(text[offset])); |
355 | 12.3M | } |
356 | 673 | data16.push_back(static_cast<char16_t>(code_point)); |
357 | 673 | Traits::Advance(it, code_units); |
358 | 673 | index += code_units; |
359 | 673 | ++offset; |
360 | 673 | goto basic; |
361 | 673 | } |
362 | 475 | data32.reserve(text_size); |
363 | 5.68M | for (size_t offset = 0; offset < index; offset++) { |
364 | 5.68M | data32.push_back(static_cast<char32_t>(text[offset])); |
365 | 5.68M | } |
366 | 475 | data32.push_back(code_point); |
367 | 475 | Traits::Advance(it, code_units); |
368 | 475 | index += code_units; |
369 | 475 | ++offset; |
370 | 475 | goto supplemental; |
371 | 1.14k | } |
372 | 6.13k | line_offsets.push_back(offset + 1); |
373 | 6.13k | return std::make_unique<AsciiSource>( |
374 | 6.13k | std::move(description), std::move(line_offsets), Traits::ToVector(text)); |
375 | 329 | latin1: |
376 | 4.72M | while (index < text_size) { |
377 | 4.72M | std::tie(code_point, code_units) = internal::Utf8Decode(it); |
378 | 4.72M | if (ABSL_PREDICT_FALSE(code_point == |
379 | 4.72M | internal::kUnicodeReplacementCharacter && |
380 | 4.72M | code_units == 1)) { |
381 | | // Thats an invalid UTF-8 encoding. |
382 | 14 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); |
383 | 14 | } |
384 | 4.72M | if (code_point == '\n') { |
385 | 3.78M | line_offsets.push_back(offset + 1); |
386 | 3.78M | } |
387 | 4.72M | if (code_point <= 0xff) { |
388 | 4.72M | data8.push_back(static_cast<uint8_t>(code_point)); |
389 | 4.72M | Traits::Advance(it, code_units); |
390 | 4.72M | index += code_units; |
391 | 4.72M | ++offset; |
392 | 4.72M | continue; |
393 | 4.72M | } |
394 | 134 | if (code_point <= 0xffff) { |
395 | 58 | data16.reserve(text_size); |
396 | 2.22M | for (const auto& value : data8) { |
397 | 2.22M | data16.push_back(value); |
398 | 2.22M | } |
399 | 58 | std::vector<uint8_t>().swap(data8); |
400 | 58 | data16.push_back(static_cast<char16_t>(code_point)); |
401 | 58 | Traits::Advance(it, code_units); |
402 | 58 | index += code_units; |
403 | 58 | ++offset; |
404 | 58 | goto basic; |
405 | 58 | } |
406 | 76 | data32.reserve(text_size); |
407 | 1.10M | for (const auto& value : data8) { |
408 | 1.10M | data32.push_back(value); |
409 | 1.10M | } |
410 | 76 | std::vector<uint8_t>().swap(data8); |
411 | 76 | data32.push_back(code_point); |
412 | 76 | Traits::Advance(it, code_units); |
413 | 76 | index += code_units; |
414 | 76 | ++offset; |
415 | 76 | goto supplemental; |
416 | 134 | } |
417 | 181 | line_offsets.push_back(offset + 1); |
418 | 181 | return std::make_unique<Latin1Source>( |
419 | 181 | std::move(description), std::move(line_offsets), std::move(data8)); |
420 | 731 | basic: |
421 | 12.1M | while (index < text_size) { |
422 | 12.1M | std::tie(code_point, code_units) = internal::Utf8Decode(it); |
423 | 12.1M | if (ABSL_PREDICT_FALSE(code_point == |
424 | 12.1M | internal::kUnicodeReplacementCharacter && |
425 | 12.1M | code_units == 1)) { |
426 | | // Thats an invalid UTF-8 encoding. |
427 | 20 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); |
428 | 20 | } |
429 | 12.1M | if (code_point == '\n') { |
430 | 7.29M | line_offsets.push_back(offset + 1); |
431 | 7.29M | } |
432 | 12.1M | if (code_point <= 0xffff) { |
433 | 12.1M | data16.push_back(static_cast<char16_t>(code_point)); |
434 | 12.1M | Traits::Advance(it, code_units); |
435 | 12.1M | index += code_units; |
436 | 12.1M | ++offset; |
437 | 12.1M | continue; |
438 | 12.1M | } |
439 | 102 | data32.reserve(text_size); |
440 | 1.38M | for (const auto& value : data16) { |
441 | 1.38M | data32.push_back(static_cast<char32_t>(value)); |
442 | 1.38M | } |
443 | 102 | std::vector<char16_t>().swap(data16); |
444 | 102 | data32.push_back(code_point); |
445 | 102 | Traits::Advance(it, code_units); |
446 | 102 | index += code_units; |
447 | 102 | ++offset; |
448 | 102 | goto supplemental; |
449 | 12.1M | } |
450 | 609 | line_offsets.push_back(offset + 1); |
451 | 609 | return std::make_unique<BasicPlaneSource>( |
452 | 609 | std::move(description), std::move(line_offsets), std::move(data16)); |
453 | 653 | supplemental: |
454 | 33.5M | while (index < text_size) { |
455 | 33.5M | std::tie(code_point, code_units) = internal::Utf8Decode(it); |
456 | 33.5M | if (ABSL_PREDICT_FALSE(code_point == |
457 | 33.5M | internal::kUnicodeReplacementCharacter && |
458 | 33.5M | code_units == 1)) { |
459 | | // Thats an invalid UTF-8 encoding. |
460 | 22 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); |
461 | 22 | } |
462 | 33.5M | if (code_point == '\n') { |
463 | 15.7M | line_offsets.push_back(offset + 1); |
464 | 15.7M | } |
465 | 33.5M | data32.push_back(code_point); |
466 | 33.5M | Traits::Advance(it, code_units); |
467 | 33.5M | index += code_units; |
468 | 33.5M | ++offset; |
469 | 33.5M | } |
470 | 631 | line_offsets.push_back(offset + 1); |
471 | 631 | return std::make_unique<SupplementalPlaneSource>( |
472 | 631 | std::move(description), std::move(line_offsets), std::move(data32)); |
473 | 653 | } source.cc:absl::lts_20250512::StatusOr<std::__1::unique_ptr<cel::Source, std::__1::default_delete<cel::Source> > > cel::common_internal::(anonymous namespace)::NewSourceImpl<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long) Line | Count | Source | 309 | 7.63k | const size_t text_size) { | 310 | 7.63k | if (ABSL_PREDICT_FALSE( | 311 | 7.63k | text_size > | 312 | 7.63k | static_cast<size_t>(std::numeric_limits<int32_t>::max()))) { | 313 | 0 | return absl::InvalidArgumentError("expression larger than 2GiB limit"); | 314 | 0 | } | 315 | 7.63k | using Traits = SourceTextTraits<T>; | 316 | 7.63k | size_t index = 0; | 317 | 7.63k | typename Traits::iterator_type it = Traits::Begin(text); | 318 | 7.63k | SourcePosition offset = 0; | 319 | 7.63k | char32_t code_point; | 320 | 7.63k | size_t code_units; | 321 | 7.63k | std::vector<uint8_t> data8; | 322 | 7.63k | std::vector<char16_t> data16; | 323 | 7.63k | std::vector<char32_t> data32; | 324 | 7.63k | absl::InlinedVector<SourcePosition, 1> line_offsets; | 325 | 92.2M | while (index < text_size) { | 326 | 92.2M | std::tie(code_point, code_units) = cel::internal::Utf8Decode(it); | 327 | 92.2M | if (ABSL_PREDICT_FALSE(code_point == | 328 | 92.2M | cel::internal::kUnicodeReplacementCharacter && | 329 | 92.2M | code_units == 1)) { | 330 | | // Thats an invalid UTF-8 encoding. | 331 | 23 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); | 332 | 23 | } | 333 | 92.2M | if (code_point == '\n') { | 334 | 28.6M | line_offsets.push_back(offset + 1); | 335 | 28.6M | } | 336 | 92.2M | if (code_point <= 0x7f) { | 337 | 92.2M | Traits::Advance(it, code_units); | 338 | 92.2M | index += code_units; | 339 | 92.2M | ++offset; | 340 | 92.2M | continue; | 341 | 92.2M | } | 342 | 1.47k | if (code_point <= 0xff) { | 343 | 329 | data8.reserve(text_size); | 344 | 329 | Traits::AppendTo(data8, text, index); | 345 | 329 | data8.push_back(static_cast<uint8_t>(code_point)); | 346 | 329 | Traits::Advance(it, code_units); | 347 | 329 | index += code_units; | 348 | 329 | ++offset; | 349 | 329 | goto latin1; | 350 | 329 | } | 351 | 1.14k | if (code_point <= 0xffff) { | 352 | 673 | data16.reserve(text_size); | 353 | 12.3M | for (size_t offset = 0; offset < index; offset++) { | 354 | 12.3M | data16.push_back(static_cast<uint8_t>(text[offset])); | 355 | 12.3M | } | 356 | 673 | data16.push_back(static_cast<char16_t>(code_point)); | 357 | 673 | Traits::Advance(it, code_units); | 358 | 673 | index += code_units; | 359 | 673 | ++offset; | 360 | 673 | goto basic; | 361 | 673 | } | 362 | 475 | data32.reserve(text_size); | 363 | 5.68M | for (size_t offset = 0; offset < index; offset++) { | 364 | 5.68M | data32.push_back(static_cast<char32_t>(text[offset])); | 365 | 5.68M | } | 366 | 475 | data32.push_back(code_point); | 367 | 475 | Traits::Advance(it, code_units); | 368 | 475 | index += code_units; | 369 | 475 | ++offset; | 370 | 475 | goto supplemental; | 371 | 1.14k | } | 372 | 6.13k | line_offsets.push_back(offset + 1); | 373 | 6.13k | return std::make_unique<AsciiSource>( | 374 | 6.13k | std::move(description), std::move(line_offsets), Traits::ToVector(text)); | 375 | 329 | latin1: | 376 | 4.72M | while (index < text_size) { | 377 | 4.72M | std::tie(code_point, code_units) = internal::Utf8Decode(it); | 378 | 4.72M | if (ABSL_PREDICT_FALSE(code_point == | 379 | 4.72M | internal::kUnicodeReplacementCharacter && | 380 | 4.72M | code_units == 1)) { | 381 | | // Thats an invalid UTF-8 encoding. | 382 | 14 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); | 383 | 14 | } | 384 | 4.72M | if (code_point == '\n') { | 385 | 3.78M | line_offsets.push_back(offset + 1); | 386 | 3.78M | } | 387 | 4.72M | if (code_point <= 0xff) { | 388 | 4.72M | data8.push_back(static_cast<uint8_t>(code_point)); | 389 | 4.72M | Traits::Advance(it, code_units); | 390 | 4.72M | index += code_units; | 391 | 4.72M | ++offset; | 392 | 4.72M | continue; | 393 | 4.72M | } | 394 | 134 | if (code_point <= 0xffff) { | 395 | 58 | data16.reserve(text_size); | 396 | 2.22M | for (const auto& value : data8) { | 397 | 2.22M | data16.push_back(value); | 398 | 2.22M | } | 399 | 58 | std::vector<uint8_t>().swap(data8); | 400 | 58 | data16.push_back(static_cast<char16_t>(code_point)); | 401 | 58 | Traits::Advance(it, code_units); | 402 | 58 | index += code_units; | 403 | 58 | ++offset; | 404 | 58 | goto basic; | 405 | 58 | } | 406 | 76 | data32.reserve(text_size); | 407 | 1.10M | for (const auto& value : data8) { | 408 | 1.10M | data32.push_back(value); | 409 | 1.10M | } | 410 | 76 | std::vector<uint8_t>().swap(data8); | 411 | 76 | data32.push_back(code_point); | 412 | 76 | Traits::Advance(it, code_units); | 413 | 76 | index += code_units; | 414 | 76 | ++offset; | 415 | 76 | goto supplemental; | 416 | 134 | } | 417 | 181 | line_offsets.push_back(offset + 1); | 418 | 181 | return std::make_unique<Latin1Source>( | 419 | 181 | std::move(description), std::move(line_offsets), std::move(data8)); | 420 | 731 | basic: | 421 | 12.1M | while (index < text_size) { | 422 | 12.1M | std::tie(code_point, code_units) = internal::Utf8Decode(it); | 423 | 12.1M | if (ABSL_PREDICT_FALSE(code_point == | 424 | 12.1M | internal::kUnicodeReplacementCharacter && | 425 | 12.1M | code_units == 1)) { | 426 | | // Thats an invalid UTF-8 encoding. | 427 | 20 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); | 428 | 20 | } | 429 | 12.1M | if (code_point == '\n') { | 430 | 7.29M | line_offsets.push_back(offset + 1); | 431 | 7.29M | } | 432 | 12.1M | if (code_point <= 0xffff) { | 433 | 12.1M | data16.push_back(static_cast<char16_t>(code_point)); | 434 | 12.1M | Traits::Advance(it, code_units); | 435 | 12.1M | index += code_units; | 436 | 12.1M | ++offset; | 437 | 12.1M | continue; | 438 | 12.1M | } | 439 | 102 | data32.reserve(text_size); | 440 | 1.38M | for (const auto& value : data16) { | 441 | 1.38M | data32.push_back(static_cast<char32_t>(value)); | 442 | 1.38M | } | 443 | 102 | std::vector<char16_t>().swap(data16); | 444 | 102 | data32.push_back(code_point); | 445 | 102 | Traits::Advance(it, code_units); | 446 | 102 | index += code_units; | 447 | 102 | ++offset; | 448 | 102 | goto supplemental; | 449 | 12.1M | } | 450 | 609 | line_offsets.push_back(offset + 1); | 451 | 609 | return std::make_unique<BasicPlaneSource>( | 452 | 609 | std::move(description), std::move(line_offsets), std::move(data16)); | 453 | 653 | supplemental: | 454 | 33.5M | while (index < text_size) { | 455 | 33.5M | std::tie(code_point, code_units) = internal::Utf8Decode(it); | 456 | 33.5M | if (ABSL_PREDICT_FALSE(code_point == | 457 | 33.5M | internal::kUnicodeReplacementCharacter && | 458 | 33.5M | code_units == 1)) { | 459 | | // Thats an invalid UTF-8 encoding. | 460 | 22 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); | 461 | 22 | } | 462 | 33.5M | if (code_point == '\n') { | 463 | 15.7M | line_offsets.push_back(offset + 1); | 464 | 15.7M | } | 465 | 33.5M | data32.push_back(code_point); | 466 | 33.5M | Traits::Advance(it, code_units); | 467 | 33.5M | index += code_units; | 468 | 33.5M | ++offset; | 469 | 33.5M | } | 470 | 631 | line_offsets.push_back(offset + 1); | 471 | 631 | return std::make_unique<SupplementalPlaneSource>( | 472 | 631 | std::move(description), std::move(line_offsets), std::move(data32)); | 473 | 653 | } |
Unexecuted instantiation: source.cc:absl::lts_20250512::StatusOr<std::__1::unique_ptr<cel::Source, std::__1::default_delete<cel::Source> > > cel::common_internal::(anonymous namespace)::NewSourceImpl<absl::lts_20250512::Cord>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, absl::lts_20250512::Cord const&, unsigned long) |
474 | | |
475 | | } // namespace |
476 | | |
477 | | } // namespace common_internal |
478 | | |
479 | | absl::optional<SourceLocation> Source::GetLocation( |
480 | 140k | SourcePosition position) const { |
481 | 140k | if (auto line_and_offset = FindLine(position); |
482 | 140k | ABSL_PREDICT_TRUE(line_and_offset.has_value())) { |
483 | 138k | return SourceLocation{line_and_offset->first, |
484 | 138k | position - line_and_offset->second}; |
485 | 138k | } |
486 | 2.16k | return absl::nullopt; |
487 | 140k | } |
488 | | |
489 | | absl::optional<SourcePosition> Source::GetPosition( |
490 | 4.15M | const SourceLocation& location) const { |
491 | 4.15M | if (ABSL_PREDICT_FALSE(location.line < 1 || location.column < 0)) { |
492 | 0 | return absl::nullopt; |
493 | 0 | } |
494 | 4.15M | if (auto position = FindLinePosition(location.line); |
495 | 4.15M | ABSL_PREDICT_TRUE(position.has_value())) { |
496 | 4.15M | return *position + location.column; |
497 | 4.15M | } |
498 | 0 | return absl::nullopt; |
499 | 4.15M | } |
500 | | |
501 | 140k | absl::optional<std::string> Source::Snippet(int32_t line) const { |
502 | 140k | auto content = this->content(); |
503 | 140k | auto start = FindLinePosition(line); |
504 | 140k | if (ABSL_PREDICT_FALSE(!start.has_value() || content.empty())) { |
505 | 2.16k | return absl::nullopt; |
506 | 2.16k | } |
507 | 138k | auto end = FindLinePosition(line + 1); |
508 | 138k | if (end.has_value()) { |
509 | 33.5k | return content.ToString(*start, *end - 1); |
510 | 33.5k | } |
511 | 105k | return content.ToString(*start); |
512 | 138k | } |
513 | | |
514 | 140k | std::string Source::DisplayErrorLocation(SourceLocation location) const { |
515 | 140k | constexpr char32_t kDot = '.'; |
516 | 140k | constexpr char32_t kHat = '^'; |
517 | | |
518 | 140k | constexpr char32_t kWideDot = 0xff0e; |
519 | 140k | constexpr char32_t kWideHat = 0xff3e; |
520 | 140k | absl::optional<std::string> snippet = Snippet(location.line); |
521 | 140k | if (!snippet || snippet->empty()) { |
522 | 2.54k | return ""; |
523 | 2.54k | } |
524 | | |
525 | 138k | *snippet = absl::StrReplaceAll(*snippet, {{"\t", " "}}); |
526 | 138k | absl::string_view snippet_view(*snippet); |
527 | 138k | std::string result; |
528 | 138k | absl::StrAppend(&result, "\n | ", *snippet); |
529 | 138k | absl::StrAppend(&result, "\n | "); |
530 | | |
531 | 138k | std::string index_line; |
532 | 873M | for (int32_t i = 0; i < location.column && !snippet_view.empty(); ++i) { |
533 | 873M | size_t count; |
534 | 873M | std::tie(std::ignore, count) = internal::Utf8Decode(snippet_view); |
535 | 873M | snippet_view.remove_prefix(count); |
536 | 873M | if (count > 1) { |
537 | 128k | internal::Utf8Encode(index_line, kWideDot); |
538 | 873M | } else { |
539 | 873M | internal::Utf8Encode(index_line, kDot); |
540 | 873M | } |
541 | 873M | } |
542 | 138k | size_t count = 0; |
543 | 138k | if (!snippet_view.empty()) { |
544 | 134k | std::tie(std::ignore, count) = internal::Utf8Decode(snippet_view); |
545 | 134k | } |
546 | 138k | if (count > 1) { |
547 | 3.01k | internal::Utf8Encode(index_line, kWideHat); |
548 | 135k | } else { |
549 | 135k | internal::Utf8Encode(index_line, kHat); |
550 | 135k | } |
551 | 138k | absl::StrAppend(&result, index_line); |
552 | 138k | return result; |
553 | 140k | } |
554 | | |
555 | 4.43M | absl::optional<SourcePosition> Source::FindLinePosition(int32_t line) const { |
556 | 4.43M | if (ABSL_PREDICT_FALSE(line < 1)) { |
557 | 2.16k | return absl::nullopt; |
558 | 2.16k | } |
559 | 4.42M | if (line == 1) { |
560 | 3.93M | return SourcePosition{0}; |
561 | 3.93M | } |
562 | 497k | const auto line_offsets = this->line_offsets(); |
563 | 497k | if (ABSL_PREDICT_TRUE(line <= static_cast<int32_t>(line_offsets.size()))) { |
564 | 392k | return line_offsets[static_cast<size_t>(line - 2)]; |
565 | 392k | } |
566 | 105k | return absl::nullopt; |
567 | 497k | } |
568 | | |
569 | | absl::optional<std::pair<int32_t, SourcePosition>> Source::FindLine( |
570 | 140k | SourcePosition position) const { |
571 | 140k | if (ABSL_PREDICT_FALSE(position < 0)) { |
572 | 2.16k | return absl::nullopt; |
573 | 2.16k | } |
574 | 138k | int32_t line = 1; |
575 | 138k | const auto line_offsets = this->line_offsets(); |
576 | 1.56G | for (const auto& line_offset : line_offsets) { |
577 | 1.56G | if (line_offset > position) { |
578 | 138k | break; |
579 | 138k | } |
580 | 1.56G | ++line; |
581 | 1.56G | } |
582 | 138k | if (line == 1) { |
583 | 104k | return std::make_pair(line, SourcePosition{0}); |
584 | 104k | } |
585 | 34.4k | return std::make_pair(line, line_offsets[static_cast<size_t>(line) - 2]); |
586 | 138k | } |
587 | | |
588 | | absl::StatusOr<absl_nonnull SourcePtr> NewSource(absl::string_view content, |
589 | 7.63k | std::string description) { |
590 | 7.63k | return common_internal::NewSourceImpl(std::move(description), content, |
591 | 7.63k | content.size()); |
592 | 7.63k | } |
593 | | |
594 | | absl::StatusOr<absl_nonnull SourcePtr> NewSource(const absl::Cord& content, |
595 | 0 | std::string description) { |
596 | 0 | return common_internal::NewSourceImpl(std::move(description), content, |
597 | 0 | content.size()); |
598 | 0 | } |
599 | | |
600 | | } // namespace cel |