/proc/self/cwd/common/source.cc
Line | Count | Source |
1 | | // Copyright 2023 Google LLC |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "common/source.h" |
16 | | |
17 | | #include <algorithm> |
18 | | #include <cstddef> |
19 | | #include <cstdint> |
20 | | #include <limits> |
21 | | #include <memory> |
22 | | #include <string> |
23 | | #include <tuple> |
24 | | #include <utility> |
25 | | #include <vector> |
26 | | |
27 | | #include "absl/base/nullability.h" |
28 | | #include "absl/base/optimization.h" |
29 | | #include "absl/container/inlined_vector.h" |
30 | | #include "absl/functional/overload.h" |
31 | | #include "absl/log/absl_check.h" |
32 | | #include "absl/status/status.h" |
33 | | #include "absl/status/statusor.h" |
34 | | #include "absl/strings/cord.h" |
35 | | #include "absl/strings/str_cat.h" |
36 | | #include "absl/strings/str_replace.h" |
37 | | #include "absl/strings/string_view.h" |
38 | | #include "absl/types/optional.h" |
39 | | #include "absl/types/span.h" |
40 | | #include "absl/types/variant.h" |
41 | | #include "internal/unicode.h" |
42 | | #include "internal/utf8.h" |
43 | | |
44 | | namespace cel { |
45 | | |
46 | 47.3k | SourcePosition SourceContentView::size() const { |
47 | 47.3k | return static_cast<SourcePosition>(absl::visit( |
48 | 47.3k | absl::Overload( |
49 | 47.3k | [](absl::Span<const char> view) { return view.size(); }, |
50 | 47.3k | [](absl::Span<const uint8_t> view) { return view.size(); }, |
51 | 47.3k | [](absl::Span<const char16_t> view) { return view.size(); }, |
52 | 47.3k | [](absl::Span<const char32_t> view) { return view.size(); }), |
53 | 47.3k | view_)); |
54 | 47.3k | } |
55 | | |
56 | 56.5k | bool SourceContentView::empty() const { |
57 | 56.5k | return absl::visit( |
58 | 56.5k | absl::Overload( |
59 | 56.5k | [](absl::Span<const char> view) { return view.empty(); }, |
60 | 56.5k | [](absl::Span<const uint8_t> view) { return view.empty(); }, |
61 | 56.5k | [](absl::Span<const char16_t> view) { return view.empty(); }, |
62 | 56.5k | [](absl::Span<const char32_t> view) { return view.empty(); }), |
63 | 56.5k | view_); |
64 | 56.5k | } |
65 | | |
66 | 200M | char32_t SourceContentView::at(SourcePosition position) const { |
67 | 200M | ABSL_DCHECK_GE(position, 0); |
68 | 200M | ABSL_DCHECK_LT(position, size()); |
69 | 200M | return absl::visit( |
70 | 200M | absl::Overload( |
71 | 200M | [position = |
72 | 200M | static_cast<size_t>(position)](absl::Span<const char> view) { |
73 | 107M | return static_cast<char32_t>(static_cast<uint8_t>(view[position])); |
74 | 107M | }, |
75 | 200M | [position = |
76 | 200M | static_cast<size_t>(position)](absl::Span<const uint8_t> view) { |
77 | 11.1M | return static_cast<char32_t>(view[position]); |
78 | 11.1M | }, |
79 | 200M | [position = |
80 | 200M | static_cast<size_t>(position)](absl::Span<const char16_t> view) { |
81 | 9.77M | return static_cast<char32_t>(view[position]); |
82 | 9.77M | }, |
83 | 200M | [position = |
84 | 200M | static_cast<size_t>(position)](absl::Span<const char32_t> view) { |
85 | 72.1M | return static_cast<char32_t>(view[position]); |
86 | 72.1M | }), |
87 | 200M | view_); |
88 | 200M | } |
89 | | |
90 | | std::string SourceContentView::ToString(SourcePosition begin, |
91 | 5.96M | SourcePosition end) const { |
92 | 5.96M | ABSL_DCHECK_GE(begin, 0); |
93 | 5.96M | ABSL_DCHECK_LE(end, size()); |
94 | 5.96M | ABSL_DCHECK_LE(begin, end); |
95 | 5.96M | return absl::visit( |
96 | 5.96M | absl::Overload( |
97 | 5.96M | [begin = static_cast<size_t>(begin), |
98 | 5.96M | end = static_cast<size_t>(end)](absl::Span<const char> view) { |
99 | 4.49M | view = view.subspan(begin, end - begin); |
100 | 4.49M | return std::string(view.data(), view.size()); |
101 | 4.49M | }, |
102 | 5.96M | [begin = static_cast<size_t>(begin), |
103 | 5.96M | end = static_cast<size_t>(end)](absl::Span<const uint8_t> view) { |
104 | 350k | view = view.subspan(begin, end - begin); |
105 | 350k | std::string result; |
106 | 350k | result.reserve(view.size() * 2); |
107 | 32.3M | for (const auto& code_point : view) { |
108 | 32.3M | internal::Utf8Encode(result, code_point); |
109 | 32.3M | } |
110 | 350k | result.shrink_to_fit(); |
111 | 350k | return result; |
112 | 350k | }, |
113 | 5.96M | [begin = static_cast<size_t>(begin), |
114 | 5.96M | end = static_cast<size_t>(end)](absl::Span<const char16_t> view) { |
115 | 438k | view = view.subspan(begin, end - begin); |
116 | 438k | std::string result; |
117 | 438k | result.reserve(view.size() * 3); |
118 | 136M | for (const auto& code_point : view) { |
119 | 136M | internal::Utf8Encode(result, code_point); |
120 | 136M | } |
121 | 438k | result.shrink_to_fit(); |
122 | 438k | return result; |
123 | 438k | }, |
124 | 5.96M | [begin = static_cast<size_t>(begin), |
125 | 5.96M | end = static_cast<size_t>(end)](absl::Span<const char32_t> view) { |
126 | 675k | view = view.subspan(begin, end - begin); |
127 | 675k | std::string result; |
128 | 675k | result.reserve(view.size() * 4); |
129 | 202M | for (const auto& code_point : view) { |
130 | 202M | internal::Utf8Encode(result, code_point); |
131 | 202M | } |
132 | 675k | result.shrink_to_fit(); |
133 | 675k | return result; |
134 | 675k | }), |
135 | 5.96M | view_); |
136 | 5.96M | } |
137 | | |
138 | 0 | void SourceContentView::AppendToString(std::string& dest) const { |
139 | 0 | absl::visit(absl::Overload( |
140 | 0 | [&dest](absl::Span<const char> view) { |
141 | 0 | dest.append(view.data(), view.size()); |
142 | 0 | }, |
143 | 0 | [&dest](absl::Span<const uint8_t> view) { |
144 | 0 | for (const auto& code_point : view) { |
145 | 0 | internal::Utf8Encode(dest, code_point); |
146 | 0 | } |
147 | 0 | }, |
148 | 0 | [&dest](absl::Span<const char16_t> view) { |
149 | 0 | for (const auto& code_point : view) { |
150 | 0 | internal::Utf8Encode(dest, code_point); |
151 | 0 | } |
152 | 0 | }, |
153 | 0 | [&dest](absl::Span<const char32_t> view) { |
154 | 0 | for (const auto& code_point : view) { |
155 | 0 | internal::Utf8Encode(dest, code_point); |
156 | 0 | } |
157 | 0 | }), |
158 | 0 | view_); |
159 | 0 | } |
160 | | |
161 | | namespace common_internal { |
162 | | |
163 | | class SourceImpl : public Source { |
164 | | public: |
165 | | SourceImpl(std::string description, |
166 | | absl::InlinedVector<SourcePosition, 1> line_offsets) |
167 | 4.20k | : description_(std::move(description)), |
168 | 4.20k | line_offsets_(std::move(line_offsets)) {} |
169 | | |
170 | 63.9k | absl::string_view description() const final { return description_; } |
171 | | |
172 | 348k | absl::Span<const SourcePosition> line_offsets() const final { |
173 | 348k | return absl::MakeConstSpan(line_offsets_); |
174 | 348k | } |
175 | | |
176 | | private: |
177 | | const std::string description_; |
178 | | const absl::InlinedVector<SourcePosition, 1> line_offsets_; |
179 | | }; |
180 | | |
181 | | namespace { |
182 | | |
183 | | class AsciiSource final : public SourceImpl { |
184 | | public: |
185 | | AsciiSource(std::string description, |
186 | | absl::InlinedVector<SourcePosition, 1> line_offsets, |
187 | | std::vector<char> text) |
188 | 3.47k | : SourceImpl(std::move(description), std::move(line_offsets)), |
189 | 3.47k | text_(std::move(text)) {} |
190 | | |
191 | 40.4k | ContentView content() const override { |
192 | 40.4k | return MakeContentView(absl::MakeConstSpan(text_)); |
193 | 40.4k | } |
194 | | |
195 | | private: |
196 | | const std::vector<char> text_; |
197 | | }; |
198 | | |
199 | | class Latin1Source final : public SourceImpl { |
200 | | public: |
201 | | Latin1Source(std::string description, |
202 | | absl::InlinedVector<SourcePosition, 1> line_offsets, |
203 | | std::vector<uint8_t> text) |
204 | 105 | : SourceImpl(std::move(description), std::move(line_offsets)), |
205 | 105 | text_(std::move(text)) {} |
206 | | |
207 | 3.48k | ContentView content() const override { |
208 | 3.48k | return MakeContentView(absl::MakeConstSpan(text_)); |
209 | 3.48k | } |
210 | | |
211 | | private: |
212 | | const std::vector<uint8_t> text_; |
213 | | }; |
214 | | |
215 | | class BasicPlaneSource final : public SourceImpl { |
216 | | public: |
217 | | BasicPlaneSource(std::string description, |
218 | | absl::InlinedVector<SourcePosition, 1> line_offsets, |
219 | | std::vector<char16_t> text) |
220 | 309 | : SourceImpl(std::move(description), std::move(line_offsets)), |
221 | 309 | text_(std::move(text)) {} |
222 | | |
223 | 7.67k | ContentView content() const override { |
224 | 7.67k | return MakeContentView(absl::MakeConstSpan(text_)); |
225 | 7.67k | } |
226 | | |
227 | | private: |
228 | | const std::vector<char16_t> text_; |
229 | | }; |
230 | | |
231 | | class SupplementalPlaneSource final : public SourceImpl { |
232 | | public: |
233 | | SupplementalPlaneSource(std::string description, |
234 | | absl::InlinedVector<SourcePosition, 1> line_offsets, |
235 | | std::vector<char32_t> text) |
236 | 316 | : SourceImpl(std::move(description), std::move(line_offsets)), |
237 | 316 | text_(std::move(text)) {} |
238 | | |
239 | 11.0k | ContentView content() const override { |
240 | 11.0k | return MakeContentView(absl::MakeConstSpan(text_)); |
241 | 11.0k | } |
242 | | |
243 | | private: |
244 | | const std::vector<char32_t> text_; |
245 | | }; |
246 | | |
247 | | template <typename T> |
248 | | struct SourceTextTraits; |
249 | | |
250 | | template <> |
251 | | struct SourceTextTraits<absl::string_view> { |
252 | | using iterator_type = absl::string_view; |
253 | | |
254 | 4.23k | static iterator_type Begin(absl::string_view text) { return text; } |
255 | | |
256 | 57.3M | static void Advance(iterator_type& it, size_t n) { it.remove_prefix(n); } |
257 | | |
258 | | static void AppendTo(std::vector<uint8_t>& out, absl::string_view text, |
259 | 187 | size_t n) { |
260 | 187 | const auto* in = reinterpret_cast<const uint8_t*>(text.data()); |
261 | 187 | out.insert(out.end(), in, in + n); |
262 | 187 | } |
263 | | |
264 | 3.47k | static std::vector<char> ToVector(absl::string_view in) { |
265 | 3.47k | std::vector<char> out; |
266 | 3.47k | out.reserve(in.size()); |
267 | 3.47k | out.insert(out.end(), in.begin(), in.end()); |
268 | 3.47k | return out; |
269 | 3.47k | } |
270 | | }; |
271 | | |
272 | | template <> |
273 | | struct SourceTextTraits<absl::Cord> { |
274 | | using iterator_type = absl::Cord::CharIterator; |
275 | | |
276 | 0 | static iterator_type Begin(const absl::Cord& text) { |
277 | 0 | return text.char_begin(); |
278 | 0 | } |
279 | | |
280 | 0 | static void Advance(iterator_type& it, size_t n) { |
281 | 0 | absl::Cord::Advance(&it, n); |
282 | 0 | } |
283 | | |
284 | | static void AppendTo(std::vector<uint8_t>& out, const absl::Cord& text, |
285 | 0 | size_t n) { |
286 | 0 | auto it = text.char_begin(); |
287 | 0 | while (n > 0) { |
288 | 0 | auto str = absl::Cord::ChunkRemaining(it); |
289 | 0 | size_t to_append = std::min(n, str.size()); |
290 | 0 | const auto* in = reinterpret_cast<const uint8_t*>(str.data()); |
291 | 0 | out.insert(out.end(), in, in + to_append); |
292 | 0 | n -= to_append; |
293 | 0 | absl::Cord::Advance(&it, to_append); |
294 | 0 | } |
295 | 0 | } |
296 | | |
297 | 0 | static std::vector<char> ToVector(const absl::Cord& in) { |
298 | 0 | std::vector<char> out; |
299 | 0 | out.reserve(in.size()); |
300 | 0 | for (const auto& chunk : in.Chunks()) { |
301 | 0 | out.insert(out.end(), chunk.begin(), chunk.end()); |
302 | 0 | } |
303 | 0 | return out; |
304 | 0 | } |
305 | | }; |
306 | | |
307 | | template <typename T> |
308 | | absl::StatusOr<SourcePtr> NewSourceImpl(std::string description, const T& text, |
309 | 4.23k | const size_t text_size) { |
310 | 4.23k | if (ABSL_PREDICT_FALSE( |
311 | 4.23k | text_size > |
312 | 4.23k | static_cast<size_t>(std::numeric_limits<int32_t>::max()))) { |
313 | 0 | return absl::InvalidArgumentError("expression larger than 2GiB limit"); |
314 | 0 | } |
315 | 4.23k | using Traits = SourceTextTraits<T>; |
316 | 4.23k | size_t index = 0; |
317 | 4.23k | typename Traits::iterator_type it = Traits::Begin(text); |
318 | 4.23k | SourcePosition offset = 0; |
319 | 4.23k | char32_t code_point; |
320 | 4.23k | size_t code_units; |
321 | 4.23k | std::vector<uint8_t> data8; |
322 | 4.23k | std::vector<char16_t> data16; |
323 | 4.23k | std::vector<char32_t> data32; |
324 | 4.23k | absl::InlinedVector<SourcePosition, 1> line_offsets; |
325 | 39.6M | while (index < text_size) { |
326 | 39.6M | std::tie(code_point, code_units) = cel::internal::Utf8Decode(it); |
327 | 39.6M | if (ABSL_PREDICT_FALSE(code_point == |
328 | 39.6M | cel::internal::kUnicodeReplacementCharacter && |
329 | 39.6M | code_units == 1)) { |
330 | | // Thats an invalid UTF-8 encoding. |
331 | 14 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); |
332 | 14 | } |
333 | 39.6M | if (code_point == '\n') { |
334 | 18.7M | line_offsets.push_back(offset + 1); |
335 | 18.7M | } |
336 | 39.6M | if (code_point <= 0x7f) { |
337 | 39.6M | Traits::Advance(it, code_units); |
338 | 39.6M | index += code_units; |
339 | 39.6M | ++offset; |
340 | 39.6M | continue; |
341 | 39.6M | } |
342 | 748 | if (code_point <= 0xff) { |
343 | 187 | data8.reserve(text_size); |
344 | 187 | Traits::AppendTo(data8, text, index); |
345 | 187 | data8.push_back(static_cast<uint8_t>(code_point)); |
346 | 187 | Traits::Advance(it, code_units); |
347 | 187 | index += code_units; |
348 | 187 | ++offset; |
349 | 187 | goto latin1; |
350 | 187 | } |
351 | 561 | if (code_point <= 0xffff) { |
352 | 351 | data16.reserve(text_size); |
353 | 1.94M | for (size_t offset = 0; offset < index; offset++) { |
354 | 1.94M | data16.push_back(static_cast<uint8_t>(text[offset])); |
355 | 1.94M | } |
356 | 351 | data16.push_back(static_cast<char16_t>(code_point)); |
357 | 351 | Traits::Advance(it, code_units); |
358 | 351 | index += code_units; |
359 | 351 | ++offset; |
360 | 351 | goto basic; |
361 | 351 | } |
362 | 210 | data32.reserve(text_size); |
363 | 1.88M | for (size_t offset = 0; offset < index; offset++) { |
364 | 1.88M | data32.push_back(static_cast<char32_t>(text[offset])); |
365 | 1.88M | } |
366 | 210 | data32.push_back(code_point); |
367 | 210 | Traits::Advance(it, code_units); |
368 | 210 | index += code_units; |
369 | 210 | ++offset; |
370 | 210 | goto supplemental; |
371 | 561 | } |
372 | 3.47k | line_offsets.push_back(offset + 1); |
373 | 3.47k | return std::make_unique<AsciiSource>( |
374 | 3.47k | std::move(description), std::move(line_offsets), Traits::ToVector(text)); |
375 | 187 | latin1: |
376 | 4.06M | while (index < text_size) { |
377 | 4.06M | std::tie(code_point, code_units) = internal::Utf8Decode(it); |
378 | 4.06M | if (ABSL_PREDICT_FALSE(code_point == |
379 | 4.06M | internal::kUnicodeReplacementCharacter && |
380 | 4.06M | code_units == 1)) { |
381 | | // Thats an invalid UTF-8 encoding. |
382 | 4 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); |
383 | 4 | } |
384 | 4.06M | if (code_point == '\n') { |
385 | 3.30M | line_offsets.push_back(offset + 1); |
386 | 3.30M | } |
387 | 4.06M | if (code_point <= 0xff) { |
388 | 4.06M | data8.push_back(static_cast<uint8_t>(code_point)); |
389 | 4.06M | Traits::Advance(it, code_units); |
390 | 4.06M | index += code_units; |
391 | 4.06M | ++offset; |
392 | 4.06M | continue; |
393 | 4.06M | } |
394 | 78 | if (code_point <= 0xffff) { |
395 | 36 | data16.reserve(text_size); |
396 | 1.23M | for (const auto& value : data8) { |
397 | 1.23M | data16.push_back(value); |
398 | 1.23M | } |
399 | 36 | std::vector<uint8_t>().swap(data8); |
400 | 36 | data16.push_back(static_cast<char16_t>(code_point)); |
401 | 36 | Traits::Advance(it, code_units); |
402 | 36 | index += code_units; |
403 | 36 | ++offset; |
404 | 36 | goto basic; |
405 | 36 | } |
406 | 42 | data32.reserve(text_size); |
407 | 895k | for (const auto& value : data8) { |
408 | 895k | data32.push_back(value); |
409 | 895k | } |
410 | 42 | std::vector<uint8_t>().swap(data8); |
411 | 42 | data32.push_back(code_point); |
412 | 42 | Traits::Advance(it, code_units); |
413 | 42 | index += code_units; |
414 | 42 | ++offset; |
415 | 42 | goto supplemental; |
416 | 78 | } |
417 | 105 | line_offsets.push_back(offset + 1); |
418 | 105 | return std::make_unique<Latin1Source>( |
419 | 105 | std::move(description), std::move(line_offsets), std::move(data8)); |
420 | 387 | basic: |
421 | 4.33M | while (index < text_size) { |
422 | 4.33M | std::tie(code_point, code_units) = internal::Utf8Decode(it); |
423 | 4.33M | if (ABSL_PREDICT_FALSE(code_point == |
424 | 4.33M | internal::kUnicodeReplacementCharacter && |
425 | 4.33M | code_units == 1)) { |
426 | | // Thats an invalid UTF-8 encoding. |
427 | 9 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); |
428 | 9 | } |
429 | 4.33M | if (code_point == '\n') { |
430 | 1.97M | line_offsets.push_back(offset + 1); |
431 | 1.97M | } |
432 | 4.33M | if (code_point <= 0xffff) { |
433 | 4.33M | data16.push_back(static_cast<char16_t>(code_point)); |
434 | 4.33M | Traits::Advance(it, code_units); |
435 | 4.33M | index += code_units; |
436 | 4.33M | ++offset; |
437 | 4.33M | continue; |
438 | 4.33M | } |
439 | 69 | data32.reserve(text_size); |
440 | 824k | for (const auto& value : data16) { |
441 | 824k | data32.push_back(static_cast<char32_t>(value)); |
442 | 824k | } |
443 | 69 | std::vector<char16_t>().swap(data16); |
444 | 69 | data32.push_back(code_point); |
445 | 69 | Traits::Advance(it, code_units); |
446 | 69 | index += code_units; |
447 | 69 | ++offset; |
448 | 69 | goto supplemental; |
449 | 4.33M | } |
450 | 309 | line_offsets.push_back(offset + 1); |
451 | 309 | return std::make_unique<BasicPlaneSource>( |
452 | 309 | std::move(description), std::move(line_offsets), std::move(data16)); |
453 | 321 | supplemental: |
454 | 9.29M | while (index < text_size) { |
455 | 9.29M | std::tie(code_point, code_units) = internal::Utf8Decode(it); |
456 | 9.29M | if (ABSL_PREDICT_FALSE(code_point == |
457 | 9.29M | internal::kUnicodeReplacementCharacter && |
458 | 9.29M | code_units == 1)) { |
459 | | // Thats an invalid UTF-8 encoding. |
460 | 5 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); |
461 | 5 | } |
462 | 9.29M | if (code_point == '\n') { |
463 | 5.57M | line_offsets.push_back(offset + 1); |
464 | 5.57M | } |
465 | 9.29M | data32.push_back(code_point); |
466 | 9.29M | Traits::Advance(it, code_units); |
467 | 9.29M | index += code_units; |
468 | 9.29M | ++offset; |
469 | 9.29M | } |
470 | 316 | line_offsets.push_back(offset + 1); |
471 | 316 | return std::make_unique<SupplementalPlaneSource>( |
472 | 316 | std::move(description), std::move(line_offsets), std::move(data32)); |
473 | 321 | } source.cc:absl::lts_20250512::StatusOr<std::__1::unique_ptr<cel::Source, std::__1::default_delete<cel::Source> > > cel::common_internal::(anonymous namespace)::NewSourceImpl<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long) Line | Count | Source | 309 | 4.23k | const size_t text_size) { | 310 | 4.23k | if (ABSL_PREDICT_FALSE( | 311 | 4.23k | text_size > | 312 | 4.23k | static_cast<size_t>(std::numeric_limits<int32_t>::max()))) { | 313 | 0 | return absl::InvalidArgumentError("expression larger than 2GiB limit"); | 314 | 0 | } | 315 | 4.23k | using Traits = SourceTextTraits<T>; | 316 | 4.23k | size_t index = 0; | 317 | 4.23k | typename Traits::iterator_type it = Traits::Begin(text); | 318 | 4.23k | SourcePosition offset = 0; | 319 | 4.23k | char32_t code_point; | 320 | 4.23k | size_t code_units; | 321 | 4.23k | std::vector<uint8_t> data8; | 322 | 4.23k | std::vector<char16_t> data16; | 323 | 4.23k | std::vector<char32_t> data32; | 324 | 4.23k | absl::InlinedVector<SourcePosition, 1> line_offsets; | 325 | 39.6M | while (index < text_size) { | 326 | 39.6M | std::tie(code_point, code_units) = cel::internal::Utf8Decode(it); | 327 | 39.6M | if (ABSL_PREDICT_FALSE(code_point == | 328 | 39.6M | cel::internal::kUnicodeReplacementCharacter && | 329 | 39.6M | code_units == 1)) { | 330 | | // Thats an invalid UTF-8 encoding. | 331 | 14 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); | 332 | 14 | } | 333 | 39.6M | if (code_point == '\n') { | 334 | 18.7M | line_offsets.push_back(offset + 1); | 335 | 18.7M | } | 336 | 39.6M | if (code_point <= 0x7f) { | 337 | 39.6M | Traits::Advance(it, code_units); | 338 | 39.6M | index += code_units; | 339 | 39.6M | ++offset; | 340 | 39.6M | continue; | 341 | 39.6M | } | 342 | 748 | if (code_point <= 0xff) { | 343 | 187 | data8.reserve(text_size); | 344 | 187 | Traits::AppendTo(data8, text, index); | 345 | 187 | data8.push_back(static_cast<uint8_t>(code_point)); | 346 | 187 | Traits::Advance(it, code_units); | 347 | 187 | index += code_units; | 348 | 187 | ++offset; | 349 | 187 | goto latin1; | 350 | 187 | } | 351 | 561 | if (code_point <= 0xffff) { | 352 | 351 | data16.reserve(text_size); | 353 | 1.94M | for (size_t offset = 0; offset < index; offset++) { | 354 | 1.94M | data16.push_back(static_cast<uint8_t>(text[offset])); | 355 | 1.94M | } | 356 | 351 | data16.push_back(static_cast<char16_t>(code_point)); | 357 | 351 | Traits::Advance(it, code_units); | 358 | 351 | index += code_units; | 359 | 351 | ++offset; | 360 | 351 | goto basic; | 361 | 351 | } | 362 | 210 | data32.reserve(text_size); | 363 | 1.88M | for (size_t offset = 0; offset < index; offset++) { | 364 | 1.88M | data32.push_back(static_cast<char32_t>(text[offset])); | 365 | 1.88M | } | 366 | 210 | data32.push_back(code_point); | 367 | 210 | Traits::Advance(it, code_units); | 368 | 210 | index += code_units; | 369 | 210 | ++offset; | 370 | 210 | goto supplemental; | 371 | 561 | } | 372 | 3.47k | line_offsets.push_back(offset + 1); | 373 | 3.47k | return std::make_unique<AsciiSource>( | 374 | 3.47k | std::move(description), std::move(line_offsets), Traits::ToVector(text)); | 375 | 187 | latin1: | 376 | 4.06M | while (index < text_size) { | 377 | 4.06M | std::tie(code_point, code_units) = internal::Utf8Decode(it); | 378 | 4.06M | if (ABSL_PREDICT_FALSE(code_point == | 379 | 4.06M | internal::kUnicodeReplacementCharacter && | 380 | 4.06M | code_units == 1)) { | 381 | | // Thats an invalid UTF-8 encoding. | 382 | 4 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); | 383 | 4 | } | 384 | 4.06M | if (code_point == '\n') { | 385 | 3.30M | line_offsets.push_back(offset + 1); | 386 | 3.30M | } | 387 | 4.06M | if (code_point <= 0xff) { | 388 | 4.06M | data8.push_back(static_cast<uint8_t>(code_point)); | 389 | 4.06M | Traits::Advance(it, code_units); | 390 | 4.06M | index += code_units; | 391 | 4.06M | ++offset; | 392 | 4.06M | continue; | 393 | 4.06M | } | 394 | 78 | if (code_point <= 0xffff) { | 395 | 36 | data16.reserve(text_size); | 396 | 1.23M | for (const auto& value : data8) { | 397 | 1.23M | data16.push_back(value); | 398 | 1.23M | } | 399 | 36 | std::vector<uint8_t>().swap(data8); | 400 | 36 | data16.push_back(static_cast<char16_t>(code_point)); | 401 | 36 | Traits::Advance(it, code_units); | 402 | 36 | index += code_units; | 403 | 36 | ++offset; | 404 | 36 | goto basic; | 405 | 36 | } | 406 | 42 | data32.reserve(text_size); | 407 | 895k | for (const auto& value : data8) { | 408 | 895k | data32.push_back(value); | 409 | 895k | } | 410 | 42 | std::vector<uint8_t>().swap(data8); | 411 | 42 | data32.push_back(code_point); | 412 | 42 | Traits::Advance(it, code_units); | 413 | 42 | index += code_units; | 414 | 42 | ++offset; | 415 | 42 | goto supplemental; | 416 | 78 | } | 417 | 105 | line_offsets.push_back(offset + 1); | 418 | 105 | return std::make_unique<Latin1Source>( | 419 | 105 | std::move(description), std::move(line_offsets), std::move(data8)); | 420 | 387 | basic: | 421 | 4.33M | while (index < text_size) { | 422 | 4.33M | std::tie(code_point, code_units) = internal::Utf8Decode(it); | 423 | 4.33M | if (ABSL_PREDICT_FALSE(code_point == | 424 | 4.33M | internal::kUnicodeReplacementCharacter && | 425 | 4.33M | code_units == 1)) { | 426 | | // Thats an invalid UTF-8 encoding. | 427 | 9 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); | 428 | 9 | } | 429 | 4.33M | if (code_point == '\n') { | 430 | 1.97M | line_offsets.push_back(offset + 1); | 431 | 1.97M | } | 432 | 4.33M | if (code_point <= 0xffff) { | 433 | 4.33M | data16.push_back(static_cast<char16_t>(code_point)); | 434 | 4.33M | Traits::Advance(it, code_units); | 435 | 4.33M | index += code_units; | 436 | 4.33M | ++offset; | 437 | 4.33M | continue; | 438 | 4.33M | } | 439 | 69 | data32.reserve(text_size); | 440 | 824k | for (const auto& value : data16) { | 441 | 824k | data32.push_back(static_cast<char32_t>(value)); | 442 | 824k | } | 443 | 69 | std::vector<char16_t>().swap(data16); | 444 | 69 | data32.push_back(code_point); | 445 | 69 | Traits::Advance(it, code_units); | 446 | 69 | index += code_units; | 447 | 69 | ++offset; | 448 | 69 | goto supplemental; | 449 | 4.33M | } | 450 | 309 | line_offsets.push_back(offset + 1); | 451 | 309 | return std::make_unique<BasicPlaneSource>( | 452 | 309 | std::move(description), std::move(line_offsets), std::move(data16)); | 453 | 321 | supplemental: | 454 | 9.29M | while (index < text_size) { | 455 | 9.29M | std::tie(code_point, code_units) = internal::Utf8Decode(it); | 456 | 9.29M | if (ABSL_PREDICT_FALSE(code_point == | 457 | 9.29M | internal::kUnicodeReplacementCharacter && | 458 | 9.29M | code_units == 1)) { | 459 | | // Thats an invalid UTF-8 encoding. | 460 | 5 | return absl::InvalidArgumentError("cannot parse malformed UTF-8 input"); | 461 | 5 | } | 462 | 9.29M | if (code_point == '\n') { | 463 | 5.57M | line_offsets.push_back(offset + 1); | 464 | 5.57M | } | 465 | 9.29M | data32.push_back(code_point); | 466 | 9.29M | Traits::Advance(it, code_units); | 467 | 9.29M | index += code_units; | 468 | 9.29M | ++offset; | 469 | 9.29M | } | 470 | 316 | line_offsets.push_back(offset + 1); | 471 | 316 | return std::make_unique<SupplementalPlaneSource>( | 472 | 316 | std::move(description), std::move(line_offsets), std::move(data32)); | 473 | 321 | } |
Unexecuted instantiation: source.cc:absl::lts_20250512::StatusOr<std::__1::unique_ptr<cel::Source, std::__1::default_delete<cel::Source> > > cel::common_internal::(anonymous namespace)::NewSourceImpl<absl::lts_20250512::Cord>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, absl::lts_20250512::Cord const&, unsigned long) |
474 | | |
475 | | } // namespace |
476 | | |
477 | | } // namespace common_internal |
478 | | |
479 | | absl::optional<SourceLocation> Source::GetLocation( |
480 | 58.4k | SourcePosition position) const { |
481 | 58.4k | if (auto line_and_offset = FindLine(position); |
482 | 58.4k | ABSL_PREDICT_TRUE(line_and_offset.has_value())) { |
483 | 56.5k | return SourceLocation{line_and_offset->first, |
484 | 56.5k | position - line_and_offset->second}; |
485 | 56.5k | } |
486 | 1.91k | return absl::nullopt; |
487 | 58.4k | } |
488 | | |
489 | | absl::optional<SourcePosition> Source::GetPosition( |
490 | 1.78M | const SourceLocation& location) const { |
491 | 1.78M | if (ABSL_PREDICT_FALSE(location.line < 1 || location.column < 0)) { |
492 | 0 | return absl::nullopt; |
493 | 0 | } |
494 | 1.78M | if (auto position = FindLinePosition(location.line); |
495 | 1.78M | ABSL_PREDICT_TRUE(position.has_value())) { |
496 | 1.78M | return *position + location.column; |
497 | 1.78M | } |
498 | 0 | return absl::nullopt; |
499 | 1.78M | } |
500 | | |
501 | 58.4k | absl::optional<std::string> Source::Snippet(int32_t line) const { |
502 | 58.4k | auto content = this->content(); |
503 | 58.4k | auto start = FindLinePosition(line); |
504 | 58.4k | if (ABSL_PREDICT_FALSE(!start.has_value() || content.empty())) { |
505 | 1.91k | return absl::nullopt; |
506 | 1.91k | } |
507 | 56.5k | auto end = FindLinePosition(line + 1); |
508 | 56.5k | if (end.has_value()) { |
509 | 13.3k | return content.ToString(*start, *end - 1); |
510 | 13.3k | } |
511 | 43.1k | return content.ToString(*start); |
512 | 56.5k | } |
513 | | |
514 | 58.4k | std::string Source::DisplayErrorLocation(SourceLocation location) const { |
515 | 58.4k | constexpr char32_t kDot = '.'; |
516 | 58.4k | constexpr char32_t kHat = '^'; |
517 | | |
518 | 58.4k | constexpr char32_t kWideDot = 0xff0e; |
519 | 58.4k | constexpr char32_t kWideHat = 0xff3e; |
520 | 58.4k | absl::optional<std::string> snippet = Snippet(location.line); |
521 | 58.4k | if (!snippet || snippet->empty()) { |
522 | 2.15k | return ""; |
523 | 2.15k | } |
524 | | |
525 | 56.3k | *snippet = absl::StrReplaceAll(*snippet, {{"\t", " "}}); |
526 | 56.3k | absl::string_view snippet_view(*snippet); |
527 | 56.3k | std::string result; |
528 | 56.3k | absl::StrAppend(&result, "\n | ", *snippet); |
529 | 56.3k | absl::StrAppend(&result, "\n | "); |
530 | | |
531 | 56.3k | std::string index_line; |
532 | 236M | for (int32_t i = 0; i < location.column && !snippet_view.empty(); ++i) { |
533 | 236M | size_t count; |
534 | 236M | std::tie(std::ignore, count) = internal::Utf8Decode(snippet_view); |
535 | 236M | snippet_view.remove_prefix(count); |
536 | 236M | if (count > 1) { |
537 | 94.6k | internal::Utf8Encode(index_line, kWideDot); |
538 | 236M | } else { |
539 | 236M | internal::Utf8Encode(index_line, kDot); |
540 | 236M | } |
541 | 236M | } |
542 | 56.3k | size_t count = 0; |
543 | 56.3k | if (!snippet_view.empty()) { |
544 | 54.2k | std::tie(std::ignore, count) = internal::Utf8Decode(snippet_view); |
545 | 54.2k | } |
546 | 56.3k | if (count > 1) { |
547 | 1.68k | internal::Utf8Encode(index_line, kWideHat); |
548 | 54.6k | } else { |
549 | 54.6k | internal::Utf8Encode(index_line, kHat); |
550 | 54.6k | } |
551 | 56.3k | absl::StrAppend(&result, index_line); |
552 | 56.3k | return result; |
553 | 58.4k | } |
554 | | |
555 | 1.89M | absl::optional<SourcePosition> Source::FindLinePosition(int32_t line) const { |
556 | 1.89M | if (ABSL_PREDICT_FALSE(line < 1)) { |
557 | 1.91k | return absl::nullopt; |
558 | 1.91k | } |
559 | 1.89M | if (line == 1) { |
560 | 1.60M | return SourcePosition{0}; |
561 | 1.60M | } |
562 | 288k | const auto line_offsets = this->line_offsets(); |
563 | 288k | if (ABSL_PREDICT_TRUE(line <= static_cast<int32_t>(line_offsets.size()))) { |
564 | 245k | return line_offsets[static_cast<size_t>(line - 2)]; |
565 | 245k | } |
566 | 43.1k | return absl::nullopt; |
567 | 288k | } |
568 | | |
569 | | absl::optional<std::pair<int32_t, SourcePosition>> Source::FindLine( |
570 | 58.4k | SourcePosition position) const { |
571 | 58.4k | if (ABSL_PREDICT_FALSE(position < 0)) { |
572 | 1.91k | return absl::nullopt; |
573 | 1.91k | } |
574 | 56.5k | int32_t line = 1; |
575 | 56.5k | const auto line_offsets = this->line_offsets(); |
576 | 806M | for (const auto& line_offset : line_offsets) { |
577 | 806M | if (line_offset > position) { |
578 | 56.5k | break; |
579 | 56.5k | } |
580 | 806M | ++line; |
581 | 806M | } |
582 | 56.5k | if (line == 1) { |
583 | 41.6k | return std::make_pair(line, SourcePosition{0}); |
584 | 41.6k | } |
585 | 14.9k | return std::make_pair(line, line_offsets[static_cast<size_t>(line) - 2]); |
586 | 56.5k | } |
587 | | |
588 | | absl::StatusOr<absl_nonnull SourcePtr> NewSource(absl::string_view content, |
589 | 4.23k | std::string description) { |
590 | 4.23k | return common_internal::NewSourceImpl(std::move(description), content, |
591 | 4.23k | content.size()); |
592 | 4.23k | } |
593 | | |
594 | | absl::StatusOr<absl_nonnull SourcePtr> NewSource(const absl::Cord& content, |
595 | 0 | std::string description) { |
596 | 0 | return common_internal::NewSourceImpl(std::move(description), content, |
597 | 0 | content.size()); |
598 | 0 | } |
599 | | |
600 | | } // namespace cel |