/proc/self/cwd/parser/parser.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2021 Google LLC |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "parser/parser.h" |
16 | | |
17 | | #include <algorithm> |
18 | | #include <any> |
19 | | #include <cstdint> |
20 | | #include <memory> |
21 | | #include <string> |
22 | | #include <utility> |
23 | | #include <vector> |
24 | | |
25 | | #include "google/api/expr/v1alpha1/syntax.pb.h" |
26 | | #include "google/protobuf/struct.pb.h" |
27 | | #include "absl/base/macros.h" |
28 | | #include "absl/base/optimization.h" |
29 | | #include "absl/memory/memory.h" |
30 | | #include "absl/status/status.h" |
31 | | #include "absl/status/statusor.h" |
32 | | #include "absl/strings/match.h" |
33 | | #include "absl/strings/numbers.h" |
34 | | #include "absl/strings/str_format.h" |
35 | | #include "absl/strings/str_join.h" |
36 | | #include "absl/strings/str_replace.h" |
37 | | #include "absl/strings/string_view.h" |
38 | | #include "absl/types/optional.h" |
39 | | #include "absl/types/variant.h" |
40 | | #include "antlr4-runtime.h" |
41 | | #include "common/operators.h" |
42 | | #include "internal/status_macros.h" |
43 | | #include "internal/strings.h" |
44 | | #include "internal/unicode.h" |
45 | | #include "internal/utf8.h" |
46 | | #include "parser/internal/CelBaseVisitor.h" |
47 | | #include "parser/internal/CelLexer.h" |
48 | | #include "parser/internal/CelParser.h" |
49 | | #include "parser/macro.h" |
50 | | #include "parser/options.h" |
51 | | #include "parser/source_factory.h" |
52 | | |
53 | | namespace google::api::expr::parser { |
54 | | |
55 | | namespace { |
56 | | |
57 | | using ::antlr4::CharStream; |
58 | | using ::antlr4::CommonTokenStream; |
59 | | using ::antlr4::DefaultErrorStrategy; |
60 | | using ::antlr4::ParseCancellationException; |
61 | | using ::antlr4::Parser; |
62 | | using ::antlr4::ParserRuleContext; |
63 | | using ::antlr4::Token; |
64 | | using ::antlr4::misc::IntervalSet; |
65 | | using ::antlr4::tree::ErrorNode; |
66 | | using ::antlr4::tree::ParseTreeListener; |
67 | | using ::antlr4::tree::TerminalNode; |
68 | | using ::cel_parser_internal::CelBaseVisitor; |
69 | | using ::cel_parser_internal::CelLexer; |
70 | | using ::cel_parser_internal::CelParser; |
71 | | using common::CelOperator; |
72 | | using common::ReverseLookupOperator; |
73 | | using ::google::api::expr::v1alpha1::Expr; |
74 | | using ::google::api::expr::v1alpha1::ParsedExpr; |
75 | | |
76 | | class CodePointBuffer final { |
77 | | public: |
78 | | explicit CodePointBuffer(absl::string_view data) |
79 | 7.51k | : storage_(absl::in_place_index<0>, data) {} |
80 | | |
81 | | explicit CodePointBuffer(std::string data) |
82 | 185 | : storage_(absl::in_place_index<1>, std::move(data)) {} |
83 | | |
84 | | explicit CodePointBuffer(std::u16string data) |
85 | 727 | : storage_(absl::in_place_index<2>, std::move(data)) {} |
86 | | |
87 | | explicit CodePointBuffer(std::u32string data) |
88 | 897 | : storage_(absl::in_place_index<3>, std::move(data)) {} |
89 | | |
90 | 9.32k | size_t size() const { return absl::visit(SizeVisitor{}, storage_); } |
91 | | |
92 | 134M | char32_t at(size_t index) const { |
93 | 134M | ABSL_ASSERT(index < size()); |
94 | 134M | return absl::visit(AtVisitor{index}, storage_); |
95 | 134M | } |
96 | | |
97 | 6.56M | std::string ToString(size_t begin, size_t end) const { |
98 | 6.56M | ABSL_ASSERT(begin <= end); |
99 | 6.56M | ABSL_ASSERT(begin < size()); |
100 | 6.56M | ABSL_ASSERT(end <= size()); |
101 | 6.56M | return absl::visit(ToStringVisitor{begin, end}, storage_); |
102 | 6.56M | } |
103 | | |
104 | | private: |
105 | | struct SizeVisitor final { |
106 | 7.51k | size_t operator()(absl::string_view ascii) const { return ascii.size(); } |
107 | | |
108 | 185 | size_t operator()(const std::string& latin1) const { return latin1.size(); } |
109 | | |
110 | 727 | size_t operator()(const std::u16string& basic) const { |
111 | 727 | return basic.size(); |
112 | 727 | } |
113 | | |
114 | 897 | size_t operator()(const std::u32string& supplemental) const { |
115 | 897 | return supplemental.size(); |
116 | 897 | } |
117 | | }; |
118 | | |
119 | | struct AtVisitor final { |
120 | | const size_t index; |
121 | | |
122 | 72.3M | size_t operator()(absl::string_view ascii) const { |
123 | 72.3M | return static_cast<uint8_t>(ascii[index]); |
124 | 72.3M | } |
125 | | |
126 | 2.76M | size_t operator()(const std::string& latin1) const { |
127 | 2.76M | return static_cast<uint8_t>(latin1[index]); |
128 | 2.76M | } |
129 | | |
130 | 10.1M | size_t operator()(const std::u16string& basic) const { |
131 | 10.1M | return basic[index]; |
132 | 10.1M | } |
133 | | |
134 | 49.5M | size_t operator()(const std::u32string& supplemental) const { |
135 | 49.5M | return supplemental[index]; |
136 | 49.5M | } |
137 | | }; |
138 | | |
139 | | struct ToStringVisitor final { |
140 | | const size_t begin; |
141 | | const size_t end; |
142 | | |
143 | 3.20M | std::string operator()(absl::string_view ascii) const { |
144 | 3.20M | return std::string(ascii.substr(begin, end - begin)); |
145 | 3.20M | } |
146 | | |
147 | 82.0k | std::string operator()(const std::string& latin1) const { |
148 | 82.0k | std::string result; |
149 | 82.0k | result.reserve((end - begin) * |
150 | 82.0k | 2); // Worst case is 2 code units per code point. |
151 | 1.54M | for (size_t index = begin; index < end; index++) { |
152 | 1.46M | cel::internal::Utf8Encode( |
153 | 1.46M | &result, |
154 | 1.46M | static_cast<char32_t>(static_cast<uint8_t>(latin1[index]))); |
155 | 1.46M | } |
156 | 82.0k | result.shrink_to_fit(); |
157 | 82.0k | return result; |
158 | 82.0k | } |
159 | | |
160 | 858k | std::string operator()(const std::u16string& basic) const { |
161 | 858k | std::string result; |
162 | 858k | result.reserve((end - begin) * |
163 | 858k | 3); // Worst case is 3 code units per code point. |
164 | 4.19M | for (size_t index = begin; index < end; index++) { |
165 | 3.33M | cel::internal::Utf8Encode(&result, static_cast<char32_t>(basic[index])); |
166 | 3.33M | } |
167 | 858k | result.shrink_to_fit(); |
168 | 858k | return result; |
169 | 858k | } |
170 | | |
171 | 2.41M | std::string operator()(const std::u32string& supplemental) const { |
172 | 2.41M | std::string result; |
173 | 2.41M | result.reserve((end - begin) * |
174 | 2.41M | 4); // Worst case is 4 code units per code point. |
175 | 12.2M | for (size_t index = begin; index < end; index++) { |
176 | 9.81M | cel::internal::Utf8Encode(&result, supplemental[index]); |
177 | 9.81M | } |
178 | 2.41M | result.shrink_to_fit(); |
179 | 2.41M | return result; |
180 | 2.41M | } |
181 | | }; |
182 | | |
183 | | absl::variant<absl::string_view, std::string, std::u16string, std::u32string> |
184 | | storage_; |
185 | | }; |
186 | | |
187 | | // Takes a UTF-8 encoded string and produces a CodePointBuffer which provides |
188 | | // constant time indexing to each code point. If all code points fall in the |
189 | | // ASCII range then the view is used as is. If all code points fall in the |
190 | | // Latin-1 range then the text is represented as std::string. If all code points |
191 | | // fall in the BMP then the text is represented as std::u16string. Otherwise the |
192 | | // text is represented as std::u32string. This is much more efficient than the |
193 | | // default ANTLRv4 implementation which unconditionally converts to |
194 | | // std::u32string. |
195 | 9.61k | absl::StatusOr<CodePointBuffer> MakeCodePointBuffer(absl::string_view text) { |
196 | 9.61k | size_t index = 0; |
197 | 9.61k | char32_t code_point; |
198 | 9.61k | size_t code_units; |
199 | 9.61k | std::string data8; |
200 | 9.61k | std::u16string data16; |
201 | 9.61k | std::u32string data32; |
202 | 48.9M | while (index < text.size()) { |
203 | 48.9M | std::tie(code_point, code_units) = |
204 | 48.9M | cel::internal::Utf8Decode(text.substr(index)); |
205 | 48.9M | if (code_point <= 0x7f) { |
206 | 48.9M | index += code_units; |
207 | 48.9M | continue; |
208 | 48.9M | } |
209 | 2.10k | if (code_point <= 0xff) { |
210 | 525 | data8.reserve(text.size()); |
211 | 525 | data8.append(text.data(), index); |
212 | 525 | data8.push_back(static_cast<char>(static_cast<uint8_t>(code_point))); |
213 | 525 | index += code_units; |
214 | 525 | goto latin1; |
215 | 525 | } |
216 | 1.57k | if (code_point == cel::internal::kUnicodeReplacementCharacter && |
217 | 1.57k | code_units == 1) { |
218 | | // That's an invalid UTF-8 encoding. |
219 | 39 | return absl::InvalidArgumentError("Cannot parse malformed UTF-8 input"); |
220 | 39 | } |
221 | 1.54k | if (code_point <= 0xffff) { |
222 | 924 | data16.reserve(text.size()); |
223 | 8.42M | for (size_t offset = 0; offset < index; offset++) { |
224 | 8.42M | data16.push_back(static_cast<uint8_t>(text[offset])); |
225 | 8.42M | } |
226 | 924 | data16.push_back(static_cast<char16_t>(code_point)); |
227 | 924 | index += code_units; |
228 | 924 | goto basic; |
229 | 924 | } |
230 | 616 | data32.reserve(text.size()); |
231 | 4.45M | for (size_t offset = 0; offset < index; offset++) { |
232 | 4.45M | data32.push_back(static_cast<char32_t>(text[offset])); |
233 | 4.45M | } |
234 | 616 | data32.push_back(code_point); |
235 | 616 | index += code_units; |
236 | 616 | goto supplemental; |
237 | 1.54k | } |
238 | 7.51k | return CodePointBuffer(text); |
239 | 525 | latin1: |
240 | 6.74M | while (index < text.size()) { |
241 | 6.74M | std::tie(code_point, code_units) = |
242 | 6.74M | cel::internal::Utf8Decode(text.substr(index)); |
243 | 6.74M | if (code_point <= 0xff) { |
244 | 6.74M | data8.push_back(static_cast<char>(static_cast<uint8_t>(code_point))); |
245 | 6.74M | index += code_units; |
246 | 6.74M | continue; |
247 | 6.74M | } |
248 | 340 | if (code_point == cel::internal::kUnicodeReplacementCharacter && |
249 | 340 | code_units == 1) { |
250 | | // That's an invalid UTF-8 encoding. |
251 | 12 | return absl::InvalidArgumentError("Cannot parse malformed UTF-8 input"); |
252 | 12 | } |
253 | 328 | if (code_point <= 0xffff) { |
254 | 183 | data16.reserve(text.size()); |
255 | 2.92M | for (const auto& value : data8) { |
256 | 2.92M | data16.push_back(static_cast<uint8_t>(value)); |
257 | 2.92M | } |
258 | 183 | std::string().swap(data8); |
259 | 183 | data16.push_back(static_cast<char16_t>(code_point)); |
260 | 183 | index += code_units; |
261 | 183 | goto basic; |
262 | 183 | } |
263 | 145 | data32.reserve(text.size()); |
264 | 4.38M | for (const auto& value : data8) { |
265 | 4.38M | data32.push_back(static_cast<uint8_t>(value)); |
266 | 4.38M | } |
267 | 145 | std::string().swap(data8); |
268 | 145 | data32.push_back(code_point); |
269 | 145 | index += code_units; |
270 | 145 | goto supplemental; |
271 | 328 | } |
272 | 185 | return CodePointBuffer(std::move(data8)); |
273 | 1.10k | basic: |
274 | 12.6M | while (index < text.size()) { |
275 | 12.6M | std::tie(code_point, code_units) = |
276 | 12.6M | cel::internal::Utf8Decode(text.substr(index)); |
277 | 12.6M | if (code_point == cel::internal::kUnicodeReplacementCharacter && |
278 | 12.6M | code_units == 1) { |
279 | | // That's an invalid UTF-8 encoding. |
280 | 80 | return absl::InvalidArgumentError("Cannot parse malformed UTF-8 input"); |
281 | 80 | } |
282 | 12.6M | if (code_point <= 0xffff) { |
283 | 12.6M | data16.push_back(static_cast<char16_t>(code_point)); |
284 | 12.6M | index += code_units; |
285 | 12.6M | continue; |
286 | 12.6M | } |
287 | 300 | data32.reserve(text.size()); |
288 | 5.78M | for (const auto& value : data16) { |
289 | 5.78M | data32.push_back(static_cast<char32_t>(value)); |
290 | 5.78M | } |
291 | 300 | std::u16string().swap(data16); |
292 | 300 | data32.push_back(code_point); |
293 | 300 | index += code_units; |
294 | 300 | goto supplemental; |
295 | 12.6M | } |
296 | 727 | return CodePointBuffer(std::move(data16)); |
297 | 1.06k | supplemental: |
298 | 31.7M | while (index < text.size()) { |
299 | 31.7M | std::tie(code_point, code_units) = |
300 | 31.7M | cel::internal::Utf8Decode(text.substr(index)); |
301 | 31.7M | if (code_point == cel::internal::kUnicodeReplacementCharacter && |
302 | 31.7M | code_units == 1) { |
303 | | // That's an invalid UTF-8 encoding. |
304 | 164 | return absl::InvalidArgumentError("Cannot parse malformed UTF-8 input"); |
305 | 164 | } |
306 | 31.7M | data32.push_back(code_point); |
307 | 31.7M | index += code_units; |
308 | 31.7M | } |
309 | 897 | return CodePointBuffer(std::move(data32)); |
310 | 1.06k | } |
311 | | |
312 | | class CodePointStream final : public CharStream { |
313 | | public: |
314 | | CodePointStream(CodePointBuffer* buffer, absl::string_view source_name) |
315 | | : buffer_(buffer), |
316 | | source_name_(source_name), |
317 | | size_(buffer_->size()), |
318 | 9.32k | index_(0) {} |
319 | | |
320 | 57.9M | void consume() override { |
321 | 57.9M | if (ABSL_PREDICT_FALSE(index_ >= size_)) { |
322 | 0 | ABSL_ASSERT(LA(1) == IntStream::EOF); |
323 | 0 | throw antlr4::IllegalStateException("cannot consume EOF"); |
324 | 0 | } |
325 | 57.9M | index_++; |
326 | 57.9M | } |
327 | | |
328 | 134M | size_t LA(ssize_t i) override { |
329 | 134M | if (ABSL_PREDICT_FALSE(i == 0)) { |
330 | 0 | return 0; |
331 | 0 | } |
332 | 134M | auto p = static_cast<ssize_t>(index_); |
333 | 134M | if (i < 0) { |
334 | 0 | i++; |
335 | 0 | if (p + i - 1 < 0) { |
336 | 0 | return IntStream::EOF; |
337 | 0 | } |
338 | 0 | } |
339 | 134M | if (p + i - 1 >= static_cast<ssize_t>(size_)) { |
340 | 19.4k | return IntStream::EOF; |
341 | 19.4k | } |
342 | 134M | return buffer_->at(static_cast<size_t>(p + i - 1)); |
343 | 134M | } |
344 | | |
345 | 16.9M | ssize_t mark() override { return -1; } |
346 | | |
347 | 16.9M | void release(ssize_t marker) override {} |
348 | | |
349 | 46.8M | size_t index() override { return index_; } |
350 | | |
351 | 7.51M | void seek(size_t index) override { index_ = std::min(index, size_); } |
352 | | |
353 | 4.64M | size_t size() override { return size_; } |
354 | | |
355 | 0 | std::string getSourceName() const override { |
356 | 0 | return source_name_.empty() ? IntStream::UNKNOWN_SOURCE_NAME |
357 | 0 | : std::string(source_name_); |
358 | 0 | } |
359 | | |
360 | 6.56M | std::string getText(const antlr4::misc::Interval& interval) override { |
361 | 6.56M | if (ABSL_PREDICT_FALSE(interval.a < 0 || interval.b < 0)) { |
362 | 0 | return std::string(); |
363 | 0 | } |
364 | 6.56M | size_t start = static_cast<size_t>(interval.a); |
365 | 6.56M | if (ABSL_PREDICT_FALSE(start >= size_)) { |
366 | 0 | return std::string(); |
367 | 0 | } |
368 | 6.56M | size_t stop = static_cast<size_t>(interval.b); |
369 | 6.56M | if (ABSL_PREDICT_FALSE(stop >= size_)) { |
370 | 571 | stop = size_ - 1; |
371 | 571 | } |
372 | 6.56M | return buffer_->ToString(start, stop + 1); |
373 | 6.56M | } |
374 | | |
375 | 0 | std::string toString() const override { return buffer_->ToString(0, size_); } |
376 | | |
377 | | private: |
378 | | CodePointBuffer* const buffer_; |
379 | | const absl::string_view source_name_; |
380 | | const size_t size_; |
381 | | size_t index_; |
382 | | }; |
383 | | |
384 | | // Scoped helper for incrementing the parse recursion count. |
385 | | // Increments on creation, decrements on destruction (stack unwind). |
386 | | class ScopedIncrement final { |
387 | | public: |
388 | | explicit ScopedIncrement(int& recursion_depth) |
389 | 14.2M | : recursion_depth_(recursion_depth) { |
390 | 14.2M | ++recursion_depth_; |
391 | 14.2M | } |
392 | | |
393 | 14.2M | ~ScopedIncrement() { --recursion_depth_; } |
394 | | |
395 | | private: |
396 | | int& recursion_depth_; |
397 | | }; |
398 | | |
399 | | // ExpressionBalancer performs tree balancing on operators whose arguments are of equal |
400 | | // precedence. |
401 | | // |
402 | | // The purpose of the balancer is to ensure a compact serialization format for |
403 | | // the logical &&, || operators which have a tendency to create long DAGs which |
404 | | // are skewed in one direction. Since the operators are commutative, re-ordering |
405 | | // the terms *must not* affect the evaluation result. |
406 | | // |
407 | | // Based on code from //third_party/cel/go/parser/helper.go |
408 | | class ExpressionBalancer final { |
409 | | public: |
410 | | ExpressionBalancer(std::shared_ptr<SourceFactory> sf, std::string function, |
411 | | Expr expr); |
412 | | |
413 | | // AddTerm adds an operation identifier and term to the set of terms to be |
414 | | // balanced. |
415 | | void AddTerm(int64_t op, Expr term); |
416 | | |
417 | | // Balance creates a balanced tree from the sub-terms and returns the final |
418 | | // Expr value. |
419 | | Expr Balance(); |
420 | | |
421 | | private: |
422 | | // BalancedTree recursively balances the terms provided to a commutative |
423 | | // operator. |
424 | | Expr BalancedTree(int lo, int hi); |
425 | | |
426 | | private: |
427 | | std::shared_ptr<SourceFactory> sf_; |
428 | | std::string function_; |
429 | | std::vector<Expr> terms_; |
430 | | std::vector<int64_t> ops_; |
431 | | }; |
432 | | |
433 | | ExpressionBalancer::ExpressionBalancer(std::shared_ptr<SourceFactory> sf, |
434 | | std::string function, Expr expr) |
435 | | : sf_(std::move(sf)), |
436 | | function_(std::move(function)), |
437 | | terms_{std::move(expr)}, |
438 | 1.97k | ops_{} {} |
439 | | |
440 | 145k | void ExpressionBalancer::AddTerm(int64_t op, Expr term) { |
441 | 145k | terms_.push_back(std::move(term)); |
442 | 145k | ops_.push_back(op); |
443 | 145k | } |
444 | | |
445 | 1.97k | Expr ExpressionBalancer::Balance() { |
446 | 1.97k | if (terms_.size() == 1) { |
447 | 0 | return terms_[0]; |
448 | 0 | } |
449 | 1.97k | return BalancedTree(0, ops_.size() - 1); |
450 | 1.97k | } |
451 | | |
452 | 145k | Expr ExpressionBalancer::BalancedTree(int lo, int hi) { |
453 | 145k | int mid = (lo + hi + 1) / 2; |
454 | | |
455 | 145k | Expr left; |
456 | 145k | if (mid == lo) { |
457 | 64.4k | left = terms_[mid]; |
458 | 81.0k | } else { |
459 | 81.0k | left = BalancedTree(lo, mid - 1); |
460 | 81.0k | } |
461 | | |
462 | 145k | Expr right; |
463 | 145k | if (mid == hi) { |
464 | 83.0k | right = terms_[mid + 1]; |
465 | 83.0k | } else { |
466 | 62.4k | right = BalancedTree(mid + 1, hi); |
467 | 62.4k | } |
468 | 145k | return sf_->NewGlobalCall(ops_[mid], function_, |
469 | 145k | {std::move(left), std::move(right)}); |
470 | 145k | } |
471 | | |
472 | | class ParserVisitor final : public CelBaseVisitor, |
473 | | public antlr4::BaseErrorListener { |
474 | | public: |
475 | | ParserVisitor(absl::string_view description, absl::string_view expression, |
476 | | const int max_recursion_depth, |
477 | | const std::vector<Macro>& macros = {}, |
478 | | const bool add_macro_calls = false); |
479 | | ~ParserVisitor() override; |
480 | | |
481 | | antlrcpp::Any visit(antlr4::tree::ParseTree* tree) override; |
482 | | |
483 | | antlrcpp::Any visitStart(CelParser::StartContext* ctx) override; |
484 | | antlrcpp::Any visitExpr(CelParser::ExprContext* ctx) override; |
485 | | antlrcpp::Any visitConditionalOr( |
486 | | CelParser::ConditionalOrContext* ctx) override; |
487 | | antlrcpp::Any visitConditionalAnd( |
488 | | CelParser::ConditionalAndContext* ctx) override; |
489 | | antlrcpp::Any visitRelation(CelParser::RelationContext* ctx) override; |
490 | | antlrcpp::Any visitCalc(CelParser::CalcContext* ctx) override; |
491 | | antlrcpp::Any visitUnary(CelParser::UnaryContext* ctx); |
492 | | antlrcpp::Any visitLogicalNot(CelParser::LogicalNotContext* ctx) override; |
493 | | antlrcpp::Any visitNegate(CelParser::NegateContext* ctx) override; |
494 | | antlrcpp::Any visitSelectOrCall(CelParser::SelectOrCallContext* ctx) override; |
495 | | antlrcpp::Any visitIndex(CelParser::IndexContext* ctx) override; |
496 | | antlrcpp::Any visitCreateMessage( |
497 | | CelParser::CreateMessageContext* ctx) override; |
498 | | antlrcpp::Any visitFieldInitializerList( |
499 | | CelParser::FieldInitializerListContext* ctx) override; |
500 | | antlrcpp::Any visitIdentOrGlobalCall( |
501 | | CelParser::IdentOrGlobalCallContext* ctx) override; |
502 | | antlrcpp::Any visitNested(CelParser::NestedContext* ctx) override; |
503 | | antlrcpp::Any visitCreateList(CelParser::CreateListContext* ctx) override; |
504 | | std::vector<google::api::expr::v1alpha1::Expr> visitList( |
505 | | CelParser::ExprListContext* ctx); |
506 | | antlrcpp::Any visitCreateStruct(CelParser::CreateStructContext* ctx) override; |
507 | | antlrcpp::Any visitConstantLiteral( |
508 | | CelParser::ConstantLiteralContext* ctx) override; |
509 | | antlrcpp::Any visitPrimaryExpr(CelParser::PrimaryExprContext* ctx) override; |
510 | | antlrcpp::Any visitMemberExpr(CelParser::MemberExprContext* ctx) override; |
511 | | |
512 | | antlrcpp::Any visitMapInitializerList( |
513 | | CelParser::MapInitializerListContext* ctx) override; |
514 | | antlrcpp::Any visitInt(CelParser::IntContext* ctx) override; |
515 | | antlrcpp::Any visitUint(CelParser::UintContext* ctx) override; |
516 | | antlrcpp::Any visitDouble(CelParser::DoubleContext* ctx) override; |
517 | | antlrcpp::Any visitString(CelParser::StringContext* ctx) override; |
518 | | antlrcpp::Any visitBytes(CelParser::BytesContext* ctx) override; |
519 | | antlrcpp::Any visitBoolTrue(CelParser::BoolTrueContext* ctx) override; |
520 | | antlrcpp::Any visitBoolFalse(CelParser::BoolFalseContext* ctx) override; |
521 | | antlrcpp::Any visitNull(CelParser::NullContext* ctx) override; |
522 | | google::api::expr::v1alpha1::SourceInfo source_info() const; |
523 | | EnrichedSourceInfo enriched_source_info() const; |
524 | | void syntaxError(antlr4::Recognizer* recognizer, |
525 | | antlr4::Token* offending_symbol, size_t line, size_t col, |
526 | | const std::string& msg, std::exception_ptr e) override; |
527 | | bool HasErrored() const; |
528 | | |
529 | | std::string ErrorMessage() const; |
530 | | |
531 | | private: |
532 | | Expr GlobalCallOrMacro(int64_t expr_id, const std::string& function, |
533 | | const std::vector<Expr>& args); |
534 | | Expr ReceiverCallOrMacro(int64_t expr_id, const std::string& function, |
535 | | const Expr& target, const std::vector<Expr>& args); |
536 | | bool ExpandMacro(int64_t expr_id, const std::string& function, |
537 | | const Expr& target, const std::vector<Expr>& args, |
538 | | Expr* macro_expr); |
539 | | std::string ExtractQualifiedName(antlr4::ParserRuleContext* ctx, |
540 | | const Expr* e); |
541 | | |
542 | | private: |
543 | | absl::string_view description_; |
544 | | absl::string_view expression_; |
545 | | std::shared_ptr<SourceFactory> sf_; |
546 | | std::map<std::string, Macro> macros_; |
547 | | int recursion_depth_; |
548 | | const int max_recursion_depth_; |
549 | | const bool add_macro_calls_; |
550 | | }; |
551 | | |
552 | | ParserVisitor::ParserVisitor(absl::string_view description, |
553 | | absl::string_view expression, |
554 | | const int max_recursion_depth, |
555 | | const std::vector<Macro>& macros, |
556 | | const bool add_macro_calls) |
557 | | : description_(description), |
558 | | expression_(expression), |
559 | | sf_(std::make_shared<SourceFactory>(expression)), |
560 | | recursion_depth_(0), |
561 | | max_recursion_depth_(max_recursion_depth), |
562 | 9.21k | add_macro_calls_(add_macro_calls) { |
563 | 64.5k | for (const auto& m : macros) { |
564 | 64.5k | macros_.emplace(m.key(), m); |
565 | 64.5k | } |
566 | 9.21k | } |
567 | | |
568 | 9.21k | ParserVisitor::~ParserVisitor() {} |
569 | | |
570 | | template <typename T, typename = std::enable_if_t< |
571 | | std::is_base_of<antlr4::tree::ParseTree, T>::value>> |
572 | 92.9M | T* tree_as(antlr4::tree::ParseTree* tree) { |
573 | 92.9M | return dynamic_cast<T*>(tree); |
574 | 92.9M | } parser.cc:cel_parser_internal::CelParser::PrimaryExprContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::PrimaryExprContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 5.81M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 5.81M | return dynamic_cast<T*>(tree); | 574 | 5.81M | } |
parser.cc:cel_parser_internal::CelParser::SelectOrCallContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::SelectOrCallContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 94.1k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 94.1k | return dynamic_cast<T*>(tree); | 574 | 94.1k | } |
parser.cc:cel_parser_internal::CelParser::IndexContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::IndexContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 20.9k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 20.9k | return dynamic_cast<T*>(tree); | 574 | 20.9k | } |
parser.cc:cel_parser_internal::CelParser::CreateMessageContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::CreateMessageContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 5.49k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 5.49k | return dynamic_cast<T*>(tree); | 574 | 5.49k | } |
parser.cc:cel_parser_internal::CelParser::NestedContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::NestedContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 2.89M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 2.89M | return dynamic_cast<T*>(tree); | 574 | 2.89M | } |
parser.cc:cel_parser_internal::CelParser::IdentOrGlobalCallContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::IdentOrGlobalCallContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 2.89M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 2.89M | return dynamic_cast<T*>(tree); | 574 | 2.89M | } |
parser.cc:cel_parser_internal::CelParser::CreateListContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::CreateListContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 1.78M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 1.78M | return dynamic_cast<T*>(tree); | 574 | 1.78M | } |
parser.cc:cel_parser_internal::CelParser::CreateStructContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::CreateStructContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 1.78M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 1.78M | return dynamic_cast<T*>(tree); | 574 | 1.78M | } |
parser.cc:cel_parser_internal::CelParser::ConstantLiteralContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::ConstantLiteralContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 1.12M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 1.12M | return dynamic_cast<T*>(tree); | 574 | 1.12M | } |
parser.cc:cel_parser_internal::CelParser::IntContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::IntContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 1.12M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 1.12M | return dynamic_cast<T*>(tree); | 574 | 1.12M | } |
parser.cc:cel_parser_internal::CelParser::UintContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::UintContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 525k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 525k | return dynamic_cast<T*>(tree); | 574 | 525k | } |
parser.cc:cel_parser_internal::CelParser::DoubleContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::DoubleContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 523k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 523k | return dynamic_cast<T*>(tree); | 574 | 523k | } |
parser.cc:cel_parser_internal::CelParser::StringContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::StringContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 503k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 503k | return dynamic_cast<T*>(tree); | 574 | 503k | } |
parser.cc:cel_parser_internal::CelParser::BytesContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::BytesContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 5.58k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 5.58k | return dynamic_cast<T*>(tree); | 574 | 5.58k | } |
parser.cc:cel_parser_internal::CelParser::BoolFalseContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::BoolFalseContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 1.69k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 1.69k | return dynamic_cast<T*>(tree); | 574 | 1.69k | } |
parser.cc:cel_parser_internal::CelParser::BoolTrueContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::BoolTrueContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 1.17k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 1.17k | return dynamic_cast<T*>(tree); | 574 | 1.17k | } |
parser.cc:cel_parser_internal::CelParser::NullContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::NullContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 819 | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 819 | return dynamic_cast<T*>(tree); | 574 | 819 | } |
parser.cc:cel_parser_internal::CelParser::StartContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::StartContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 14.2M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 14.2M | return dynamic_cast<T*>(tree); | 574 | 14.2M | } |
parser.cc:cel_parser_internal::CelParser::ExprContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::ExprContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 14.1M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 14.1M | return dynamic_cast<T*>(tree); | 574 | 14.1M | } |
parser.cc:cel_parser_internal::CelParser::ConditionalAndContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::ConditionalAndContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 13.1M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 13.1M | return dynamic_cast<T*>(tree); | 574 | 13.1M | } |
parser.cc:cel_parser_internal::CelParser::ConditionalOrContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::ConditionalOrContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 10.9M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 10.9M | return dynamic_cast<T*>(tree); | 574 | 10.9M | } |
parser.cc:cel_parser_internal::CelParser::RelationContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::RelationContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 8.80M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 8.80M | return dynamic_cast<T*>(tree); | 574 | 8.80M | } |
parser.cc:cel_parser_internal::CelParser::CalcContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::CalcContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 6.49M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 6.49M | return dynamic_cast<T*>(tree); | 574 | 6.49M | } |
parser.cc:cel_parser_internal::CelParser::LogicalNotContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::LogicalNotContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 2.98M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 2.98M | return dynamic_cast<T*>(tree); | 574 | 2.98M | } |
parser.cc:cel_parser_internal::CelParser::MemberExprContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::MemberExprContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 2.91M | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 2.91M | return dynamic_cast<T*>(tree); | 574 | 2.91M | } |
parser.cc:cel_parser_internal::CelParser::MapInitializerListContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::MapInitializerListContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 73.7k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 73.7k | return dynamic_cast<T*>(tree); | 574 | 73.7k | } |
parser.cc:cel_parser_internal::CelParser::NegateContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::NegateContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 73.7k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 73.7k | return dynamic_cast<T*>(tree); | 574 | 73.7k | } |
parser.cc:cel_parser_internal::CelParser::UnaryContext* google::api::expr::parser::(anonymous namespace)::tree_as<cel_parser_internal::CelParser::UnaryContext, void>(antlr4::tree::ParseTree*) Line | Count | Source | 572 | 11.6k | T* tree_as(antlr4::tree::ParseTree* tree) { | 573 | 11.6k | return dynamic_cast<T*>(tree); | 574 | 11.6k | } |
Unexecuted instantiation: parser.cc:antlr4::ParserRuleContext* google::api::expr::parser::(anonymous namespace)::tree_as<antlr4::ParserRuleContext, void>(antlr4::tree::ParseTree*) |
575 | | |
576 | 14.2M | antlrcpp::Any ParserVisitor::visit(antlr4::tree::ParseTree* tree) { |
577 | 14.2M | ScopedIncrement inc(recursion_depth_); |
578 | 14.2M | if (recursion_depth_ > max_recursion_depth_) { |
579 | 1.01k | return sf_->ReportError( |
580 | 1.01k | SourceFactory::NoLocation(), |
581 | 1.01k | absl::StrFormat("Exceeded max recursion depth of %d when parsing.", |
582 | 1.01k | max_recursion_depth_)); |
583 | 1.01k | } |
584 | 14.2M | if (auto* ctx = tree_as<CelParser::StartContext>(tree)) { |
585 | 8.59k | return visitStart(ctx); |
586 | 14.1M | } else if (auto* ctx = tree_as<CelParser::ExprContext>(tree)) { |
587 | 1.00M | return visitExpr(ctx); |
588 | 13.1M | } else if (auto* ctx = tree_as<CelParser::ConditionalAndContext>(tree)) { |
589 | 2.23M | return visitConditionalAnd(ctx); |
590 | 10.9M | } else if (auto* ctx = tree_as<CelParser::ConditionalOrContext>(tree)) { |
591 | 2.14M | return visitConditionalOr(ctx); |
592 | 8.80M | } else if (auto* ctx = tree_as<CelParser::RelationContext>(tree)) { |
593 | 2.31M | return visitRelation(ctx); |
594 | 6.49M | } else if (auto* ctx = tree_as<CelParser::CalcContext>(tree)) { |
595 | 3.50M | return visitCalc(ctx); |
596 | 3.50M | } else if (auto* ctx = tree_as<CelParser::LogicalNotContext>(tree)) { |
597 | 3.00k | return visitLogicalNot(ctx); |
598 | 2.98M | } else if (auto* ctx = tree_as<CelParser::PrimaryExprContext>(tree)) { |
599 | 73.5k | return visitPrimaryExpr(ctx); |
600 | 2.91M | } else if (auto* ctx = tree_as<CelParser::MemberExprContext>(tree)) { |
601 | 2.83M | return visitMemberExpr(ctx); |
602 | 2.83M | } else if (auto* ctx = tree_as<CelParser::SelectOrCallContext>(tree)) { |
603 | 7.19k | return visitSelectOrCall(ctx); |
604 | 73.7k | } else if (auto* ctx = tree_as<CelParser::MapInitializerListContext>(tree)) { |
605 | 0 | return visitMapInitializerList(ctx); |
606 | 73.7k | } else if (auto* ctx = tree_as<CelParser::NegateContext>(tree)) { |
607 | 57.8k | return visitNegate(ctx); |
608 | 57.8k | } else if (auto* ctx = tree_as<CelParser::IndexContext>(tree)) { |
609 | 4.26k | return visitIndex(ctx); |
610 | 11.6k | } else if (auto* ctx = tree_as<CelParser::UnaryContext>(tree)) { |
611 | 9.78k | return visitUnary(ctx); |
612 | 9.78k | } else if (auto* ctx = tree_as<CelParser::CreateListContext>(tree)) { |
613 | 0 | return visitCreateList(ctx); |
614 | 1.88k | } else if (auto* ctx = tree_as<CelParser::CreateMessageContext>(tree)) { |
615 | 1.08k | return visitCreateMessage(ctx); |
616 | 1.08k | } else if (auto* ctx = tree_as<CelParser::CreateStructContext>(tree)) { |
617 | 0 | return visitCreateStruct(ctx); |
618 | 0 | } |
619 | | |
620 | 801 | if (tree) { |
621 | 0 | return sf_->ReportError(tree_as<antlr4::ParserRuleContext>(tree), |
622 | 0 | "unknown parsetree type"); |
623 | 0 | } |
624 | 801 | return sf_->ReportError(SourceFactory::NoLocation(), "<<nil>> parsetree"); |
625 | 801 | } |
626 | | |
627 | | antlrcpp::Any ParserVisitor::visitPrimaryExpr( |
628 | 2.89M | CelParser::PrimaryExprContext* pctx) { |
629 | 2.89M | CelParser::PrimaryContext* primary = pctx->primary(); |
630 | 2.89M | if (auto* ctx = tree_as<CelParser::NestedContext>(primary)) { |
631 | 1.57k | return visitNested(ctx); |
632 | 2.89M | } else if (auto* ctx = |
633 | 2.89M | tree_as<CelParser::IdentOrGlobalCallContext>(primary)) { |
634 | 1.10M | return visitIdentOrGlobalCall(ctx); |
635 | 1.78M | } else if (auto* ctx = tree_as<CelParser::CreateListContext>(primary)) { |
636 | 5.53k | return visitCreateList(ctx); |
637 | 1.78M | } else if (auto* ctx = tree_as<CelParser::CreateStructContext>(primary)) { |
638 | 659k | return visitCreateStruct(ctx); |
639 | 1.12M | } else if (auto* ctx = tree_as<CelParser::ConstantLiteralContext>(primary)) { |
640 | 1.12M | return visitConstantLiteral(ctx); |
641 | 1.12M | } |
642 | 1.27k | return sf_->ReportError(pctx, "invalid primary expression"); |
643 | 2.89M | } |
644 | | |
645 | | antlrcpp::Any ParserVisitor::visitMemberExpr( |
646 | 2.83M | CelParser::MemberExprContext* mctx) { |
647 | 2.83M | CelParser::MemberContext* member = mctx->member(); |
648 | 2.83M | if (auto* ctx = tree_as<CelParser::PrimaryExprContext>(member)) { |
649 | 2.81M | return visitPrimaryExpr(ctx); |
650 | 2.81M | } else if (auto* ctx = tree_as<CelParser::SelectOrCallContext>(member)) { |
651 | 8.24k | return visitSelectOrCall(ctx); |
652 | 8.24k | } else if (auto* ctx = tree_as<CelParser::IndexContext>(member)) { |
653 | 1.39k | return visitIndex(ctx); |
654 | 3.60k | } else if (auto* ctx = tree_as<CelParser::CreateMessageContext>(member)) { |
655 | 3.60k | return visitCreateMessage(ctx); |
656 | 3.60k | } |
657 | 0 | return sf_->ReportError(mctx, "unsupported simple expression"); |
658 | 2.83M | } |
659 | | |
660 | 8.59k | antlrcpp::Any ParserVisitor::visitStart(CelParser::StartContext* ctx) { |
661 | 8.59k | return visit(ctx->expr()); |
662 | 8.59k | } |
663 | | |
664 | 2.14M | antlrcpp::Any ParserVisitor::visitExpr(CelParser::ExprContext* ctx) { |
665 | 2.14M | auto result = std::any_cast<Expr>(visit(ctx->e)); |
666 | 2.14M | if (!ctx->op) { |
667 | 2.14M | return result; |
668 | 2.14M | } |
669 | 1.97k | int64_t op_id = sf_->Id(ctx->op); |
670 | 1.97k | Expr if_true = std::any_cast<Expr>(visit(ctx->e1)); |
671 | 1.97k | Expr if_false = std::any_cast<Expr>(visit(ctx->e2)); |
672 | | |
673 | 1.97k | return GlobalCallOrMacro(op_id, CelOperator::CONDITIONAL, |
674 | 1.97k | {result, if_true, if_false}); |
675 | 2.14M | } |
676 | | |
677 | | antlrcpp::Any ParserVisitor::visitConditionalOr( |
678 | 2.14M | CelParser::ConditionalOrContext* ctx) { |
679 | 2.14M | auto result = std::any_cast<Expr>(visit(ctx->e)); |
680 | 2.14M | if (ctx->ops.empty()) { |
681 | 2.14M | return result; |
682 | 2.14M | } |
683 | 1.07k | ExpressionBalancer b(sf_, CelOperator::LOGICAL_OR, result); |
684 | 88.0k | for (size_t i = 0; i < ctx->ops.size(); ++i) { |
685 | 87.0k | auto op = ctx->ops[i]; |
686 | 87.0k | if (i >= ctx->e1.size()) { |
687 | 0 | return sf_->ReportError(ctx, "unexpected character, wanted '||'"); |
688 | 0 | } |
689 | 87.0k | auto next = std::any_cast<Expr>(visit(ctx->e1[i])); |
690 | 87.0k | int64_t op_id = sf_->Id(op); |
691 | 87.0k | b.AddTerm(op_id, next); |
692 | 87.0k | } |
693 | 1.07k | return b.Balance(); |
694 | 1.07k | } |
695 | | |
696 | | antlrcpp::Any ParserVisitor::visitConditionalAnd( |
697 | 2.23M | CelParser::ConditionalAndContext* ctx) { |
698 | 2.23M | auto result = std::any_cast<Expr>(visit(ctx->e)); |
699 | 2.23M | if (ctx->ops.empty()) { |
700 | 2.23M | return result; |
701 | 2.23M | } |
702 | 909 | ExpressionBalancer b(sf_, CelOperator::LOGICAL_AND, result); |
703 | 59.4k | for (size_t i = 0; i < ctx->ops.size(); ++i) { |
704 | 58.5k | auto op = ctx->ops[i]; |
705 | 58.5k | if (i >= ctx->e1.size()) { |
706 | 0 | return sf_->ReportError(ctx, "unexpected character, wanted '&&'"); |
707 | 0 | } |
708 | 58.5k | auto next = std::any_cast<Expr>(visit(ctx->e1[i])); |
709 | 58.5k | int64_t op_id = sf_->Id(op); |
710 | 58.5k | b.AddTerm(op_id, next); |
711 | 58.5k | } |
712 | 909 | return b.Balance(); |
713 | 909 | } |
714 | | |
715 | 2.31M | antlrcpp::Any ParserVisitor::visitRelation(CelParser::RelationContext* ctx) { |
716 | 2.31M | if (ctx->calc()) { |
717 | 2.30M | return visit(ctx->calc()); |
718 | 2.30M | } |
719 | 8.03k | std::string op_text; |
720 | 8.03k | if (ctx->op) { |
721 | 8.03k | op_text = ctx->op->getText(); |
722 | 8.03k | } |
723 | 8.03k | auto op = ReverseLookupOperator(op_text); |
724 | 8.03k | if (op) { |
725 | 8.03k | auto lhs = std::any_cast<Expr>(visit(ctx->relation(0))); |
726 | 8.03k | int64_t op_id = sf_->Id(ctx->op); |
727 | 8.03k | auto rhs = std::any_cast<Expr>(visit(ctx->relation(1))); |
728 | 8.03k | return GlobalCallOrMacro(op_id, *op, {lhs, rhs}); |
729 | 8.03k | } |
730 | 0 | return sf_->ReportError(ctx, "operator not found"); |
731 | 8.03k | } |
732 | | |
733 | 3.50M | antlrcpp::Any ParserVisitor::visitCalc(CelParser::CalcContext* ctx) { |
734 | 3.50M | if (ctx->unary()) { |
735 | 2.90M | return visit(ctx->unary()); |
736 | 2.90M | } |
737 | 599k | std::string op_text; |
738 | 599k | if (ctx->op) { |
739 | 599k | op_text = ctx->op->getText(); |
740 | 599k | } |
741 | 599k | auto op = ReverseLookupOperator(op_text); |
742 | 599k | if (op) { |
743 | 599k | auto lhs = std::any_cast<Expr>(visit(ctx->calc(0))); |
744 | 599k | int64_t op_id = sf_->Id(ctx->op); |
745 | 599k | auto rhs = std::any_cast<Expr>(visit(ctx->calc(1))); |
746 | 599k | return GlobalCallOrMacro(op_id, *op, {lhs, rhs}); |
747 | 599k | } |
748 | 0 | return sf_->ReportError(ctx, "operator not found"); |
749 | 599k | } |
750 | | |
751 | 9.78k | antlrcpp::Any ParserVisitor::visitUnary(CelParser::UnaryContext* ctx) { |
752 | 9.78k | return sf_->NewLiteralString(ctx, "<<error>>"); |
753 | 9.78k | } |
754 | | |
755 | | antlrcpp::Any ParserVisitor::visitLogicalNot( |
756 | 3.00k | CelParser::LogicalNotContext* ctx) { |
757 | 3.00k | if (ctx->ops.size() % 2 == 0) { |
758 | 131 | return visit(ctx->member()); |
759 | 131 | } |
760 | 2.87k | int64_t op_id = sf_->Id(ctx->ops[0]); |
761 | 2.87k | auto target = std::any_cast<Expr>(visit(ctx->member())); |
762 | 2.87k | return GlobalCallOrMacro(op_id, CelOperator::LOGICAL_NOT, {target}); |
763 | 3.00k | } |
764 | | |
765 | 57.8k | antlrcpp::Any ParserVisitor::visitNegate(CelParser::NegateContext* ctx) { |
766 | 57.8k | if (ctx->ops.size() % 2 == 0) { |
767 | 3.96k | return visit(ctx->member()); |
768 | 3.96k | } |
769 | 53.8k | int64_t op_id = sf_->Id(ctx->ops[0]); |
770 | 53.8k | auto target = std::any_cast<Expr>(visit(ctx->member())); |
771 | 53.8k | return GlobalCallOrMacro(op_id, CelOperator::NEGATE, {target}); |
772 | 57.8k | } |
773 | | |
774 | | antlrcpp::Any ParserVisitor::visitSelectOrCall( |
775 | 15.4k | CelParser::SelectOrCallContext* ctx) { |
776 | 15.4k | auto operand = std::any_cast<Expr>(visit(ctx->member())); |
777 | | // Handle the error case where no valid identifier is specified. |
778 | 15.4k | if (!ctx->id) { |
779 | 147 | return sf_->NewExpr(ctx); |
780 | 147 | } |
781 | 15.2k | auto id = ctx->id->getText(); |
782 | 15.2k | if (ctx->open) { |
783 | 6.62k | int64_t op_id = sf_->Id(ctx->open); |
784 | 6.62k | return ReceiverCallOrMacro(op_id, id, operand, visitList(ctx->args)); |
785 | 6.62k | } |
786 | 8.66k | return sf_->NewSelect(ctx, operand, id); |
787 | 15.2k | } |
788 | | |
789 | 5.65k | antlrcpp::Any ParserVisitor::visitIndex(CelParser::IndexContext* ctx) { |
790 | 5.65k | auto target = std::any_cast<Expr>(visit(ctx->member())); |
791 | 5.65k | int64_t op_id = sf_->Id(ctx->op); |
792 | 5.65k | auto index = std::any_cast<Expr>(visit(ctx->index)); |
793 | 5.65k | return GlobalCallOrMacro(op_id, CelOperator::INDEX, {target, index}); |
794 | 5.65k | } |
795 | | |
796 | | antlrcpp::Any ParserVisitor::visitCreateMessage( |
797 | 4.69k | CelParser::CreateMessageContext* ctx) { |
798 | 4.69k | auto target = std::any_cast<Expr>(visit(ctx->member())); |
799 | 4.69k | int64_t obj_id = sf_->Id(ctx->op); |
800 | 4.69k | std::string message_name = ExtractQualifiedName(ctx, &target); |
801 | 4.69k | if (!message_name.empty()) { |
802 | 3.46k | auto entries = std::any_cast<std::vector<Expr::CreateStruct::Entry>>( |
803 | 3.46k | visitFieldInitializerList(ctx->entries)); |
804 | 3.46k | return sf_->NewObject(obj_id, message_name, entries); |
805 | 3.46k | } else { |
806 | 1.22k | return sf_->NewExpr(obj_id); |
807 | 1.22k | } |
808 | 4.69k | } |
809 | | |
810 | | antlrcpp::Any ParserVisitor::visitFieldInitializerList( |
811 | 3.46k | CelParser::FieldInitializerListContext* ctx) { |
812 | 3.46k | std::vector<Expr::CreateStruct::Entry> res; |
813 | 3.46k | if (!ctx || ctx->fields.empty()) { |
814 | 2.78k | return res; |
815 | 2.78k | } |
816 | | |
817 | 685 | res.resize(ctx->fields.size()); |
818 | 3.87k | for (size_t i = 0; i < ctx->fields.size(); ++i) { |
819 | 3.33k | if (i >= ctx->cols.size() || i >= ctx->values.size()) { |
820 | | // This is the result of a syntax error detected elsewhere. |
821 | 139 | return res; |
822 | 139 | } |
823 | 3.19k | const auto& f = ctx->fields[i]; |
824 | 3.19k | int64_t init_id = sf_->Id(ctx->cols[i]); |
825 | 3.19k | auto value = std::any_cast<Expr>(visit(ctx->values[i])); |
826 | 3.19k | auto field = sf_->NewObjectField(init_id, f->getText(), value); |
827 | 3.19k | res[i] = field; |
828 | 3.19k | } |
829 | | |
830 | 546 | return res; |
831 | 685 | } |
832 | | |
833 | | antlrcpp::Any ParserVisitor::visitIdentOrGlobalCall( |
834 | 1.10M | CelParser::IdentOrGlobalCallContext* ctx) { |
835 | 1.10M | std::string ident_name; |
836 | 1.10M | if (ctx->leadingDot) { |
837 | 1.83k | ident_name = "."; |
838 | 1.83k | } |
839 | 1.10M | if (!ctx->id) { |
840 | 209 | return sf_->NewExpr(ctx); |
841 | 209 | } |
842 | 1.10M | if (sf_->IsReserved(ctx->id->getText())) { |
843 | 534 | return sf_->ReportError( |
844 | 534 | ctx, absl::StrFormat("reserved identifier: %s", ctx->id->getText())); |
845 | 534 | } |
846 | | // check if ID is in reserved identifiers |
847 | 1.10M | ident_name += ctx->id->getText(); |
848 | 1.10M | if (ctx->op) { |
849 | 2.22k | int64_t op_id = sf_->Id(ctx->op); |
850 | 2.22k | return GlobalCallOrMacro(op_id, ident_name, visitList(ctx->args)); |
851 | 2.22k | } |
852 | 1.09M | return sf_->NewIdent(ctx->id, ident_name); |
853 | 1.10M | } |
854 | | |
855 | 1.57k | antlrcpp::Any ParserVisitor::visitNested(CelParser::NestedContext* ctx) { |
856 | 1.57k | return visit(ctx->e); |
857 | 1.57k | } |
858 | | |
859 | | antlrcpp::Any ParserVisitor::visitCreateList( |
860 | 5.53k | CelParser::CreateListContext* ctx) { |
861 | 5.53k | int64_t list_id = sf_->Id(ctx->op); |
862 | 5.53k | return sf_->NewList(list_id, visitList(ctx->elems)); |
863 | 5.53k | } |
864 | | |
865 | 14.3k | std::vector<Expr> ParserVisitor::visitList(CelParser::ExprListContext* ctx) { |
866 | 14.3k | std::vector<Expr> rv; |
867 | 14.3k | if (!ctx) return rv; |
868 | 11.7k | std::transform(ctx->e.begin(), ctx->e.end(), std::back_inserter(rv), |
869 | 1.14M | [this](CelParser::ExprContext* expr_ctx) { |
870 | 1.14M | return std::any_cast<Expr>(visitExpr(expr_ctx)); |
871 | 1.14M | }); |
872 | 11.7k | return rv; |
873 | 14.3k | } |
874 | | |
875 | | antlrcpp::Any ParserVisitor::visitCreateStruct( |
876 | 659k | CelParser::CreateStructContext* ctx) { |
877 | 659k | int64_t struct_id = sf_->Id(ctx->op); |
878 | 659k | std::vector<Expr::CreateStruct::Entry> entries; |
879 | 659k | if (ctx->entries) { |
880 | 479k | entries = std::any_cast<std::vector<Expr::CreateStruct::Entry>>( |
881 | 479k | visitMapInitializerList(ctx->entries)); |
882 | 479k | } |
883 | 659k | return sf_->NewMap(struct_id, entries); |
884 | 659k | } |
885 | | |
886 | | antlrcpp::Any ParserVisitor::visitConstantLiteral( |
887 | 1.12M | CelParser::ConstantLiteralContext* clctx) { |
888 | 1.12M | CelParser::LiteralContext* literal = clctx->literal(); |
889 | 1.12M | if (auto* ctx = tree_as<CelParser::IntContext>(literal)) { |
890 | 596k | return visitInt(ctx); |
891 | 596k | } else if (auto* ctx = tree_as<CelParser::UintContext>(literal)) { |
892 | 2.20k | return visitUint(ctx); |
893 | 523k | } else if (auto* ctx = tree_as<CelParser::DoubleContext>(literal)) { |
894 | 19.9k | return visitDouble(ctx); |
895 | 503k | } else if (auto* ctx = tree_as<CelParser::StringContext>(literal)) { |
896 | 497k | return visitString(ctx); |
897 | 497k | } else if (auto* ctx = tree_as<CelParser::BytesContext>(literal)) { |
898 | 3.89k | return visitBytes(ctx); |
899 | 3.89k | } else if (auto* ctx = tree_as<CelParser::BoolFalseContext>(literal)) { |
900 | 524 | return visitBoolFalse(ctx); |
901 | 1.17k | } else if (auto* ctx = tree_as<CelParser::BoolTrueContext>(literal)) { |
902 | 353 | return visitBoolTrue(ctx); |
903 | 819 | } else if (auto* ctx = tree_as<CelParser::NullContext>(literal)) { |
904 | 259 | return visitNull(ctx); |
905 | 259 | } |
906 | 560 | return sf_->ReportError(clctx, "invalid constant literal expression"); |
907 | 1.12M | } |
908 | | |
909 | | antlrcpp::Any ParserVisitor::visitMapInitializerList( |
910 | 479k | CelParser::MapInitializerListContext* ctx) { |
911 | 479k | std::vector<Expr::CreateStruct::Entry> res; |
912 | 479k | if (!ctx || ctx->keys.empty()) { |
913 | 0 | return res; |
914 | 0 | } |
915 | | |
916 | 479k | res.resize(ctx->cols.size()); |
917 | 971k | for (size_t i = 0; i < ctx->cols.size(); ++i) { |
918 | 492k | int64_t col_id = sf_->Id(ctx->cols[i]); |
919 | 492k | auto key = std::any_cast<Expr>(visit(ctx->keys[i])); |
920 | 492k | auto value = std::any_cast<Expr>(visit(ctx->values[i])); |
921 | 492k | res[i] = sf_->NewMapEntry(col_id, key, value); |
922 | 492k | } |
923 | 479k | return res; |
924 | 479k | } |
925 | | |
926 | 596k | antlrcpp::Any ParserVisitor::visitInt(CelParser::IntContext* ctx) { |
927 | 596k | std::string value; |
928 | 596k | if (ctx->sign) { |
929 | 7.42k | value = ctx->sign->getText(); |
930 | 7.42k | } |
931 | 596k | value += ctx->tok->getText(); |
932 | 596k | int64_t int_value; |
933 | 596k | if (absl::StartsWith(ctx->tok->getText(), "0x")) { |
934 | 719 | if (absl::SimpleHexAtoi(value, &int_value)) { |
935 | 475 | return sf_->NewLiteralInt(ctx, int_value); |
936 | 475 | } else { |
937 | 244 | return sf_->ReportError(ctx, "invalid hex int literal"); |
938 | 244 | } |
939 | 719 | } |
940 | 595k | if (absl::SimpleAtoi(value, &int_value)) { |
941 | 593k | return sf_->NewLiteralInt(ctx, int_value); |
942 | 593k | } else { |
943 | 2.30k | return sf_->ReportError(ctx, "invalid int literal"); |
944 | 2.30k | } |
945 | 595k | } |
946 | | |
947 | 2.20k | antlrcpp::Any ParserVisitor::visitUint(CelParser::UintContext* ctx) { |
948 | 2.20k | std::string value = ctx->tok->getText(); |
949 | | // trim the 'u' designator included in the uint literal. |
950 | 2.20k | if (!value.empty()) { |
951 | 2.20k | value.resize(value.size() - 1); |
952 | 2.20k | } |
953 | 2.20k | uint64_t uint_value; |
954 | 2.20k | if (absl::StartsWith(ctx->tok->getText(), "0x")) { |
955 | 527 | if (absl::SimpleHexAtoi(value, &uint_value)) { |
956 | 176 | return sf_->NewLiteralUint(ctx, uint_value); |
957 | 351 | } else { |
958 | 351 | return sf_->ReportError(ctx, "invalid hex uint literal"); |
959 | 351 | } |
960 | 527 | } |
961 | 1.67k | if (absl::SimpleAtoi(value, &uint_value)) { |
962 | 867 | return sf_->NewLiteralUint(ctx, uint_value); |
963 | 867 | } else { |
964 | 811 | return sf_->ReportError(ctx, "invalid uint literal"); |
965 | 811 | } |
966 | 1.67k | } |
967 | | |
968 | 19.9k | antlrcpp::Any ParserVisitor::visitDouble(CelParser::DoubleContext* ctx) { |
969 | 19.9k | std::string value; |
970 | 19.9k | if (ctx->sign) { |
971 | 1.24k | value = ctx->sign->getText(); |
972 | 1.24k | } |
973 | 19.9k | value += ctx->tok->getText(); |
974 | 19.9k | double double_value; |
975 | 19.9k | if (absl::SimpleAtod(value, &double_value)) { |
976 | 19.9k | return sf_->NewLiteralDouble(ctx, double_value); |
977 | 19.9k | } else { |
978 | 0 | return sf_->ReportError(ctx, "invalid double literal"); |
979 | 0 | } |
980 | 19.9k | } |
981 | | |
982 | 497k | antlrcpp::Any ParserVisitor::visitString(CelParser::StringContext* ctx) { |
983 | 497k | auto status_or_value = cel::internal::ParseStringLiteral(ctx->tok->getText()); |
984 | 497k | if (!status_or_value.ok()) { |
985 | 3.75k | return sf_->ReportError(ctx, status_or_value.status().message()); |
986 | 3.75k | } |
987 | 493k | return sf_->NewLiteralString(ctx, status_or_value.value()); |
988 | 497k | } |
989 | | |
990 | 3.89k | antlrcpp::Any ParserVisitor::visitBytes(CelParser::BytesContext* ctx) { |
991 | 3.89k | auto status_or_value = cel::internal::ParseBytesLiteral(ctx->tok->getText()); |
992 | 3.89k | if (!status_or_value.ok()) { |
993 | 1.59k | return sf_->ReportError(ctx, status_or_value.status().message()); |
994 | 1.59k | } |
995 | 2.29k | return sf_->NewLiteralBytes(ctx, status_or_value.value()); |
996 | 3.89k | } |
997 | | |
998 | 353 | antlrcpp::Any ParserVisitor::visitBoolTrue(CelParser::BoolTrueContext* ctx) { |
999 | 353 | return sf_->NewLiteralBool(ctx, true); |
1000 | 353 | } |
1001 | | |
1002 | 524 | antlrcpp::Any ParserVisitor::visitBoolFalse(CelParser::BoolFalseContext* ctx) { |
1003 | 524 | return sf_->NewLiteralBool(ctx, false); |
1004 | 524 | } |
1005 | | |
1006 | 259 | antlrcpp::Any ParserVisitor::visitNull(CelParser::NullContext* ctx) { |
1007 | 259 | return sf_->NewLiteralNull(ctx); |
1008 | 259 | } |
1009 | | |
1010 | 2.50k | google::api::expr::v1alpha1::SourceInfo ParserVisitor::source_info() const { |
1011 | 2.50k | return sf_->source_info(); |
1012 | 2.50k | } |
1013 | | |
1014 | 2.50k | EnrichedSourceInfo ParserVisitor::enriched_source_info() const { |
1015 | 2.50k | return sf_->enriched_source_info(); |
1016 | 2.50k | } |
1017 | | |
1018 | | void ParserVisitor::syntaxError(antlr4::Recognizer* recognizer, |
1019 | | antlr4::Token* offending_symbol, size_t line, |
1020 | | size_t col, const std::string& msg, |
1021 | 1.97M | std::exception_ptr e) { |
1022 | 1.97M | sf_->ReportError(line, col, "Syntax error: " + msg); |
1023 | 1.97M | } |
1024 | | |
1025 | 9.21k | bool ParserVisitor::HasErrored() const { return !sf_->errors().empty(); } |
1026 | | |
1027 | 6.71k | std::string ParserVisitor::ErrorMessage() const { |
1028 | 6.71k | return sf_->ErrorMessage(description_, expression_); |
1029 | 6.71k | } |
1030 | | |
1031 | | Expr ParserVisitor::GlobalCallOrMacro(int64_t expr_id, |
1032 | | const std::string& function, |
1033 | 674k | const std::vector<Expr>& args) { |
1034 | 674k | Expr macro_expr; |
1035 | 674k | if (ExpandMacro(expr_id, function, Expr::default_instance(), args, |
1036 | 674k | ¯o_expr)) { |
1037 | 379 | return macro_expr; |
1038 | 379 | } |
1039 | | |
1040 | 674k | return sf_->NewGlobalCall(expr_id, function, args); |
1041 | 674k | } |
1042 | | |
1043 | | Expr ParserVisitor::ReceiverCallOrMacro(int64_t expr_id, |
1044 | | const std::string& function, |
1045 | | const Expr& target, |
1046 | 6.62k | const std::vector<Expr>& args) { |
1047 | 6.62k | Expr macro_expr; |
1048 | 6.62k | if (ExpandMacro(expr_id, function, target, args, ¯o_expr)) { |
1049 | 3.66k | return macro_expr; |
1050 | 3.66k | } |
1051 | | |
1052 | 2.96k | return sf_->NewReceiverCall(expr_id, function, target, args); |
1053 | 6.62k | } |
1054 | | |
1055 | | bool ParserVisitor::ExpandMacro(int64_t expr_id, const std::string& function, |
1056 | | const Expr& target, |
1057 | | const std::vector<Expr>& args, |
1058 | 681k | Expr* macro_expr) { |
1059 | 681k | std::string macro_key = absl::StrFormat("%s:%d:%s", function, args.size(), |
1060 | 681k | target.id() != 0 ? "true" : "false"); |
1061 | 681k | auto m = macros_.find(macro_key); |
1062 | 681k | if (m == macros_.end()) { |
1063 | 675k | std::string var_arg_macro_key = absl::StrFormat( |
1064 | 675k | "%s:*:%s", function, target.id() != 0 ? "true" : "false"); |
1065 | 675k | m = macros_.find(var_arg_macro_key); |
1066 | 675k | if (m == macros_.end()) { |
1067 | 675k | return false; |
1068 | 675k | } |
1069 | 675k | } |
1070 | | |
1071 | 5.42k | Expr expr = m->second.Expand(sf_, expr_id, target, args); |
1072 | 5.42k | if (expr.expr_kind_case() != Expr::EXPR_KIND_NOT_SET) { |
1073 | 4.04k | *macro_expr = std::move(expr); |
1074 | 4.04k | if (add_macro_calls_) { |
1075 | | // If the macro is nested, the full expression id is used as an argument |
1076 | | // id in the tree. Using this ID instead of expr_id allows argument id |
1077 | | // lookups in macro_calls when building the map and iterating |
1078 | | // the AST. |
1079 | 0 | sf_->AddMacroCall(macro_expr->id(), target, args, function); |
1080 | 0 | } |
1081 | 4.04k | return true; |
1082 | 4.04k | } |
1083 | 1.38k | return false; |
1084 | 5.42k | } |
1085 | | |
1086 | | std::string ParserVisitor::ExtractQualifiedName(antlr4::ParserRuleContext* ctx, |
1087 | 9.29k | const Expr* e) { |
1088 | 9.29k | if (!e) { |
1089 | 0 | return ""; |
1090 | 0 | } |
1091 | | |
1092 | 9.29k | switch (e->expr_kind_case()) { |
1093 | 3.46k | case Expr::kIdentExpr: |
1094 | 3.46k | return e->ident_expr().name(); |
1095 | 4.60k | case Expr::kSelectExpr: { |
1096 | 4.60k | auto& s = e->select_expr(); |
1097 | 4.60k | std::string prefix = ExtractQualifiedName(ctx, &s.operand()); |
1098 | 4.60k | if (!prefix.empty()) { |
1099 | 2.59k | return prefix + "." + s.field(); |
1100 | 2.59k | } |
1101 | 4.60k | } break; |
1102 | 2.00k | default: |
1103 | 1.22k | break; |
1104 | 9.29k | } |
1105 | 3.23k | sf_->ReportError(sf_->GetSourceLocation(e->id()), |
1106 | 3.23k | "expected a qualified name"); |
1107 | 3.23k | return ""; |
1108 | 9.29k | } |
1109 | | |
1110 | | // Replacements for absl::StrReplaceAll for escaping standard whitespace |
1111 | | // characters. |
1112 | | static constexpr auto kStandardReplacements = |
1113 | | std::array<std::pair<absl::string_view, absl::string_view>, 3>{ |
1114 | | std::make_pair("\n", "\\n"), |
1115 | | std::make_pair("\r", "\\r"), |
1116 | | std::make_pair("\t", "\\t"), |
1117 | | }; |
1118 | | |
1119 | | static constexpr absl::string_view kSingleQuote = "'"; |
1120 | | |
1121 | | // ExprRecursionListener extends the standard ANTLR CelParser to ensure that |
1122 | | // recursive entries into the 'expr' rule are limited to a configurable depth so |
1123 | | // as to prevent stack overflows. |
1124 | | class ExprRecursionListener : public ParseTreeListener { |
1125 | | public: |
1126 | | explicit ExprRecursionListener( |
1127 | | const int max_recursion_depth = kDefaultMaxRecursionDepth) |
1128 | 9.21k | : max_recursion_depth_(max_recursion_depth), recursion_depth_(0) {} |
1129 | 0 | ~ExprRecursionListener() override {} |
1130 | | |
1131 | 7.24M | void visitTerminal(TerminalNode* node) override{}; |
1132 | 145k | void visitErrorNode(ErrorNode* error) override{}; |
1133 | | void enterEveryRule(ParserRuleContext* ctx) override; |
1134 | | void exitEveryRule(ParserRuleContext* ctx) override; |
1135 | | |
1136 | | private: |
1137 | | const int max_recursion_depth_; |
1138 | | int recursion_depth_; |
1139 | | }; |
1140 | | |
1141 | 25.1M | void ExprRecursionListener::enterEveryRule(ParserRuleContext* ctx) { |
1142 | | // Throw a ParseCancellationException since the parsing would otherwise |
1143 | | // continue if this were treated as a syntax error and the problem would |
1144 | | // continue to manifest. |
1145 | 25.1M | if (ctx->getRuleIndex() == CelParser::RuleExpr) { |
1146 | 2.39M | if (recursion_depth_ >= max_recursion_depth_) { |
1147 | 5 | throw ParseCancellationException( |
1148 | 5 | absl::StrFormat("Expression recursion limit exceeded. limit: %d", |
1149 | 5 | max_recursion_depth_)); |
1150 | 5 | } |
1151 | 2.39M | recursion_depth_++; |
1152 | 2.39M | } |
1153 | 25.1M | } |
1154 | | |
1155 | 25.1M | void ExprRecursionListener::exitEveryRule(ParserRuleContext* ctx) { |
1156 | 25.1M | if (ctx->getRuleIndex() == CelParser::RuleExpr) { |
1157 | 2.39M | recursion_depth_--; |
1158 | 2.39M | } |
1159 | 25.1M | } |
1160 | | |
1161 | | class RecoveryLimitErrorStrategy : public DefaultErrorStrategy { |
1162 | | public: |
1163 | | explicit RecoveryLimitErrorStrategy( |
1164 | | int recovery_limit = kDefaultErrorRecoveryLimit, |
1165 | | int recovery_token_lookahead_limit = |
1166 | | kDefaultErrorRecoveryTokenLookaheadLimit) |
1167 | | : recovery_limit_(recovery_limit), |
1168 | | recovery_attempts_(0), |
1169 | 9.21k | recovery_token_lookahead_limit_(recovery_token_lookahead_limit) {} |
1170 | | |
1171 | 33.0k | void recover(Parser* recognizer, std::exception_ptr e) override { |
1172 | 33.0k | checkRecoveryLimit(recognizer); |
1173 | 33.0k | DefaultErrorStrategy::recover(recognizer, e); |
1174 | 33.0k | } |
1175 | | |
1176 | 28.9k | Token* recoverInline(Parser* recognizer) override { |
1177 | 28.9k | checkRecoveryLimit(recognizer); |
1178 | 28.9k | return DefaultErrorStrategy::recoverInline(recognizer); |
1179 | 28.9k | } |
1180 | | |
1181 | | // Override the ANTLR implementation to introduce a token lookahead limit as |
1182 | | // this prevents pathologically constructed, yet small (< 16kb) inputs from |
1183 | | // consuming inordinate amounts of compute. |
1184 | | // |
1185 | | // This method is only called on error recovery paths. |
1186 | 37.0k | void consumeUntil(Parser* recognizer, const IntervalSet& set) override { |
1187 | 37.0k | size_t ttype = recognizer->getInputStream()->LA(1); |
1188 | 37.0k | int recovery_search_depth = 0; |
1189 | 161k | while (ttype != Token::EOF && !set.contains(ttype) && |
1190 | 161k | recovery_search_depth++ < recovery_token_lookahead_limit_) { |
1191 | 124k | recognizer->consume(); |
1192 | 124k | ttype = recognizer->getInputStream()->LA(1); |
1193 | 124k | } |
1194 | | // Halt all parsing if the lookahead limit is reached during error recovery. |
1195 | 37.0k | if (recovery_search_depth == recovery_token_lookahead_limit_) { |
1196 | 3 | throw ParseCancellationException("Unable to find a recovery token"); |
1197 | 3 | } |
1198 | 37.0k | } |
1199 | | |
1200 | | protected: |
1201 | 45.8k | std::string escapeWSAndQuote(const std::string& s) const override { |
1202 | 45.8k | std::string result; |
1203 | 45.8k | result.reserve(s.size() + 2); |
1204 | 45.8k | absl::StrAppend(&result, kSingleQuote, s, kSingleQuote); |
1205 | 45.8k | absl::StrReplaceAll(kStandardReplacements, &result); |
1206 | 45.8k | return result; |
1207 | 45.8k | } |
1208 | | |
1209 | | private: |
1210 | 61.9k | void checkRecoveryLimit(Parser* recognizer) { |
1211 | 61.9k | if (recovery_attempts_++ >= recovery_limit_) { |
1212 | 615 | std::string too_many_errors = |
1213 | 615 | absl::StrFormat("More than %d parse errors.", recovery_limit_); |
1214 | 615 | recognizer->notifyErrorListeners(too_many_errors); |
1215 | 615 | throw ParseCancellationException(too_many_errors); |
1216 | 615 | } |
1217 | 61.9k | } |
1218 | | |
// Threshold compared against recovery_attempts_ in checkRecoveryLimit();
// reaching it cancels the parse.
int recovery_limit_;
// Running count of recovery attempts; incremented on every
// checkRecoveryLimit() call.
int recovery_attempts_;
// Bound consulted by consumeUntil() on the number of tokens skipped while
// searching for a recovery token.
int recovery_token_lookahead_limit_;
1222 | | }; |
1223 | | |
1224 | | } // namespace |
1225 | | |
1226 | | absl::StatusOr<ParsedExpr> Parse(absl::string_view expression, |
1227 | | absl::string_view description, |
1228 | 9.61k | const ParserOptions& options) { |
1229 | 9.61k | return ParseWithMacros(expression, Macro::AllMacros(), description, options); |
1230 | 9.61k | } |
1231 | | |
1232 | | absl::StatusOr<ParsedExpr> ParseWithMacros(absl::string_view expression, |
1233 | | const std::vector<Macro>& macros, |
1234 | | absl::string_view description, |
1235 | 9.61k | const ParserOptions& options) { |
1236 | 9.61k | CEL_ASSIGN_OR_RETURN(auto verbose_parsed_expr, |
1237 | 2.50k | EnrichedParse(expression, macros, description, options)); |
1238 | 2.50k | return verbose_parsed_expr.parsed_expr(); |
1239 | 9.61k | } |
1240 | | |
1241 | | absl::StatusOr<VerboseParsedExpr> EnrichedParse( |
1242 | | absl::string_view expression, const std::vector<Macro>& macros, |
1243 | 9.61k | absl::string_view description, const ParserOptions& options) { |
1244 | 9.61k | try { |
1245 | 9.61k | CEL_ASSIGN_OR_RETURN(auto buffer, MakeCodePointBuffer(expression)); |
1246 | 9.32k | CodePointStream input(&buffer, description); |
1247 | 9.32k | if (input.size() > options.expression_size_codepoint_limit) { |
1248 | 103 | return absl::InvalidArgumentError(absl::StrCat( |
1249 | 103 | "expression size exceeds codepoint limit.", " input size: ", |
1250 | 103 | input.size(), ", limit: ", options.expression_size_codepoint_limit)); |
1251 | 103 | } |
1252 | 9.21k | CelLexer lexer(&input); |
1253 | 9.21k | CommonTokenStream tokens(&lexer); |
1254 | 9.21k | CelParser parser(&tokens); |
1255 | 9.21k | ExprRecursionListener listener(options.max_recursion_depth); |
1256 | 9.21k | ParserVisitor visitor(description, expression, options.max_recursion_depth, |
1257 | 9.21k | macros, options.add_macro_calls); |
1258 | | |
1259 | 9.21k | lexer.removeErrorListeners(); |
1260 | 9.21k | parser.removeErrorListeners(); |
1261 | 9.21k | lexer.addErrorListener(&visitor); |
1262 | 9.21k | parser.addErrorListener(&visitor); |
1263 | 9.21k | parser.addParseListener(&listener); |
1264 | | |
1265 | | // Limit the number of error recovery attempts to prevent bad expressions |
1266 | | // from consuming lots of cpu / memory. |
1267 | 9.21k | parser.setErrorHandler(std::make_shared<RecoveryLimitErrorStrategy>( |
1268 | 9.21k | options.error_recovery_limit, |
1269 | 9.21k | options.error_recovery_token_lookahead_limit)); |
1270 | | |
1271 | 9.21k | Expr expr; |
1272 | 9.21k | try { |
1273 | 9.21k | expr = std::any_cast<Expr>(visitor.visit(parser.start())); |
1274 | 9.21k | } catch (const ParseCancellationException& e) { |
1275 | 623 | if (visitor.HasErrored()) { |
1276 | 622 | return absl::InvalidArgumentError(visitor.ErrorMessage()); |
1277 | 622 | } |
1278 | 1 | return absl::CancelledError(e.what()); |
1279 | 623 | } |
1280 | | |
1281 | 8.59k | if (visitor.HasErrored()) { |
1282 | 6.09k | return absl::InvalidArgumentError(visitor.ErrorMessage()); |
1283 | 6.09k | } |
1284 | | |
1285 | | // root is deleted as part of the parser context |
1286 | 2.50k | ParsedExpr parsed_expr; |
1287 | 2.50k | *(parsed_expr.mutable_expr()) = std::move(expr); |
1288 | 2.50k | auto enriched_source_info = visitor.enriched_source_info(); |
1289 | 2.50k | *(parsed_expr.mutable_source_info()) = visitor.source_info(); |
1290 | 2.50k | return VerboseParsedExpr(std::move(parsed_expr), |
1291 | 2.50k | std::move(enriched_source_info)); |
1292 | 8.59k | } catch (const std::exception& e) { |
1293 | 0 | return absl::AbortedError(e.what()); |
1294 | 0 | } catch (const char* what) { |
1295 | | // ANTLRv4 has historically thrown C string literals. |
1296 | 0 | return absl::AbortedError(what); |
1297 | 0 | } catch (...) { |
1298 | | // We guarantee to never throw and always return a status. |
1299 | 0 | return absl::UnknownError("An unknown exception occurred"); |
1300 | 0 | } |
1301 | 9.61k | } |
1302 | | |
1303 | | } // namespace google::api::expr::parser |