/proc/self/cwd/parser/source_factory.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2021 Google LLC |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "parser/source_factory.h" |
16 | | |
17 | | #include <algorithm> |
18 | | #include <cstdint> |
19 | | #include <limits> |
20 | | #include <string> |
21 | | #include <utility> |
22 | | |
23 | | #include "google/protobuf/struct.pb.h" |
24 | | #include "absl/container/flat_hash_set.h" |
25 | | #include "absl/memory/memory.h" |
26 | | #include "absl/strings/numbers.h" |
27 | | #include "absl/strings/str_format.h" |
28 | | #include "absl/strings/str_join.h" |
29 | | #include "absl/strings/str_split.h" |
30 | | #include "common/operators.h" |
31 | | |
32 | | namespace google::api::expr::parser { |
33 | | namespace { |
34 | | |
// Maximum number of individual errors kept for reporting; errors beyond this
// limit are only counted.
constexpr int kMaxErrorsToReport = 100;
36 | | |
37 | | using common::CelOperator; |
38 | | using google::api::expr::v1alpha1::Expr; |
39 | | |
// Returns `value` unchanged when it is non-negative; any negative value is
// mapped to the largest representable int32_t.
int32_t PositiveOrMax(int32_t value) {
  if (value < 0) {
    return std::numeric_limits<int32_t>::max();
  }
  return value;
}
43 | | |
44 | | } // namespace |
45 | | |
46 | | SourceFactory::SourceFactory(absl::string_view expression) |
47 | 9.21k | : next_id_(1), num_errors_(0) { |
48 | 9.21k | CalcLineOffsets(expression); |
49 | 9.21k | } |
50 | | |
51 | 4.23M | int64_t SourceFactory::Id(const antlr4::Token* token) { |
52 | 4.23M | int64_t new_id = next_id_; |
53 | 4.23M | positions_.emplace( |
54 | 4.23M | new_id, SourceLocation{ |
55 | 4.23M | static_cast<int32_t>(token->getLine()), |
56 | 4.23M | static_cast<int32_t>(token->getCharPositionInLine()), |
57 | 4.23M | static_cast<int32_t>(token->getStopIndex()), line_offsets_}); |
58 | 4.23M | next_id_ += 1; |
59 | 4.23M | return new_id; |
60 | 4.23M | } |
61 | | |
62 | | const SourceFactory::SourceLocation& SourceFactory::GetSourceLocation( |
63 | 32.2k | int64_t id) const { |
64 | 32.2k | return positions_.at(id); |
65 | 32.2k | } |
66 | | |
67 | 1.81k | const SourceFactory::SourceLocation SourceFactory::NoLocation() { |
68 | 1.81k | return SourceLocation(-1, -1, -1, {}); |
69 | 1.81k | } |
70 | | |
71 | 1.13M | int64_t SourceFactory::Id(antlr4::ParserRuleContext* ctx) { |
72 | 1.13M | return Id(ctx->getStart()); |
73 | 1.13M | } |
74 | | |
75 | 2.00M | int64_t SourceFactory::Id(const SourceLocation& location) { |
76 | 2.00M | int64_t new_id = next_id_; |
77 | 2.00M | positions_.emplace(new_id, location); |
78 | 2.00M | next_id_ += 1; |
79 | 2.00M | return new_id; |
80 | 2.00M | } |
81 | | |
82 | 27.7k | int64_t SourceFactory::NextMacroId(int64_t macro_id) { |
83 | 27.7k | return Id(GetSourceLocation(macro_id)); |
84 | 27.7k | } |
85 | | |
86 | 5.74M | Expr SourceFactory::NewExpr(int64_t id) { |
87 | 5.74M | Expr expr; |
88 | 5.74M | expr.set_id(id); |
89 | 5.74M | return expr; |
90 | 5.74M | } |
91 | | |
92 | 1.12M | Expr SourceFactory::NewExpr(antlr4::ParserRuleContext* ctx) { |
93 | 1.12M | return NewExpr(Id(ctx)); |
94 | 1.12M | } |
95 | | |
96 | 1.10M | Expr SourceFactory::NewExpr(const antlr4::Token* token) { |
97 | 1.10M | return NewExpr(Id(token)); |
98 | 1.10M | } |
99 | | |
100 | | Expr SourceFactory::NewGlobalCall(int64_t id, const std::string& function, |
101 | 827k | const std::vector<Expr>& args) { |
102 | 827k | Expr expr = NewExpr(id); |
103 | 827k | auto call_expr = expr.mutable_call_expr(); |
104 | 827k | call_expr->set_function(function); |
105 | 827k | std::for_each(args.begin(), args.end(), |
106 | 1.60M | [&call_expr](const Expr& e) { *call_expr->add_args() = e; }); |
107 | 827k | return expr; |
108 | 827k | } |
109 | | |
110 | | Expr SourceFactory::NewGlobalCallForMacro(int64_t macro_id, |
111 | | const std::string& function, |
112 | 7.87k | const std::vector<Expr>& args) { |
113 | 7.87k | return NewGlobalCall(NextMacroId(macro_id), function, args); |
114 | 7.87k | } |
115 | | |
116 | | Expr SourceFactory::NewReceiverCall(int64_t id, const std::string& function, |
117 | | const Expr& target, |
118 | 2.96k | const std::vector<Expr>& args) { |
119 | 2.96k | Expr expr = NewExpr(id); |
120 | 2.96k | auto call_expr = expr.mutable_call_expr(); |
121 | 2.96k | call_expr->set_function(function); |
122 | 2.96k | *call_expr->mutable_target() = target; |
123 | 2.96k | std::for_each(args.begin(), args.end(), |
124 | 429k | [&call_expr](const Expr& e) { *call_expr->add_args() = e; }); |
125 | 2.96k | return expr; |
126 | 2.96k | } |
127 | | |
128 | | Expr SourceFactory::NewIdent(const antlr4::Token* token, |
129 | 1.09M | const std::string& ident_name) { |
130 | 1.09M | Expr expr = NewExpr(token); |
131 | 1.09M | expr.mutable_ident_expr()->set_name(ident_name); |
132 | 1.09M | return expr; |
133 | 1.09M | } |
134 | | |
135 | | Expr SourceFactory::NewIdentForMacro(int64_t macro_id, |
136 | 7.18k | const std::string& ident_name) { |
137 | 7.18k | Expr expr = NewExpr(NextMacroId(macro_id)); |
138 | 7.18k | expr.mutable_ident_expr()->set_name(ident_name); |
139 | 7.18k | return expr; |
140 | 7.18k | } |
141 | | |
142 | | Expr SourceFactory::NewSelect( |
143 | | ::cel_parser_internal::CelParser::SelectOrCallContext* ctx, Expr& operand, |
144 | 8.66k | const std::string& field) { |
145 | 8.66k | Expr expr = NewExpr(ctx->op); |
146 | 8.66k | auto select_expr = expr.mutable_select_expr(); |
147 | 8.66k | *select_expr->mutable_operand() = operand; |
148 | 8.66k | select_expr->set_field(field); |
149 | 8.66k | return expr; |
150 | 8.66k | } |
151 | | |
152 | | Expr SourceFactory::NewSelectForMacro(int64_t macro_id, const Expr& operand, |
153 | 0 | const std::string& field) { |
154 | 0 | Expr expr = NewExpr(NextMacroId(macro_id)); |
155 | 0 | auto select_expr = expr.mutable_select_expr(); |
156 | 0 | *select_expr->mutable_operand() = operand; |
157 | 0 | select_expr->set_field(field); |
158 | 0 | return expr; |
159 | 0 | } |
160 | | |
161 | | Expr SourceFactory::NewPresenceTestForMacro(int64_t macro_id, |
162 | | const Expr& operand, |
163 | 379 | const std::string& field) { |
164 | 379 | Expr expr = NewExpr(NextMacroId(macro_id)); |
165 | 379 | auto select_expr = expr.mutable_select_expr(); |
166 | 379 | *select_expr->mutable_operand() = operand; |
167 | 379 | select_expr->set_field(field); |
168 | 379 | select_expr->set_test_only(true); |
169 | 379 | return expr; |
170 | 379 | } |
171 | | |
172 | | Expr SourceFactory::NewObject( |
173 | | int64_t obj_id, const std::string& type_name, |
174 | 3.46k | const std::vector<Expr::CreateStruct::Entry>& entries) { |
175 | 3.46k | auto expr = NewExpr(obj_id); |
176 | 3.46k | auto struct_expr = expr.mutable_struct_expr(); |
177 | 3.46k | struct_expr->set_message_name(type_name); |
178 | 3.46k | std::for_each(entries.begin(), entries.end(), |
179 | 3.46k | [struct_expr](const Expr::CreateStruct::Entry& e) { |
180 | 3.33k | struct_expr->add_entries()->CopyFrom(e); |
181 | 3.33k | }); |
182 | 3.46k | return expr; |
183 | 3.46k | } |
184 | | |
185 | | Expr::CreateStruct::Entry SourceFactory::NewObjectField( |
186 | 3.19k | int64_t field_id, const std::string& field, const Expr& value) { |
187 | 3.19k | Expr::CreateStruct::Entry entry; |
188 | 3.19k | entry.set_id(field_id); |
189 | 3.19k | entry.set_field_key(field); |
190 | 3.19k | *entry.mutable_value() = value; |
191 | 3.19k | return entry; |
192 | 3.19k | } |
193 | | |
194 | | Expr SourceFactory::NewComprehension(int64_t id, const std::string& iter_var, |
195 | | const Expr& iter_range, |
196 | | const std::string& accu_var, |
197 | | const Expr& accu_init, |
198 | | const Expr& condition, const Expr& step, |
199 | 3.66k | const Expr& result) { |
200 | 3.66k | Expr expr = NewExpr(id); |
201 | 3.66k | auto comp_expr = expr.mutable_comprehension_expr(); |
202 | 3.66k | comp_expr->set_iter_var(iter_var); |
203 | 3.66k | *comp_expr->mutable_iter_range() = iter_range; |
204 | 3.66k | comp_expr->set_accu_var(accu_var); |
205 | 3.66k | *comp_expr->mutable_accu_init() = accu_init; |
206 | 3.66k | *comp_expr->mutable_loop_condition() = condition; |
207 | 3.66k | *comp_expr->mutable_loop_step() = step; |
208 | 3.66k | *comp_expr->mutable_result() = result; |
209 | 3.66k | return expr; |
210 | 3.66k | } |
211 | | |
212 | | Expr SourceFactory::FoldForMacro(int64_t macro_id, const std::string& iter_var, |
213 | | const Expr& iter_range, |
214 | | const std::string& accu_var, |
215 | | const Expr& accu_init, const Expr& condition, |
216 | 3.66k | const Expr& step, const Expr& result) { |
217 | 3.66k | return NewComprehension(NextMacroId(macro_id), iter_var, iter_range, accu_var, |
218 | 3.66k | accu_init, condition, step, result); |
219 | 3.66k | } |
220 | | |
221 | 9.33k | Expr SourceFactory::NewList(int64_t list_id, const std::vector<Expr>& elems) { |
222 | 9.33k | auto expr = NewExpr(list_id); |
223 | 9.33k | auto list_expr = expr.mutable_list_expr(); |
224 | 9.33k | std::for_each(elems.begin(), elems.end(), |
225 | 694k | [list_expr](const Expr& e) { *list_expr->add_elements() = e; }); |
226 | 9.33k | return expr; |
227 | 9.33k | } |
228 | | |
229 | | Expr SourceFactory::NewQuantifierExprForMacro( |
230 | | SourceFactory::QuantifierKind kind, int64_t macro_id, const Expr& target, |
231 | 2.24k | const std::vector<Expr>& args) { |
232 | 2.24k | if (args.empty()) { |
233 | 0 | return Expr(); |
234 | 0 | } |
235 | 2.24k | if (!args[0].has_ident_expr()) { |
236 | 485 | auto loc = GetSourceLocation(args[0].id()); |
237 | 485 | return ReportError(loc, "argument must be a simple name"); |
238 | 485 | } |
239 | 1.76k | std::string v = args[0].ident_expr().name(); |
240 | | |
241 | | // traditional variable name assigned to the fold accumulator variable. |
242 | 1.76k | const std::string AccumulatorName = "__result__"; |
243 | | |
244 | 5.28k | auto accu_ident = [this, ¯o_id, &AccumulatorName]() { |
245 | 5.28k | return NewIdentForMacro(macro_id, AccumulatorName); |
246 | 5.28k | }; |
247 | | |
248 | 1.76k | Expr init; |
249 | 1.76k | Expr condition; |
250 | 1.76k | Expr step; |
251 | 1.76k | Expr result; |
252 | 1.76k | switch (kind) { |
253 | 741 | case QUANTIFIER_ALL: |
254 | 741 | init = NewLiteralBoolForMacro(macro_id, true); |
255 | 741 | condition = NewGlobalCallForMacro( |
256 | 741 | macro_id, CelOperator::NOT_STRICTLY_FALSE, {accu_ident()}); |
257 | 741 | step = NewGlobalCallForMacro(macro_id, CelOperator::LOGICAL_AND, |
258 | 741 | {accu_ident(), args[1]}); |
259 | 741 | result = accu_ident(); |
260 | 741 | break; |
261 | | |
262 | 418 | case QUANTIFIER_EXISTS: |
263 | 418 | init = NewLiteralBoolForMacro(macro_id, false); |
264 | 418 | condition = NewGlobalCallForMacro( |
265 | 418 | macro_id, CelOperator::NOT_STRICTLY_FALSE, |
266 | 418 | {NewGlobalCallForMacro(macro_id, CelOperator::LOGICAL_NOT, |
267 | 418 | {accu_ident()})}); |
268 | 418 | step = NewGlobalCallForMacro(macro_id, CelOperator::LOGICAL_OR, |
269 | 418 | {accu_ident(), args[1]}); |
270 | 418 | result = accu_ident(); |
271 | 418 | break; |
272 | | |
273 | 601 | case QUANTIFIER_EXISTS_ONE: { |
274 | 601 | Expr zero_expr = NewLiteralIntForMacro(macro_id, 0); |
275 | 601 | Expr one_expr = NewLiteralIntForMacro(macro_id, 1); |
276 | 601 | init = zero_expr; |
277 | 601 | condition = NewLiteralBoolForMacro(macro_id, true); |
278 | 601 | step = NewGlobalCallForMacro( |
279 | 601 | macro_id, CelOperator::CONDITIONAL, |
280 | 601 | {args[1], |
281 | 601 | NewGlobalCallForMacro(macro_id, CelOperator::ADD, |
282 | 601 | {accu_ident(), one_expr}), |
283 | 601 | accu_ident()}); |
284 | 601 | result = NewGlobalCallForMacro(macro_id, CelOperator::EQUALS, |
285 | 601 | {accu_ident(), one_expr}); |
286 | 601 | break; |
287 | 0 | } |
288 | 1.76k | } |
289 | 1.76k | return FoldForMacro(macro_id, v, target, AccumulatorName, init, condition, |
290 | 1.76k | step, result); |
291 | 1.76k | } |
292 | | |
293 | 0 | Expr SourceFactory::BuildArgForMacroCall(const Expr& expr) { |
294 | 0 | if (macro_calls_.find(expr.id()) != macro_calls_.end()) { |
295 | 0 | Expr result_expr; |
296 | 0 | result_expr.set_id(expr.id()); |
297 | 0 | return result_expr; |
298 | 0 | } |
299 | | // Call expression could have args or sub-args that are also macros found in |
300 | | // macro_calls. |
301 | 0 | if (expr.has_call_expr()) { |
302 | 0 | Expr result_expr; |
303 | 0 | result_expr.set_id(expr.id()); |
304 | 0 | auto mutable_expr = result_expr.mutable_call_expr(); |
305 | 0 | mutable_expr->set_function(expr.call_expr().function()); |
306 | 0 | if (expr.call_expr().has_target()) { |
307 | 0 | *mutable_expr->mutable_target() = |
308 | 0 | BuildArgForMacroCall(expr.call_expr().target()); |
309 | 0 | } |
310 | 0 | for (const auto& arg : expr.call_expr().args()) { |
311 | | // Iterate the AST from `expr` recursively looking for macros. Because we |
312 | | // are at most starting from the top level macro, this recursion is |
313 | | // bounded by the size of the AST. This means that the depth check on the |
314 | | // AST during parsing will catch recursion overflows before we get to |
315 | | // here. |
316 | 0 | *mutable_expr->mutable_args()->Add() = BuildArgForMacroCall(arg); |
317 | 0 | } |
318 | 0 | return result_expr; |
319 | 0 | } |
320 | 0 | if (expr.has_list_expr()) { |
321 | 0 | Expr result_expr; |
322 | 0 | result_expr.set_id(expr.id()); |
323 | 0 | const auto& list_expr = expr.list_expr(); |
324 | 0 | auto mutable_list_expr = result_expr.mutable_list_expr(); |
325 | 0 | for (const auto& elem : list_expr.elements()) { |
326 | 0 | *mutable_list_expr->mutable_elements()->Add() = |
327 | 0 | BuildArgForMacroCall(elem); |
328 | 0 | } |
329 | 0 | return result_expr; |
330 | 0 | } |
331 | 0 | return expr; |
332 | 0 | } |
333 | | |
334 | | void SourceFactory::AddMacroCall(int64_t macro_id, const Expr& target, |
335 | | const std::vector<Expr>& args, |
336 | 0 | std::string function) { |
337 | 0 | Expr macro_call; |
338 | 0 | auto mutable_macro_call = macro_call.mutable_call_expr(); |
339 | 0 | mutable_macro_call->set_function(function); |
340 | | |
341 | | // Populating empty targets can cause erros when iterating the macro_calls |
342 | | // expressions, such as the expression_printer in testing. |
343 | 0 | if (target.expr_kind_case() != Expr::ExprKindCase::EXPR_KIND_NOT_SET) { |
344 | 0 | Expr expr; |
345 | 0 | if (macro_calls_.find(target.id()) != macro_calls_.end()) { |
346 | 0 | expr.set_id(target.id()); |
347 | 0 | } else { |
348 | 0 | expr = BuildArgForMacroCall(target); |
349 | 0 | } |
350 | 0 | *mutable_macro_call->mutable_target() = expr; |
351 | 0 | } |
352 | |
|
353 | 0 | for (const auto& arg : args) { |
354 | 0 | *mutable_macro_call->mutable_args()->Add() = BuildArgForMacroCall(arg); |
355 | 0 | } |
356 | 0 | macro_calls_.emplace(macro_id, macro_call); |
357 | 0 | } |
358 | | |
359 | | Expr SourceFactory::NewFilterExprForMacro(int64_t macro_id, const Expr& target, |
360 | 851 | const std::vector<Expr>& args) { |
361 | 851 | if (args.empty()) { |
362 | 0 | return Expr(); |
363 | 0 | } |
364 | 851 | if (!args[0].has_ident_expr()) { |
365 | 206 | auto loc = GetSourceLocation(args[0].id()); |
366 | 206 | return ReportError(loc, "argument is not an identifier"); |
367 | 206 | } |
368 | 645 | std::string v = args[0].ident_expr().name(); |
369 | | |
370 | | // traditional variable name assigned to the fold accumulator variable. |
371 | 645 | const std::string AccumulatorName = "__result__"; |
372 | | |
373 | 645 | Expr filter = args[1]; |
374 | 645 | Expr accu_expr = NewIdentForMacro(macro_id, AccumulatorName); |
375 | 645 | Expr init = NewListForMacro(macro_id, {}); |
376 | 645 | Expr condition = NewLiteralBoolForMacro(macro_id, true); |
377 | 645 | Expr step = |
378 | 645 | NewGlobalCallForMacro(macro_id, CelOperator::ADD, |
379 | 645 | {accu_expr, NewListForMacro(macro_id, {args[0]})}); |
380 | 645 | step = NewGlobalCallForMacro(macro_id, CelOperator::CONDITIONAL, |
381 | 645 | {filter, step, accu_expr}); |
382 | 645 | return FoldForMacro(macro_id, v, target, AccumulatorName, init, condition, |
383 | 645 | step, accu_expr); |
384 | 851 | } |
385 | | |
386 | | Expr SourceFactory::NewListForMacro(int64_t macro_id, |
387 | 3.80k | const std::vector<Expr>& elems) { |
388 | 3.80k | return NewList(NextMacroId(macro_id), elems); |
389 | 3.80k | } |
390 | | |
391 | | Expr SourceFactory::NewMap( |
392 | 659k | int64_t map_id, const std::vector<Expr::CreateStruct::Entry>& entries) { |
393 | 659k | auto expr = NewExpr(map_id); |
394 | 659k | auto struct_expr = expr.mutable_struct_expr(); |
395 | 659k | std::for_each(entries.begin(), entries.end(), |
396 | 659k | [struct_expr](const Expr::CreateStruct::Entry& e) { |
397 | 492k | struct_expr->add_entries()->CopyFrom(e); |
398 | 492k | }); |
399 | 659k | return expr; |
400 | 659k | } |
401 | | |
402 | | Expr SourceFactory::NewMapForMacro(int64_t macro_id, const Expr& target, |
403 | 1.78k | const std::vector<Expr>& args) { |
404 | 1.78k | if (args.empty()) { |
405 | 0 | return Expr(); |
406 | 0 | } |
407 | 1.78k | if (!args[0].has_ident_expr()) { |
408 | 521 | auto loc = GetSourceLocation(args[0].id()); |
409 | 521 | return ReportError(loc, "argument is not an identifier"); |
410 | 521 | } |
411 | 1.25k | std::string v = args[0].ident_expr().name(); |
412 | | |
413 | 1.25k | Expr fn; |
414 | 1.25k | Expr filter; |
415 | 1.25k | bool has_filter = false; |
416 | 1.25k | if (args.size() == 3) { |
417 | 788 | filter = args[1]; |
418 | 788 | has_filter = true; |
419 | 788 | fn = args[2]; |
420 | 788 | } else { |
421 | 471 | fn = args[1]; |
422 | 471 | } |
423 | | |
424 | | // traditional variable name assigned to the fold accumulator variable. |
425 | 1.25k | const std::string AccumulatorName = "__result__"; |
426 | | |
427 | 1.25k | Expr accu_expr = NewIdentForMacro(macro_id, AccumulatorName); |
428 | 1.25k | Expr init = NewListForMacro(macro_id, {}); |
429 | 1.25k | Expr condition = NewLiteralBoolForMacro(macro_id, true); |
430 | 1.25k | Expr step = NewGlobalCallForMacro( |
431 | 1.25k | macro_id, CelOperator::ADD, {accu_expr, NewListForMacro(macro_id, {fn})}); |
432 | 1.25k | if (has_filter) { |
433 | 788 | step = NewGlobalCallForMacro(macro_id, CelOperator::CONDITIONAL, |
434 | 788 | {filter, step, accu_expr}); |
435 | 788 | } |
436 | 1.25k | return FoldForMacro(macro_id, v, target, AccumulatorName, init, condition, |
437 | 1.25k | step, accu_expr); |
438 | 1.78k | } |
439 | | |
440 | | Expr::CreateStruct::Entry SourceFactory::NewMapEntry(int64_t entry_id, |
441 | | const Expr& key, |
442 | 492k | const Expr& value) { |
443 | 492k | Expr::CreateStruct::Entry entry; |
444 | 492k | entry.set_id(entry_id); |
445 | 492k | *entry.mutable_map_key() = key; |
446 | 492k | *entry.mutable_value() = value; |
447 | 492k | return entry; |
448 | 492k | } |
449 | | |
450 | | Expr SourceFactory::NewLiteralInt(antlr4::ParserRuleContext* ctx, |
451 | 594k | int64_t value) { |
452 | 594k | Expr expr = NewExpr(ctx); |
453 | 594k | expr.mutable_const_expr()->set_int64_value(value); |
454 | 594k | return expr; |
455 | 594k | } |
456 | | |
457 | 1.20k | Expr SourceFactory::NewLiteralIntForMacro(int64_t macro_id, int64_t value) { |
458 | 1.20k | Expr expr = NewExpr(NextMacroId(macro_id)); |
459 | 1.20k | expr.mutable_const_expr()->set_int64_value(value); |
460 | 1.20k | return expr; |
461 | 1.20k | } |
462 | | |
463 | | Expr SourceFactory::NewLiteralUint(antlr4::ParserRuleContext* ctx, |
464 | 1.04k | uint64_t value) { |
465 | 1.04k | Expr expr = NewExpr(ctx); |
466 | 1.04k | expr.mutable_const_expr()->set_uint64_value(value); |
467 | 1.04k | return expr; |
468 | 1.04k | } |
469 | | |
470 | | Expr SourceFactory::NewLiteralDouble(antlr4::ParserRuleContext* ctx, |
471 | 19.9k | double value) { |
472 | 19.9k | Expr expr = NewExpr(ctx); |
473 | 19.9k | expr.mutable_const_expr()->set_double_value(value); |
474 | 19.9k | return expr; |
475 | 19.9k | } |
476 | | |
477 | | Expr SourceFactory::NewLiteralString(antlr4::ParserRuleContext* ctx, |
478 | 503k | const std::string& s) { |
479 | 503k | Expr expr = NewExpr(ctx); |
480 | 503k | expr.mutable_const_expr()->set_string_value(s); |
481 | 503k | return expr; |
482 | 503k | } |
483 | | |
484 | | Expr SourceFactory::NewLiteralBytes(antlr4::ParserRuleContext* ctx, |
485 | 2.29k | const std::string& b) { |
486 | 2.29k | Expr expr = NewExpr(ctx); |
487 | 2.29k | expr.mutable_const_expr()->set_bytes_value(b); |
488 | 2.29k | return expr; |
489 | 2.29k | } |
490 | | |
491 | 877 | Expr SourceFactory::NewLiteralBool(antlr4::ParserRuleContext* ctx, bool b) { |
492 | 877 | Expr expr = NewExpr(ctx); |
493 | 877 | expr.mutable_const_expr()->set_bool_value(b); |
494 | 877 | return expr; |
495 | 877 | } |
496 | | |
497 | 3.66k | Expr SourceFactory::NewLiteralBoolForMacro(int64_t macro_id, bool b) { |
498 | 3.66k | Expr expr = NewExpr(NextMacroId(macro_id)); |
499 | 3.66k | expr.mutable_const_expr()->set_bool_value(b); |
500 | 3.66k | return expr; |
501 | 3.66k | } |
502 | | |
503 | 259 | Expr SourceFactory::NewLiteralNull(antlr4::ParserRuleContext* ctx) { |
504 | 259 | Expr expr = NewExpr(ctx); |
505 | 259 | expr.mutable_const_expr()->set_null_value(::google::protobuf::NULL_VALUE); |
506 | 259 | return expr; |
507 | 259 | } |
508 | | |
509 | 11.4k | Expr SourceFactory::ReportError(int64_t expr_id, absl::string_view msg) { |
510 | 11.4k | num_errors_ += 1; |
511 | 11.4k | Expr expr = NewExpr(expr_id); |
512 | 11.4k | if (errors_truncated_.size() < kMaxErrorsToReport) { |
513 | 8.28k | errors_truncated_.emplace_back(std::string(msg), positions_.at(expr_id)); |
514 | 8.28k | } |
515 | 11.4k | return expr; |
516 | 11.4k | } |
517 | | |
518 | | Expr SourceFactory::ReportError(antlr4::ParserRuleContext* ctx, |
519 | 11.4k | absl::string_view msg) { |
520 | 11.4k | return ReportError(Id(ctx), msg); |
521 | 11.4k | } |
522 | | |
523 | | Expr SourceFactory::ReportError(int32_t line, int32_t col, |
524 | 1.97M | absl::string_view msg) { |
525 | 1.97M | num_errors_ += 1; |
526 | 1.97M | SourceLocation loc(line, col, /*offset_end=*/-1, line_offsets_); |
527 | 1.97M | if (errors_truncated_.size() < kMaxErrorsToReport) { |
528 | 99.3k | errors_truncated_.emplace_back(std::string(msg), loc); |
529 | 99.3k | } |
530 | 1.97M | return NewExpr(Id(loc)); |
531 | 1.97M | } |
532 | | |
533 | | Expr SourceFactory::ReportError(const SourceFactory::SourceLocation& loc, |
534 | 6.25k | absl::string_view msg) { |
535 | 6.25k | num_errors_ += 1; |
536 | 6.25k | if (errors_truncated_.size() < kMaxErrorsToReport) { |
537 | 4.47k | errors_truncated_.emplace_back(std::string(msg), loc); |
538 | 4.47k | } |
539 | 6.25k | return NewExpr(Id(loc)); |
540 | 6.25k | } |
541 | | |
542 | | std::string SourceFactory::ErrorMessage(absl::string_view description, |
543 | 6.71k | absl::string_view expression) const { |
544 | | // Errors are collected as they are encountered, not by their location within |
545 | | // the source. To have a more stable error message as implementation |
546 | | // details change, we sort the collected errors by their source location |
547 | | // first. |
548 | | |
549 | | // Use pointer arithmetic to avoid making unnecessary copies of Error when |
550 | | // sorting. |
551 | 6.71k | std::vector<const Error*> errors_sorted; |
552 | 6.71k | errors_sorted.reserve(errors_truncated_.size()); |
553 | 112k | for (auto& error : errors_truncated_) { |
554 | 112k | errors_sorted.push_back(&error); |
555 | 112k | } |
556 | 6.71k | std::stable_sort(errors_sorted.begin(), errors_sorted.end(), |
557 | 225k | [](const Error* lhs, const Error* rhs) { |
558 | | // SourceLocation::noLocation uses -1 and we ideally want |
559 | | // those to be last. |
560 | 225k | auto lhs_line = PositiveOrMax(lhs->location.line); |
561 | 225k | auto lhs_col = PositiveOrMax(lhs->location.col); |
562 | 225k | auto rhs_line = PositiveOrMax(rhs->location.line); |
563 | 225k | auto rhs_col = PositiveOrMax(rhs->location.col); |
564 | | |
565 | 225k | return lhs_line < rhs_line || |
566 | 225k | (lhs_line == rhs_line && lhs_col < rhs_col); |
567 | 225k | }); |
568 | | |
569 | | // Build the summary error message using the sorted errors. |
570 | 6.71k | bool errors_truncated = num_errors_ > kMaxErrorsToReport; |
571 | 6.71k | std::vector<std::string> messages; |
572 | 6.71k | messages.reserve( |
573 | 6.71k | errors_sorted.size() + |
574 | 6.71k | errors_truncated); // Reserve space for the transform and an |
575 | | // additional element when truncation occurs. |
576 | 6.71k | std::transform( |
577 | 6.71k | errors_sorted.begin(), errors_sorted.end(), std::back_inserter(messages), |
578 | 112k | [this, &description, &expression](const SourceFactory::Error* error) { |
579 | 112k | std::string s = absl::StrFormat( |
580 | 112k | "ERROR: %s:%zu:%zu: %s", description, error->location.line, |
581 | | // add one to the 0-based column |
582 | 112k | error->location.col + 1, error->message); |
583 | 112k | std::string snippet = GetSourceLine(error->location.line, expression); |
584 | 112k | std::string::size_type pos = 0; |
585 | 978k | while ((pos = snippet.find('\t', pos)) != std::string::npos) { |
586 | 866k | snippet.replace(pos, 1, " "); |
587 | 866k | } |
588 | 112k | std::string src_line = "\n | " + snippet; |
589 | 112k | std::string ind_line = "\n | "; |
590 | 162M | for (int i = 0; i < error->location.col; ++i) { |
591 | 162M | ind_line += "."; |
592 | 162M | } |
593 | 112k | ind_line += "^"; |
594 | 112k | s += src_line + ind_line; |
595 | 112k | return s; |
596 | 112k | }); |
597 | 6.71k | if (errors_truncated) { |
598 | 459 | messages.emplace_back(absl::StrCat(num_errors_ - kMaxErrorsToReport, |
599 | 459 | " more errors were truncated.")); |
600 | 459 | } |
601 | 6.71k | return absl::StrJoin(messages, "\n"); |
602 | 6.71k | } |
603 | | |
604 | 1.10M | bool SourceFactory::IsReserved(absl::string_view ident_name) { |
605 | 1.10M | static const auto* reserved_words = new absl::flat_hash_set<std::string>( |
606 | 1.10M | {"as", "break", "const", "continue", "else", "false", "for", |
607 | 1.10M | "function", "if", "import", "in", "let", "loop", "package", |
608 | 1.10M | "namespace", "null", "return", "true", "var", "void", "while"}); |
609 | 1.10M | return reserved_words->find(ident_name) != reserved_words->end(); |
610 | 1.10M | } |
611 | | |
612 | 2.50k | google::api::expr::v1alpha1::SourceInfo SourceFactory::source_info() const { |
613 | 2.50k | google::api::expr::v1alpha1::SourceInfo source_info; |
614 | 2.50k | source_info.set_location("<input>"); |
615 | 2.50k | auto positions = source_info.mutable_positions(); |
616 | 2.50k | std::for_each(positions_.begin(), positions_.end(), |
617 | 2.32M | [positions](const std::pair<int64_t, SourceLocation>& loc) { |
618 | 2.32M | positions->insert({loc.first, loc.second.offset}); |
619 | 2.32M | }); |
620 | 2.50k | std::for_each( |
621 | 2.50k | line_offsets_.begin(), line_offsets_.end(), |
622 | 671k | [&source_info](int32_t offset) { source_info.add_line_offsets(offset); }); |
623 | 2.50k | std::for_each(macro_calls_.begin(), macro_calls_.end(), |
624 | 2.50k | [&source_info](const std::pair<int64_t, Expr>& macro_call) { |
625 | 0 | source_info.mutable_macro_calls()->insert( |
626 | 0 | {macro_call.first, macro_call.second}); |
627 | 0 | }); |
628 | 2.50k | return source_info; |
629 | 2.50k | } |
630 | | |
631 | 2.50k | EnrichedSourceInfo SourceFactory::enriched_source_info() const { |
632 | 2.50k | std::map<int64_t, std::pair<int32_t, int32_t>> offset; |
633 | 2.50k | std::for_each( |
634 | 2.50k | positions_.begin(), positions_.end(), |
635 | 2.32M | [&offset](const std::pair<int64_t, SourceLocation>& loc) { |
636 | 2.32M | offset.insert({loc.first, {loc.second.offset, loc.second.offset_end}}); |
637 | 2.32M | }); |
638 | 2.50k | return EnrichedSourceInfo(std::move(offset)); |
639 | 2.50k | } |
640 | | |
641 | 9.21k | void SourceFactory::CalcLineOffsets(absl::string_view expression) { |
642 | 9.21k | std::vector<absl::string_view> lines = absl::StrSplit(expression, '\n'); |
643 | 9.21k | int offset = 0; |
644 | 9.21k | line_offsets_.resize(lines.size()); |
645 | 3.67M | for (size_t i = 0; i < lines.size(); ++i) { |
646 | 3.66M | offset += lines[i].size() + 1; |
647 | 3.66M | line_offsets_[i] = offset; |
648 | 3.66M | } |
649 | 9.21k | } |
650 | | |
651 | 223k | absl::optional<int32_t> SourceFactory::FindLineOffset(int32_t line) const { |
652 | | // note that err.line is 1-based, |
653 | | // while we need the 0-based index |
654 | 223k | if (line == 1) { |
655 | 92.9k | return 0; |
656 | 130k | } else if (line > 1 && line <= static_cast<int32_t>(line_offsets_.size())) { |
657 | 38.4k | return line_offsets_[line - 2]; |
658 | 38.4k | } |
659 | 91.6k | return {}; |
660 | 223k | } |
661 | | |
662 | | std::string SourceFactory::GetSourceLine(int32_t line, |
663 | 112k | absl::string_view expression) const { |
664 | 112k | auto char_start = FindLineOffset(line); |
665 | 112k | if (!char_start) { |
666 | 1.12k | return ""; |
667 | 1.12k | } |
668 | 110k | auto char_end = FindLineOffset(line + 1); |
669 | 110k | if (char_end) { |
670 | 20.4k | return std::string( |
671 | 20.4k | expression.substr(*char_start, *char_end - *char_end - 1)); |
672 | 90.5k | } else { |
673 | 90.5k | return std::string(expression.substr(*char_start)); |
674 | 90.5k | } |
675 | 110k | } |
676 | | |
677 | | } // namespace google::api::expr::parser |