Coverage Report

Created: 2023-05-25 06:18

/proc/self/cwd/parser/source_factory.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2021 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "parser/source_factory.h"
16
17
#include <algorithm>
18
#include <cstdint>
19
#include <limits>
20
#include <string>
21
#include <utility>
22
23
#include "google/protobuf/struct.pb.h"
24
#include "absl/container/flat_hash_set.h"
25
#include "absl/memory/memory.h"
26
#include "absl/strings/numbers.h"
27
#include "absl/strings/str_format.h"
28
#include "absl/strings/str_join.h"
29
#include "absl/strings/str_split.h"
30
#include "common/operators.h"
31
32
namespace google::api::expr::parser {
33
namespace {
34
35
const int kMaxErrorsToReport = 100;
36
37
using common::CelOperator;
38
using google::api::expr::v1alpha1::Expr;
39
40
902k
// Returns `value` unchanged when it is non-negative; any negative value is
// mapped to the largest int32_t so that it orders after every real value.
int32_t PositiveOrMax(int32_t value) {
  if (value < 0) {
    return std::numeric_limits<int32_t>::max();
  }
  return value;
}
43
44
}  // namespace
45
46
SourceFactory::SourceFactory(absl::string_view expression)
47
9.21k
    : next_id_(1), num_errors_(0) {
48
9.21k
  CalcLineOffsets(expression);
49
9.21k
}
50
51
4.23M
// Allocates the next id and records, under that id, a SourceLocation built
// from the token's line, column within the line, and stop index.
int64_t SourceFactory::Id(const antlr4::Token* token) {
  const int64_t id = next_id_;
  const auto line = static_cast<int32_t>(token->getLine());
  const auto column = static_cast<int32_t>(token->getCharPositionInLine());
  const auto stop = static_cast<int32_t>(token->getStopIndex());
  positions_.emplace(id, SourceLocation{line, column, stop, line_offsets_});
  ++next_id_;
  return id;
}
61
62
// Returns the location recorded for `id`. The lookup uses `at()`, so `id` must
// have been produced by one of the Id() overloads.
const SourceFactory::SourceLocation& SourceFactory::GetSourceLocation(
    int64_t id) const {
  const SourceLocation& location = positions_.at(id);
  return location;
}
66
67
1.81k
// Returns the sentinel location: line, column and end offset are all -1 and no
// line offsets are supplied.
const SourceFactory::SourceLocation SourceFactory::NoLocation() {
  constexpr int32_t kUnknown = -1;
  return SourceLocation(kUnknown, kUnknown, kUnknown, {});
}
70
71
1.13M
// Allocates an id positioned at the first token of the parser rule `ctx`.
int64_t SourceFactory::Id(antlr4::ParserRuleContext* ctx) {
  const antlr4::Token* first_token = ctx->getStart();
  return Id(first_token);
}
74
75
2.00M
int64_t SourceFactory::Id(const SourceLocation& location) {
76
2.00M
  int64_t new_id = next_id_;
77
2.00M
  positions_.emplace(new_id, location);
78
2.00M
  next_id_ += 1;
79
2.00M
  return new_id;
80
2.00M
}
81
82
27.7k
int64_t SourceFactory::NextMacroId(int64_t macro_id) {
83
27.7k
  return Id(GetSourceLocation(macro_id));
84
27.7k
}
85
86
5.74M
// Returns an Expr whose only populated field is its id.
Expr SourceFactory::NewExpr(int64_t id) {
  Expr result;
  result.set_id(id);
  return result;
}
91
92
1.12M
// Returns an empty Expr carrying a new id located at the start of `ctx`.
Expr SourceFactory::NewExpr(antlr4::ParserRuleContext* ctx) {
  const int64_t id = Id(ctx);
  return NewExpr(id);
}
95
96
1.10M
// Returns an empty Expr carrying a new id located at `token`.
Expr SourceFactory::NewExpr(const antlr4::Token* token) {
  const int64_t id = Id(token);
  return NewExpr(id);
}
99
100
// Builds a call expression with id `id` that invokes `function` with a copy of
// each element of `args`, in order. No target is set.
Expr SourceFactory::NewGlobalCall(int64_t id, const std::string& function,
                                  const std::vector<Expr>& args) {
  Expr result = NewExpr(id);
  auto* call = result.mutable_call_expr();
  call->set_function(function);
  for (const Expr& arg : args) {
    *call->add_args() = arg;
  }
  return result;
}
109
110
// Same as NewGlobalCall(), but the call gets a freshly allocated id that
// shares the source location of `macro_id`.
Expr SourceFactory::NewGlobalCallForMacro(int64_t macro_id,
                                          const std::string& function,
                                          const std::vector<Expr>& args) {
  const int64_t call_id = NextMacroId(macro_id);
  return NewGlobalCall(call_id, function, args);
}
115
116
// Builds a call expression with id `id` that invokes `function` on `target`
// with a copy of each element of `args`, in order.
Expr SourceFactory::NewReceiverCall(int64_t id, const std::string& function,
                                    const Expr& target,
                                    const std::vector<Expr>& args) {
  Expr result = NewExpr(id);
  auto* call = result.mutable_call_expr();
  call->set_function(function);
  *call->mutable_target() = target;
  for (const Expr& arg : args) {
    *call->add_args() = arg;
  }
  return result;
}
127
128
// Builds an identifier expression named `ident_name`, positioned at `token`.
Expr SourceFactory::NewIdent(const antlr4::Token* token,
                             const std::string& ident_name) {
  Expr result = NewExpr(token);
  auto* ident = result.mutable_ident_expr();
  ident->set_name(ident_name);
  return result;
}
134
135
// Builds an identifier expression named `ident_name` under a fresh id that
// shares the source location of `macro_id`.
Expr SourceFactory::NewIdentForMacro(int64_t macro_id,
                                     const std::string& ident_name) {
  const int64_t ident_id = NextMacroId(macro_id);
  Expr result = NewExpr(ident_id);
  result.mutable_ident_expr()->set_name(ident_name);
  return result;
}
141
142
// Builds a field selection `operand.field`, positioned at the `op` token of
// `ctx`. `operand` is copied into the result.
Expr SourceFactory::NewSelect(
    ::cel_parser_internal::CelParser::SelectOrCallContext* ctx, Expr& operand,
    const std::string& field) {
  Expr result = NewExpr(ctx->op);
  auto* select = result.mutable_select_expr();
  *select->mutable_operand() = operand;
  select->set_field(field);
  return result;
}
151
152
// Builds a field selection `operand.field` under a fresh id that shares the
// source location of `macro_id`.
Expr SourceFactory::NewSelectForMacro(int64_t macro_id, const Expr& operand,
                                      const std::string& field) {
  const int64_t select_id = NextMacroId(macro_id);
  Expr result = NewExpr(select_id);
  auto* select = result.mutable_select_expr();
  *select->mutable_operand() = operand;
  select->set_field(field);
  return result;
}
160
161
// Builds a test-only selection of `field` on `operand` (the select has
// `test_only` set) under a fresh id that shares the source location of
// `macro_id`.
Expr SourceFactory::NewPresenceTestForMacro(int64_t macro_id,
                                            const Expr& operand,
                                            const std::string& field) {
  const int64_t select_id = NextMacroId(macro_id);
  Expr result = NewExpr(select_id);
  auto* select = result.mutable_select_expr();
  *select->mutable_operand() = operand;
  select->set_field(field);
  select->set_test_only(true);
  return result;
}
171
172
// Builds a struct-creation expression for message `type_name` with id
// `obj_id`, copying every entry of `entries` in order.
Expr SourceFactory::NewObject(
    int64_t obj_id, const std::string& type_name,
    const std::vector<Expr::CreateStruct::Entry>& entries) {
  Expr result = NewExpr(obj_id);
  auto* object = result.mutable_struct_expr();
  object->set_message_name(type_name);
  for (const Expr::CreateStruct::Entry& entry : entries) {
    object->add_entries()->CopyFrom(entry);
  }
  return result;
}
184
185
// Builds a struct entry with id `field_id` that assigns a copy of `value` to
// the field named `field`.
Expr::CreateStruct::Entry SourceFactory::NewObjectField(
    int64_t field_id, const std::string& field, const Expr& value) {
  Expr::CreateStruct::Entry result;
  result.set_id(field_id);
  result.set_field_key(field);
  *result.mutable_value() = value;
  return result;
}
193
194
// Builds a comprehension expression with id `id`. Every Expr argument is
// copied into the corresponding comprehension field.
Expr SourceFactory::NewComprehension(int64_t id, const std::string& iter_var,
                                     const Expr& iter_range,
                                     const std::string& accu_var,
                                     const Expr& accu_init,
                                     const Expr& condition, const Expr& step,
                                     const Expr& result) {
  Expr comprehension = NewExpr(id);
  auto* fold = comprehension.mutable_comprehension_expr();
  // Iteration variable and the range it walks.
  fold->set_iter_var(iter_var);
  *fold->mutable_iter_range() = iter_range;
  // Accumulator variable and its initial value.
  fold->set_accu_var(accu_var);
  *fold->mutable_accu_init() = accu_init;
  // Loop condition, per-iteration step, and final result.
  *fold->mutable_loop_condition() = condition;
  *fold->mutable_loop_step() = step;
  *fold->mutable_result() = result;
  return comprehension;
}
211
212
// Same as NewComprehension(), but the comprehension gets a freshly allocated
// id that shares the source location of `macro_id`.
Expr SourceFactory::FoldForMacro(int64_t macro_id, const std::string& iter_var,
                                 const Expr& iter_range,
                                 const std::string& accu_var,
                                 const Expr& accu_init, const Expr& condition,
                                 const Expr& step, const Expr& result) {
  const int64_t fold_id = NextMacroId(macro_id);
  return NewComprehension(fold_id, iter_var, iter_range, accu_var, accu_init,
                          condition, step, result);
}
220
221
9.33k
// Builds a list-creation expression with id `list_id` holding a copy of each
// element of `elems`, in order.
Expr SourceFactory::NewList(int64_t list_id, const std::vector<Expr>& elems) {
  Expr result = NewExpr(list_id);
  auto* list = result.mutable_list_expr();
  for (const Expr& elem : elems) {
    *list->add_elements() = elem;
  }
  return result;
}
228
229
// Expands a quantifier macro (`all`, `exists`, `exists_one`) over `target`
// into a comprehension.
//
// `args[0]` must be an identifier naming the iteration variable and `args[1]`
// is the predicate. Returns:
//   - an empty Expr when fewer than two usable arguments are supplied,
//   - an error Expr (via ReportError) when `args[0]` is not an identifier,
//   - otherwise the comprehension produced by FoldForMacro().
//
// The statements below allocate ids through the *ForMacro helpers, so their
// order determines the ids in the resulting AST and must not be rearranged.
Expr SourceFactory::NewQuantifierExprForMacro(
    SourceFactory::QuantifierKind kind, int64_t macro_id, const Expr& target,
    const std::vector<Expr>& args) {
  if (args.empty()) {
    return Expr();
  }
  if (!args[0].has_ident_expr()) {
    auto loc = GetSourceLocation(args[0].id());
    return ReportError(loc, "argument must be a simple name");
  }
  // Every branch below reads args[1]; without this guard a single-argument
  // call would index past the end of `args`.
  if (args.size() < 2) {
    return Expr();
  }
  const std::string& v = args[0].ident_expr().name();

  // traditional variable name assigned to the fold accumulator variable.
  const std::string AccumulatorName = "__result__";

  // Each call yields a new identifier node (with its own id) that refers to
  // the accumulator.
  auto accu_ident = [this, &macro_id, &AccumulatorName]() {
    return NewIdentForMacro(macro_id, AccumulatorName);
  };

  Expr init;
  Expr condition;
  Expr step;
  Expr result;
  switch (kind) {
    case QUANTIFIER_ALL:
      // accumulator starts true; step is `accu && predicate`.
      init = NewLiteralBoolForMacro(macro_id, true);
      condition = NewGlobalCallForMacro(
          macro_id, CelOperator::NOT_STRICTLY_FALSE, {accu_ident()});
      step = NewGlobalCallForMacro(macro_id, CelOperator::LOGICAL_AND,
                                   {accu_ident(), args[1]});
      result = accu_ident();
      break;

    case QUANTIFIER_EXISTS:
      // accumulator starts false; step is `accu || predicate`.
      init = NewLiteralBoolForMacro(macro_id, false);
      condition = NewGlobalCallForMacro(
          macro_id, CelOperator::NOT_STRICTLY_FALSE,
          {NewGlobalCallForMacro(macro_id, CelOperator::LOGICAL_NOT,
                                 {accu_ident()})});
      step = NewGlobalCallForMacro(macro_id, CelOperator::LOGICAL_OR,
                                   {accu_ident(), args[1]});
      result = accu_ident();
      break;

    case QUANTIFIER_EXISTS_ONE: {
      // accumulator counts matches; result is `accu == 1`.
      Expr zero_expr = NewLiteralIntForMacro(macro_id, 0);
      Expr one_expr = NewLiteralIntForMacro(macro_id, 1);
      init = zero_expr;
      condition = NewLiteralBoolForMacro(macro_id, true);
      step = NewGlobalCallForMacro(
          macro_id, CelOperator::CONDITIONAL,
          {args[1],
           NewGlobalCallForMacro(macro_id, CelOperator::ADD,
                                 {accu_ident(), one_expr}),
           accu_ident()});
      result = NewGlobalCallForMacro(macro_id, CelOperator::EQUALS,
                                     {accu_ident(), one_expr});
      break;
    }
  }
  return FoldForMacro(macro_id, v, target, AccumulatorName, init, condition,
                      step, result);
}
292
293
0
// Produces the form of `expr` that is stored inside a recorded macro call.
//
// - An expression whose id is itself a recorded macro call is replaced by a
//   stub carrying only that id.
// - Call and list expressions are rebuilt with each target/argument/element
//   processed recursively.
// - Anything else is returned as a plain copy.
Expr SourceFactory::BuildArgForMacroCall(const Expr& expr) {
  Expr stub;
  stub.set_id(expr.id());

  if (macro_calls_.find(expr.id()) != macro_calls_.end()) {
    return stub;
  }

  // Call expression could have args or sub-args that are also macros found in
  // macro_calls.
  if (expr.has_call_expr()) {
    const auto& source_call = expr.call_expr();
    auto* rebuilt_call = stub.mutable_call_expr();
    rebuilt_call->set_function(source_call.function());
    if (source_call.has_target()) {
      *rebuilt_call->mutable_target() =
          BuildArgForMacroCall(source_call.target());
    }
    // Iterate the AST from `expr` recursively looking for macros. Because we
    // are at most starting from the top level macro, this recursion is
    // bounded by the size of the AST. This means that the depth check on the
    // AST during parsing will catch recursion overflows before we get to
    // here.
    for (const auto& arg : source_call.args()) {
      *rebuilt_call->add_args() = BuildArgForMacroCall(arg);
    }
    return stub;
  }

  if (expr.has_list_expr()) {
    auto* rebuilt_list = stub.mutable_list_expr();
    for (const auto& elem : expr.list_expr().elements()) {
      *rebuilt_list->add_elements() = BuildArgForMacroCall(elem);
    }
    return stub;
  }

  return expr;
}
333
334
void SourceFactory::AddMacroCall(int64_t macro_id, const Expr& target,
335
                                 const std::vector<Expr>& args,
336
0
                                 std::string function) {
337
0
  Expr macro_call;
338
0
  auto mutable_macro_call = macro_call.mutable_call_expr();
339
0
  mutable_macro_call->set_function(function);
340
341
  // Populating empty targets can cause erros when iterating the macro_calls
342
  // expressions, such as the expression_printer in testing.
343
0
  if (target.expr_kind_case() != Expr::ExprKindCase::EXPR_KIND_NOT_SET) {
344
0
    Expr expr;
345
0
    if (macro_calls_.find(target.id()) != macro_calls_.end()) {
346
0
      expr.set_id(target.id());
347
0
    } else {
348
0
      expr = BuildArgForMacroCall(target);
349
0
    }
350
0
    *mutable_macro_call->mutable_target() = expr;
351
0
  }
352
353
0
  for (const auto& arg : args) {
354
0
    *mutable_macro_call->mutable_args()->Add() = BuildArgForMacroCall(arg);
355
0
  }
356
0
  macro_calls_.emplace(macro_id, macro_call);
357
0
}
358
359
// Expands the `filter` macro over `target` into a comprehension that appends
// the iteration variable to a list whenever the predicate holds.
//
// `args[0]` must be an identifier naming the iteration variable and `args[1]`
// is the predicate. Returns:
//   - an empty Expr when fewer than two usable arguments are supplied,
//   - an error Expr (via ReportError) when `args[0]` is not an identifier,
//   - otherwise the comprehension produced by FoldForMacro().
//
// The statements below allocate ids through the *ForMacro helpers, so their
// order determines the ids in the resulting AST and must not be rearranged.
Expr SourceFactory::NewFilterExprForMacro(int64_t macro_id, const Expr& target,
                                          const std::vector<Expr>& args) {
  if (args.empty()) {
    return Expr();
  }
  if (!args[0].has_ident_expr()) {
    auto loc = GetSourceLocation(args[0].id());
    return ReportError(loc, "argument is not an identifier");
  }
  // args[1] is read below; without this guard a single-argument call would
  // index past the end of `args`.
  if (args.size() < 2) {
    return Expr();
  }
  const std::string& v = args[0].ident_expr().name();

  // traditional variable name assigned to the fold accumulator variable.
  const std::string AccumulatorName = "__result__";

  const Expr& filter = args[1];
  Expr accu_expr = NewIdentForMacro(macro_id, AccumulatorName);
  Expr init = NewListForMacro(macro_id, {});
  Expr condition = NewLiteralBoolForMacro(macro_id, true);
  // step = filter ? accu + [var] : accu
  Expr step =
      NewGlobalCallForMacro(macro_id, CelOperator::ADD,
                            {accu_expr, NewListForMacro(macro_id, {args[0]})});
  step = NewGlobalCallForMacro(macro_id, CelOperator::CONDITIONAL,
                               {filter, step, accu_expr});
  return FoldForMacro(macro_id, v, target, AccumulatorName, init, condition,
                      step, accu_expr);
}
385
386
// Same as NewList(), but the list gets a freshly allocated id that shares the
// source location of `macro_id`.
Expr SourceFactory::NewListForMacro(int64_t macro_id,
                                    const std::vector<Expr>& elems) {
  const int64_t list_id = NextMacroId(macro_id);
  return NewList(list_id, elems);
}
390
391
// Builds a struct-creation expression with id `map_id` and no message name,
// copying every entry of `entries` in order.
Expr SourceFactory::NewMap(
    int64_t map_id, const std::vector<Expr::CreateStruct::Entry>& entries) {
  Expr result = NewExpr(map_id);
  auto* map = result.mutable_struct_expr();
  for (const Expr::CreateStruct::Entry& entry : entries) {
    map->add_entries()->CopyFrom(entry);
  }
  return result;
}
401
402
// Expands the `map` macro over `target` into a comprehension that appends the
// transformed element to a list on each iteration.
//
// `args[0]` must be an identifier naming the iteration variable. With exactly
// three arguments, `args[1]` is a filter predicate and `args[2]` is the
// transform; otherwise `args[1]` is the transform and there is no filter.
// Returns:
//   - an empty Expr when fewer than two usable arguments are supplied,
//   - an error Expr (via ReportError) when `args[0]` is not an identifier,
//   - otherwise the comprehension produced by FoldForMacro().
//
// The statements below allocate ids through the *ForMacro helpers, so their
// order determines the ids in the resulting AST and must not be rearranged.
Expr SourceFactory::NewMapForMacro(int64_t macro_id, const Expr& target,
                                   const std::vector<Expr>& args) {
  if (args.empty()) {
    return Expr();
  }
  if (!args[0].has_ident_expr()) {
    auto loc = GetSourceLocation(args[0].id());
    return ReportError(loc, "argument is not an identifier");
  }
  // args[1] is read below; without this guard a single-argument call would
  // index past the end of `args`.
  if (args.size() < 2) {
    return Expr();
  }
  const std::string& v = args[0].ident_expr().name();

  Expr fn;
  Expr filter;
  bool has_filter = false;
  if (args.size() == 3) {
    filter = args[1];
    has_filter = true;
    fn = args[2];
  } else {
    fn = args[1];
  }

  // traditional variable name assigned to the fold accumulator variable.
  const std::string AccumulatorName = "__result__";

  Expr accu_expr = NewIdentForMacro(macro_id, AccumulatorName);
  Expr init = NewListForMacro(macro_id, {});
  Expr condition = NewLiteralBoolForMacro(macro_id, true);
  // step = accu + [fn], optionally wrapped as `filter ? step : accu`.
  Expr step = NewGlobalCallForMacro(
      macro_id, CelOperator::ADD, {accu_expr, NewListForMacro(macro_id, {fn})});
  if (has_filter) {
    step = NewGlobalCallForMacro(macro_id, CelOperator::CONDITIONAL,
                                 {filter, step, accu_expr});
  }
  return FoldForMacro(macro_id, v, target, AccumulatorName, init, condition,
                      step, accu_expr);
}
439
440
// Builds a map entry with id `entry_id` holding copies of `key` and `value`.
Expr::CreateStruct::Entry SourceFactory::NewMapEntry(int64_t entry_id,
                                                     const Expr& key,
                                                     const Expr& value) {
  Expr::CreateStruct::Entry result;
  result.set_id(entry_id);
  *result.mutable_map_key() = key;
  *result.mutable_value() = value;
  return result;
}
449
450
// Builds an int64 constant expression positioned at the start of `ctx`.
Expr SourceFactory::NewLiteralInt(antlr4::ParserRuleContext* ctx,
                                  int64_t value) {
  Expr literal = NewExpr(ctx);
  auto* constant = literal.mutable_const_expr();
  constant->set_int64_value(value);
  return literal;
}
456
457
1.20k
// Builds an int64 constant expression under a fresh id that shares the source
// location of `macro_id`.
Expr SourceFactory::NewLiteralIntForMacro(int64_t macro_id, int64_t value) {
  const int64_t literal_id = NextMacroId(macro_id);
  Expr literal = NewExpr(literal_id);
  literal.mutable_const_expr()->set_int64_value(value);
  return literal;
}
462
463
// Builds a uint64 constant expression positioned at the start of `ctx`.
Expr SourceFactory::NewLiteralUint(antlr4::ParserRuleContext* ctx,
                                   uint64_t value) {
  Expr literal = NewExpr(ctx);
  auto* constant = literal.mutable_const_expr();
  constant->set_uint64_value(value);
  return literal;
}
469
470
// Builds a double constant expression positioned at the start of `ctx`.
Expr SourceFactory::NewLiteralDouble(antlr4::ParserRuleContext* ctx,
                                     double value) {
  Expr literal = NewExpr(ctx);
  auto* constant = literal.mutable_const_expr();
  constant->set_double_value(value);
  return literal;
}
476
477
// Builds a string constant expression positioned at the start of `ctx`.
Expr SourceFactory::NewLiteralString(antlr4::ParserRuleContext* ctx,
                                     const std::string& s) {
  Expr literal = NewExpr(ctx);
  auto* constant = literal.mutable_const_expr();
  constant->set_string_value(s);
  return literal;
}
483
484
// Builds a bytes constant expression positioned at the start of `ctx`.
Expr SourceFactory::NewLiteralBytes(antlr4::ParserRuleContext* ctx,
                                    const std::string& b) {
  Expr literal = NewExpr(ctx);
  auto* constant = literal.mutable_const_expr();
  constant->set_bytes_value(b);
  return literal;
}
490
491
877
// Builds a bool constant expression positioned at the start of `ctx`.
Expr SourceFactory::NewLiteralBool(antlr4::ParserRuleContext* ctx, bool b) {
  Expr literal = NewExpr(ctx);
  auto* constant = literal.mutable_const_expr();
  constant->set_bool_value(b);
  return literal;
}
496
497
3.66k
// Builds a bool constant expression under a fresh id that shares the source
// location of `macro_id`.
Expr SourceFactory::NewLiteralBoolForMacro(int64_t macro_id, bool b) {
  const int64_t literal_id = NextMacroId(macro_id);
  Expr literal = NewExpr(literal_id);
  literal.mutable_const_expr()->set_bool_value(b);
  return literal;
}
502
503
259
// Builds a null constant expression positioned at the start of `ctx`.
Expr SourceFactory::NewLiteralNull(antlr4::ParserRuleContext* ctx) {
  Expr literal = NewExpr(ctx);
  auto* constant = literal.mutable_const_expr();
  constant->set_null_value(::google::protobuf::NULL_VALUE);
  return literal;
}
508
509
11.4k
// Counts an error attached to the already-registered id `expr_id` and, while
// fewer than kMaxErrorsToReport errors are stored, records `msg` together with
// the location registered for that id. Returns an empty Expr with id
// `expr_id`.
Expr SourceFactory::ReportError(int64_t expr_id, absl::string_view msg) {
  ++num_errors_;
  Expr placeholder = NewExpr(expr_id);
  const bool has_room = errors_truncated_.size() < kMaxErrorsToReport;
  if (has_room) {
    errors_truncated_.emplace_back(std::string(msg), positions_.at(expr_id));
  }
  return placeholder;
}
517
518
// Allocates an id at the start of `ctx` and reports `msg` against it.
Expr SourceFactory::ReportError(antlr4::ParserRuleContext* ctx,
                                absl::string_view msg) {
  const int64_t error_id = Id(ctx);
  return ReportError(error_id, msg);
}
522
523
// Counts an error at (`line`, `col`) with no end offset and, while fewer than
// kMaxErrorsToReport errors are stored, records `msg` with that location.
// Returns an empty Expr carrying a new id registered at the same location.
Expr SourceFactory::ReportError(int32_t line, int32_t col,
                                absl::string_view msg) {
  ++num_errors_;
  SourceLocation location(line, col, /*offset_end=*/-1, line_offsets_);
  const bool has_room = errors_truncated_.size() < kMaxErrorsToReport;
  if (has_room) {
    errors_truncated_.emplace_back(std::string(msg), location);
  }
  const int64_t error_id = Id(location);
  return NewExpr(error_id);
}
532
533
// Counts an error at `loc` and, while fewer than kMaxErrorsToReport errors are
// stored, records `msg` with that location. Returns an empty Expr carrying a
// new id registered at `loc`.
Expr SourceFactory::ReportError(const SourceFactory::SourceLocation& loc,
                                absl::string_view msg) {
  ++num_errors_;
  const bool has_room = errors_truncated_.size() < kMaxErrorsToReport;
  if (has_room) {
    errors_truncated_.emplace_back(std::string(msg), loc);
  }
  const int64_t error_id = Id(loc);
  return NewExpr(error_id);
}
541
542
// Renders the stored errors as one newline-joined report.
//
// Each stored error yields an "ERROR: <description>:<line>:<col+1>: <message>"
// header, the offending source line taken from `expression` (tabs shown as
// spaces), and an indicator line of dots ending in '^' under the error column.
// When more errors were counted than stored, a final line states how many were
// truncated.
std::string SourceFactory::ErrorMessage(absl::string_view description,
                                        absl::string_view expression) const {
  // Errors are collected as they are encountered, not by their location within
  // the source. To have a more stable error message as implementation
  // details change, we sort the collected errors by their source location
  // first.

  // Sort pointers rather than the Error objects themselves so that no Error is
  // copied while sorting.
  std::vector<const Error*> ordered;
  ordered.reserve(errors_truncated_.size());
  for (const auto& error : errors_truncated_) {
    ordered.push_back(&error);
  }
  std::stable_sort(
      ordered.begin(), ordered.end(), [](const Error* lhs, const Error* rhs) {
        // SourceLocation::noLocation uses -1 and we ideally want those to be
        // last, hence PositiveOrMax. Pairs compare by line first, then column.
        const auto lhs_key = std::make_pair(PositiveOrMax(lhs->location.line),
                                            PositiveOrMax(lhs->location.col));
        const auto rhs_key = std::make_pair(PositiveOrMax(rhs->location.line),
                                            PositiveOrMax(rhs->location.col));
        return lhs_key < rhs_key;
      });

  // Build the summary error message using the sorted errors.
  const bool truncated = num_errors_ > kMaxErrorsToReport;
  std::vector<std::string> messages;
  // One slot per stored error plus one for the truncation notice, if any.
  messages.reserve(ordered.size() + (truncated ? 1 : 0));

  for (const Error* error : ordered) {
    std::string message = absl::StrFormat(
        "ERROR: %s:%zu:%zu: %s", description, error->location.line,
        // add one to the 0-based column
        error->location.col + 1, error->message);

    std::string snippet = GetSourceLine(error->location.line, expression);
    std::replace(snippet.begin(), snippet.end(), '\t', ' ');

    message += "\n | ";
    message += snippet;
    message += "\n | ";
    // One dot per column before the error; nothing for non-positive columns.
    const auto column = error->location.col;
    if (column > 0) {
      message.append(static_cast<std::string::size_type>(column), '.');
    }
    message += "^";
    messages.push_back(std::move(message));
  }

  if (truncated) {
    messages.emplace_back(absl::StrCat(num_errors_ - kMaxErrorsToReport,
                                       " more errors were truncated."));
  }
  return absl::StrJoin(messages, "\n");
}
603
604
1.10M
// Returns true when `ident_name` is one of the reserved words listed below.
bool SourceFactory::IsReserved(absl::string_view ident_name) {
  // Allocated once on first use and never freed.
  static const auto* reserved_words = new absl::flat_hash_set<std::string>(
      {"as",        "break", "const",  "continue", "else", "false", "for",
       "function",  "if",    "import", "in",       "let",  "loop",  "package",
       "namespace", "null",  "return", "true",     "var",  "void",  "while"});
  return reserved_words->contains(ident_name);
}
611
612
2.50k
// Builds the SourceInfo proto for the parsed expression: the location is
// always "<input>", `positions` maps every allocated id to its start offset,
// `line_offsets` mirrors the computed line offsets, and `macro_calls` carries
// every recorded macro call.
google::api::expr::v1alpha1::SourceInfo SourceFactory::source_info() const {
  google::api::expr::v1alpha1::SourceInfo source_info;
  source_info.set_location("<input>");

  // `const auto&` binds directly to each container element. Spelling the
  // element type out as std::pair<int64_t, T> risks not matching the
  // container's real value_type, in which case every element is converted into
  // a temporary copy.
  auto* positions = source_info.mutable_positions();
  for (const auto& entry : positions_) {
    positions->insert({entry.first, entry.second.offset});
  }

  for (const int32_t offset : line_offsets_) {
    source_info.add_line_offsets(offset);
  }

  auto* macro_calls = source_info.mutable_macro_calls();
  for (const auto& entry : macro_calls_) {
    macro_calls->insert({entry.first, entry.second});
  }
  return source_info;
}
630
631
2.50k
// Builds an EnrichedSourceInfo that maps every allocated id to its
// (start offset, end offset) pair.
EnrichedSourceInfo SourceFactory::enriched_source_info() const {
  std::map<int64_t, std::pair<int32_t, int32_t>> offset;
  // `const auto&` binds directly to each container element, so no per-element
  // temporary is created while iterating.
  for (const auto& entry : positions_) {
    offset.insert(
        {entry.first, {entry.second.offset, entry.second.offset_end}});
  }
  return EnrichedSourceInfo(std::move(offset));
}
640
641
9.21k
// Fills `line_offsets_` with one entry per '\n'-separated line of
// `expression`: entry i is the total length of lines 0..i, counting one extra
// character per line for its separator (i.e. where line i+1 would start).
void SourceFactory::CalcLineOffsets(absl::string_view expression) {
  const std::vector<absl::string_view> lines =
      absl::StrSplit(expression, '\n');
  line_offsets_.resize(lines.size());
  int next_line_start = 0;
  auto out = line_offsets_.begin();
  for (const absl::string_view line : lines) {
    next_line_start += line.size() + 1;
    *out = next_line_start;
    ++out;
  }
}
650
651
223k
// Returns the character offset at which the 1-based `line` starts, or nullopt
// when `line` is below 1 or above the number of recorded line offsets (line 1
// always starts at offset 0).
absl::optional<int32_t> SourceFactory::FindLineOffset(int32_t line) const {
  if (line == 1) {
    return 0;
  }
  const auto line_count = static_cast<int32_t>(line_offsets_.size());
  if (line < 1 || line > line_count) {
    return absl::nullopt;
  }
  // `line` is 1-based and line_offsets_[i] holds the start of line i + 2, so
  // the start of `line` is stored at index line - 2.
  return line_offsets_[line - 2];
}
661
662
std::string SourceFactory::GetSourceLine(int32_t line,
663
112k
                                         absl::string_view expression) const {
664
112k
  auto char_start = FindLineOffset(line);
665
112k
  if (!char_start) {
666
1.12k
    return "";
667
1.12k
  }
668
110k
  auto char_end = FindLineOffset(line + 1);
669
110k
  if (char_end) {
670
20.4k
    return std::string(
671
20.4k
        expression.substr(*char_start, *char_end - *char_end - 1));
672
90.5k
  } else {
673
90.5k
    return std::string(expression.substr(*char_start));
674
90.5k
  }
675
110k
}
676
677
}  // namespace google::api::expr::parser