1
#include "source/extensions/path/uri_template_lib/uri_template_internal.h"
2

            
3
#include <optional>
4
#include <string>
5
#include <type_traits>
6
#include <variant>
7
#include <vector>
8

            
9
#include "source/common/common/fmt.h"
10
#include "source/common/runtime/runtime_features.h"
11

            
12
#include "absl/container/flat_hash_set.h"
13
#include "absl/flags/flag.h"
14
#include "absl/functional/function_ref.h"
15
#include "absl/status/status.h"
16
#include "absl/status/statusor.h"
17
#include "absl/strings/match.h"
18
#include "absl/strings/str_cat.h"
19
#include "absl/strings/str_join.h"
20
#include "absl/strings/str_replace.h"
21
#include "absl/strings/str_split.h"
22
#include "absl/strings/string_view.h"
23
#include "absl/types/variant.h"
24
#include "re2/re2.h"
25

            
26
namespace Envoy {
27
namespace Extensions {
28
namespace UriTemplate {
29

            
30
namespace Internal {
31

            
32
namespace {
33

            
34
#ifndef SWIG
35
// Silence warnings about missing initializers for members of LazyRE2.
36
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
37
#endif
38

            
39
// Largest number of variables a single path pattern may capture.
constexpr unsigned long kPatternMatchingMaxVariablesPerPath{5};
// Longest accepted variable name (compared against the name's size()).
constexpr unsigned long kPatternMatchingMaxVariableNameLen{16};
// Shortest accepted variable name (compared against the name's size()).
constexpr unsigned long kPatternMatchingMinVariableNameLen{1};
42

            
43
// Valid pchar from https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
44
constexpr absl::string_view kLiteral = "a-zA-Z0-9-._~" // Unreserved
45
                                       "%"             // pct-encoded
46
                                       "!$&'()+,;"     // sub-delims excluding *=
47
                                       ":@"
48
                                       "="; // user included "=" allowed
49

            
50
// Additional literal that allows "*" in the pattern.
51
// This should replace "kLiteral" after removal of
52
// "reloadable_features.uri_template_match_on_asterisk" runtime guard.
53
// Valid pchar from https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
54
constexpr absl::string_view kLiteralWithAsterisk = "a-zA-Z0-9-._~" // Unreserved
55
                                                   "%"             // pct-encoded
56
                                                   "!$&'()+,;"     // sub-delims excluding *=
57
                                                   ":@"
58
                                                   "=*"; // reserved characters
59

            
60
// Default operator used for the variable when none specified.
61
constexpr Operator kDefaultVariableOperator = Operator::PathGlob;
62

            
63
// Variant visitor that produces the debug text of whichever alternative is held
// (used for ParsedSegment values and for the entries of Variable::match_).
// The call operator is defined further down, after the toString() overloads.
struct ToStringVisitor {
  template <typename Alternative> std::string operator()(const Alternative& value) const;
};
67

            
68
// Formatter used to allow joining variants together with StrJoin.
69
struct ToStringFormatter {
70
126
  template <typename T> void operator()(std::string* out, const T& t) const {
71
126
    absl::StrAppend(out, absl::visit(ToStringVisitor(), t));
72
126
  }
73
};
74

            
75
// Variant visitor that forwards the held alternative to the matching toRegexPattern() overload.
struct ToRegexPatternVisitor {
  template <typename Alternative> std::string operator()(const Alternative& value) const {
    return toRegexPattern(value);
  }
};
79

            
80
// Formatter used to allow joining variants together with StrJoin.
81
struct ToRegexPatternFormatter {
82
984
  template <typename T> void operator()(std::string* out, const T& t) const {
83
984
    absl::StrAppend(out, absl::visit(ToRegexPatternVisitor(), t));
84
984
  }
85
};
86

            
87
41
std::string toString(const Literal val) { return std::string(val); }
88

            
89
52
std::string toString(const Operator val) {
90
52
  switch (val) {
91
37
  case Operator::PathGlob:
92
37
    return "*";
93
15
  case Operator::TextGlob:
94
15
    return "**";
95
52
  }
96
  return "";
97
52
}
98

            
99
41
std::string toString(const Variable val) {
100
41
  if (val.match_.empty()) {
101
16
    return absl::StrCat("{", val.name_, "}");
102
16
  }
103

            
104
25
  return absl::StrCat("{", val.name_, "=", absl::StrJoin(val.match_, "/", ToStringFormatter()),
105
25
                      "}");
106
41
}
107

            
108
126
// Dispatches to the toString() overload that matches the visited alternative.
template <typename T> std::string ToStringVisitor::operator()(const T& val) const {
  std::string text = toString(val);
  return text;
}
111

            
112
template <typename T>
113
absl::StatusOr<T>
114
alsoUpdatePattern(absl::FunctionRef<absl::StatusOr<ParsedResult<T>>(absl::string_view)> parse_func,
115
1293
                  absl::string_view* patt) {
116

            
117
1293
  absl::StatusOr<ParsedResult<T>> status = parse_func(*patt);
118
1293
  if (!status.ok()) {
119
44
    return status.status();
120
44
  }
121
1249
  ParsedResult<T> result = *std::move(status);
122

            
123
1249
  *patt = result.unparsed_pattern_;
124
1249
  return result.parsed_value_;
125
1293
}
126

            
127
} // namespace
128

            
129
8
std::string Variable::debugString() const { return toString(*this); }
130

            
131
30
std::string ParsedPathPattern::debugString() const {
132
30
  return absl::StrCat("/", absl::StrJoin(parsed_segments_, "/", ToStringFormatter()), suffix_);
133
30
}
134

            
135
540
// Returns true when `literal` is non-empty and made only of characters from the literal
// character class. The class that also admits '*' is used when the
// "envoy.reloadable_features.uri_template_match_on_asterisk" runtime feature is enabled.
bool isValidLiteral(absl::string_view literal) {
  // The pattern strings are allocated once and never deleted; the LazyRE2 objects below are
  // initialized from their character data.
  static const std::string* plain_pattern = new std::string(absl::StrCat("^[", kLiteral, "]+$"));
  static const std::string* asterisk_pattern =
      new std::string(absl::StrCat("^[", kLiteralWithAsterisk, "]+$"));
  static const LazyRE2 plain_regex = {plain_pattern->data()};
  static const LazyRE2 asterisk_regex = {asterisk_pattern->data()};

  if (Runtime::runtimeFeatureEnabled("envoy.reloadable_features.uri_template_match_on_asterisk")) {
    return RE2::FullMatch(literal, *asterisk_regex);
  }
  return RE2::FullMatch(literal, *plain_regex);
}
147

            
148
300
// Returns true when `literal` is non-empty and made only of literal characters or '/'.
// The class that also admits '*' is used when the
// "envoy.reloadable_features.uri_template_match_on_asterisk" runtime feature is enabled.
bool isValidRewriteLiteral(absl::string_view literal) {
  // The pattern strings are allocated once and never deleted; the LazyRE2 objects below are
  // initialized from their character data.
  static const std::string* asterisk_pattern =
      new std::string(absl::StrCat("^[", kLiteralWithAsterisk, "/]+$"));
  static const std::string* plain_pattern = new std::string(absl::StrCat("^[", kLiteral, "/]+$"));
  static const LazyRE2 asterisk_regex = {asterisk_pattern->data()};
  static const LazyRE2 plain_regex = {plain_pattern->data()};

  if (Runtime::runtimeFeatureEnabled("envoy.reloadable_features.uri_template_match_on_asterisk")) {
    return RE2::FullMatch(literal, *asterisk_regex);
  }
  return RE2::FullMatch(literal, *plain_regex);
}
161

            
162
772
// Returns true when `variable` starts with an ASCII letter and continues with only ASCII
// letters, digits or underscores.
bool isValidVariableName(absl::string_view variable) {
  static const LazyRE2 name_regex = {"^[a-zA-Z][a-zA-Z0-9_]*$"};
  const bool matches = RE2::FullMatch(variable, *name_regex);
  return matches;
}
166

            
167
525
// Parses one literal from the front of `pattern`.
// The literal is everything up to (not including) the first '/', or the whole pattern when it
// has no '/'. Returns the literal together with the unconsumed remainder (which starts at that
// '/' when present), or InvalidArgument when the literal fails isValidLiteral() -- this includes
// an empty literal.
absl::StatusOr<ParsedResult<Literal>> parseLiteral(absl::string_view pattern) {
  // find() yields npos when there is no '/', and substr(0, npos) is then the whole pattern.
  // This avoids materializing a vector of split pieces just to read the first one.
  const absl::string_view literal = pattern.substr(0, pattern.find('/'));
  const absl::string_view unparsed_pattern = pattern.substr(literal.size());
  if (!isValidLiteral(literal)) {
    return absl::InvalidArgumentError(fmt::format("Invalid literal: \"{}\"", literal));
  }
  return ParsedResult<Literal>(literal, unparsed_pattern);
}
176

            
177
298
// Parses a glob operator from the front of `pattern`.
// "**" is recognized as TextGlob and a single "*" as PathGlob; the result carries the pattern
// with the operator removed. Anything not starting with '*' is an InvalidArgument error.
absl::StatusOr<ParsedResult<Operator>> parseOperator(absl::string_view pattern) {
  if (!absl::StartsWith(pattern, "*")) {
    return absl::InvalidArgumentError(fmt::format("Invalid Operator: \"{}\"", pattern));
  }

  // The longer operator has to be tested first, since "**" also starts with "*".
  const bool is_text_glob = absl::StartsWith(pattern, "**");
  return is_text_glob ? ParsedResult<Operator>(Operator::TextGlob, pattern.substr(2))
                      : ParsedResult<Operator>(Operator::PathGlob, pattern.substr(1));
}
186

            
187
499
// Parses a "{name}" or "{name=match}" variable from the front of `pattern`.
// `match` is a '/'-separated list of operators ("*", "**") and literals. On success the result
// holds the variable and whatever followed the closing '}'. InvalidArgument is returned for a
// missing '{', an unmatched bracket, an invalid name, an empty match, an invalid match entry, or
// a match entry followed by anything other than '/' plus more text.
absl::StatusOr<ParsedResult<Variable>> parseVariable(absl::string_view pattern) {
  // A variable needs the opening '{' plus at least one more character.
  if (pattern.size() < 2 || pattern.front() != '{') {
    return absl::InvalidArgumentError(fmt::format("Invalid variable: \"{}\"", pattern));
  }

  // Split at the first '}' into the variable body and the text after it.
  const std::vector<absl::string_view> bracket_split =
      absl::StrSplit(pattern.substr(1), absl::MaxSplits('}', 1));
  if (bracket_split.size() != 2) {
    return absl::InvalidArgumentError(fmt::format("Unmatched variable bracket in \"{}\"", pattern));
  }
  const absl::string_view variable_body = bracket_split[0];
  const absl::string_view unparsed_pattern = bracket_split[1];

  // Split the body at the first '=' into the name and the optional match pattern.
  const std::vector<absl::string_view> name_and_match =
      absl::StrSplit(variable_body, absl::MaxSplits('=', 1));
  const absl::string_view name = name_and_match[0];
  if (!isValidVariableName(name)) {
    return absl::InvalidArgumentError(fmt::format("Invalid variable name: \"{}\"", name));
  }
  Variable var(name, {});

  // No '=' at all: the variable has no explicit match pattern.
  if (name_and_match.size() < 2) {
    return ParsedResult<Variable>(var, unparsed_pattern);
  }

  absl::string_view remaining = name_and_match[1];
  if (remaining.empty()) {
    return absl::InvalidArgumentError("Empty variable match");
  }

  while (!remaining.empty()) {
    absl::variant<Operator, Literal> match;
    if (remaining.front() == '*') {
      absl::StatusOr<Operator> op = alsoUpdatePattern<Operator>(parseOperator, &remaining);
      if (!op.ok()) {
        return op.status();
      }
      match = *std::move(op);
    } else {
      absl::StatusOr<Literal> literal = alsoUpdatePattern<Literal>(parseLiteral, &remaining);
      if (!literal.ok()) {
        return literal.status();
      }
      match = *std::move(literal);
    }
    var.match_.push_back(match);

    if (remaining.empty()) {
      break;
    }
    // Whatever follows an entry must be a '/' separator that is itself followed by more text.
    if (remaining.front() != '/' || remaining.size() == 1) {
      return absl::InvalidArgumentError(fmt::format("Invalid variable match: \"{}\"", remaining));
    }
    remaining.remove_prefix(1);
  }

  return ParsedResult<Variable>(var, unparsed_pattern);
}
243

            
244
// Collects the names of all variables among the pattern's parsed segments.
// Returns InvalidArgument when there are more variables than
// kPatternMatchingMaxVariablesPerPath, when a name's length is outside
// [kPatternMatchingMinVariableNameLen, kPatternMatchingMaxVariableNameLen], or when a name
// occurs more than once.
absl::StatusOr<absl::flat_hash_set<absl::string_view>>
gatherCaptureNames(const struct ParsedPathPattern& pattern) {
  absl::flat_hash_set<absl::string_view> captured_variables;

  for (const ParsedSegment& segment : pattern.parsed_segments_) {
    const Variable* variable = absl::get_if<Variable>(&segment);
    if (variable == nullptr) {
      // Operators and literals capture nothing.
      continue;
    }

    // The limit is checked before this variable is added.
    if (captured_variables.size() >= kPatternMatchingMaxVariablesPerPath) {
      return absl::InvalidArgumentError(
          fmt::format("Exceeded variable count limit ({})", kPatternMatchingMaxVariablesPerPath));
    }

    const absl::string_view name = variable->name_;
    const bool length_in_range = name.size() >= kPatternMatchingMinVariableNameLen &&
                                 name.size() <= kPatternMatchingMaxVariableNameLen;
    if (!length_in_range) {
      return absl::InvalidArgumentError(fmt::format(
          "Invalid variable name length (length of \"{}\" should be at least {} and no more than "
          "{})",
          name, kPatternMatchingMinVariableNameLen, kPatternMatchingMaxVariableNameLen));
    }

    // insert() reports through .second whether the name was new.
    if (!captured_variables.insert(name).second) {
      return absl::InvalidArgumentError(fmt::format("Repeated variable name: \"{}\"", name));
    }
  }

  return captured_variables;
}
273

            
274
276
// Checks that once a text glob ("**") has appeared, no further glob follows it -- whether as a
// top-level operator, inside a variable's match pattern, or implied by a variable that has no
// match pattern. Returns OkStatus when the pattern satisfies this, InvalidArgument otherwise.
absl::Status validateNoOperatorAfterTextGlob(const struct ParsedPathPattern& pattern) {
  bool text_glob_seen = false;

  for (const ParsedSegment& segment : pattern.parsed_segments_) {
    if (const Operator* top_level_op = absl::get_if<Operator>(&segment)) {
      if (text_glob_seen) {
        return absl::InvalidArgumentError("Glob after text glob.");
      }
      text_glob_seen = (*top_level_op == Operator::TextGlob);
      continue;
    }

    const Variable* variable = absl::get_if<Variable>(&segment);
    if (variable == nullptr) {
      // Literal segments are not restricted.
      continue;
    }

    if (variable->match_.empty()) {
      // A variable with no explicit matcher is treated as a path glob.
      if (text_glob_seen) {
        return absl::InvalidArgumentError("Implicit variable path glob after text glob.");
      }
      continue;
    }

    for (const auto& match_entry : variable->match_) {
      const Operator* nested_op = absl::get_if<Operator>(&match_entry);
      if (nested_op == nullptr) {
        continue;
      }
      if (text_glob_seen) {
        return absl::InvalidArgumentError("Glob after text glob.");
      }
      text_glob_seen = (*nested_op == Operator::TextGlob);
    }
  }

  return absl::OkStatus();
}
304

            
305
325
// Parses a path pattern into a ParsedPathPattern.
// The pattern must start with '/' and contain only characters matched by [[:graph:]]. It is
// split into '/'-separated segments, each an operator, a variable or a literal. Text that follows
// a segment without a '/' in between is parsed as a literal and stored as the suffix. After
// lexing, the captured variable names are gathered and the glob ordering is validated.
// Returns InvalidArgument on any lexical or validation failure.
absl::StatusOr<ParsedPathPattern> parsePathPatternSyntax(absl::string_view path) {
  static const LazyRE2 printable_regex = {"^/[[:graph:]]*$"};
  if (!RE2::FullMatch(path, *printable_regex)) {
    return absl::InvalidArgumentError(fmt::format("Invalid pattern: \"{}\"", path));
  }

  struct ParsedPathPattern parsed_pattern;

  // Consume the leading '/' that the check above guarantees.
  path.remove_prefix(1);

  // Lexical pass: one segment per iteration.
  while (!path.empty()) {
    ParsedSegment segment;
    switch (path[0]) {
    case '*': {
      absl::StatusOr<Operator> op = alsoUpdatePattern<Operator>(parseOperator, &path);
      if (!op.ok()) {
        return op.status();
      }
      segment = *std::move(op);
      break;
    }
    case '{': {
      absl::StatusOr<Variable> variable = alsoUpdatePattern<Variable>(parseVariable, &path);
      if (!variable.ok()) {
        return variable.status();
      }
      segment = *std::move(variable);
      break;
    }
    default: {
      absl::StatusOr<Literal> literal = alsoUpdatePattern<Literal>(parseLiteral, &path);
      if (!literal.ok()) {
        return literal.status();
      }
      segment = *std::move(literal);
      break;
    }
    }
    parsed_pattern.parsed_segments_.push_back(segment);

    // Decide what follows the segment: nothing, a trailing '/', a separator, or a suffix.
    if (path.empty()) {
      break;
    }
    if (path == "/") {
      // Single trailing '/' at the end, mark this with empty literal.
      parsed_pattern.parsed_segments_.emplace_back("");
      break;
    }
    if (path[0] == '/') {
      // Have '/' followed by more text, parse the '/'.
      path.remove_prefix(1);
      continue;
    }

    // Not followed by '/', treat as suffix.
    absl::StatusOr<Literal> suffix_literal = alsoUpdatePattern<Literal>(parseLiteral, &path);
    if (!suffix_literal.ok()) {
      return suffix_literal.status();
    }
    parsed_pattern.suffix_ = *std::move(suffix_literal);
    if (!path.empty()) {
      // Suffix didn't parse whole remaining pattern ('/' in path).
      return absl::InvalidArgumentError("Prefix match not supported.");
    }
    break;
  }

  absl::StatusOr<absl::flat_hash_set<absl::string_view>> capture_names =
      gatherCaptureNames(parsed_pattern);
  if (!capture_names.ok()) {
    return capture_names.status();
  }
  parsed_pattern.captured_variables_ = *std::move(capture_names);

  absl::Status glob_order_status = validateNoOperatorAfterTextGlob(parsed_pattern);
  if (!glob_order_status.ok()) {
    return glob_order_status;
  }

  return parsed_pattern;
}
378

            
379
684
// Converts literal text into a regex fragment that matches that text verbatim, by
// backslash-escaping the regex metacharacters a literal may contain: '$', '(', ')', '+', '.'
// and '*'.
// '*' has to be escaped because the literal validators in this file accept it when the
// "envoy.reloadable_features.uri_template_match_on_asterisk" runtime feature is enabled; left
// unescaped it would act as a repetition operator on the preceding element instead of matching
// a literal asterisk.
std::string toRegexPattern(absl::string_view pattern) {
  return absl::StrReplaceAll(pattern, {{"$", "\\$"},
                                       {"(", "\\("},
                                       {")", "\\)"},
                                       {"+", "\\+"},
                                       {".", "\\."},
                                       {"*", "\\*"}});
}
383

            
384
483
// Regex fragment for a glob operator.
// PathGlob ("*") matches one or more literal characters; TextGlob ("**") matches zero or more
// literal characters or '/'. The character class that also admits '*' is used when the
// "envoy.reloadable_features.uri_template_match_on_asterisk" runtime feature is enabled.
// Returns an empty string for a value that is neither operator.
std::string toRegexPattern(Operator pattern) {
  // Built once and never deleted.
  static const std::string* path_glob = new std::string(absl::StrCat("[", kLiteral, "]+"));
  static const std::string* text_glob = new std::string(absl::StrCat("[", kLiteral, "/]*"));
  static const std::string* path_glob_asterisk =
      new std::string(absl::StrCat("[", kLiteralWithAsterisk, "]+"));
  static const std::string* text_glob_asterisk =
      new std::string(absl::StrCat("[", kLiteralWithAsterisk, "/]*"));

  const bool asterisk_enabled =
      Runtime::runtimeFeatureEnabled("envoy.reloadable_features.uri_template_match_on_asterisk");

  switch (pattern) {
  case Operator::PathGlob: // "*"
    return asterisk_enabled ? *path_glob_asterisk : *path_glob;
  case Operator::TextGlob: // "**"
    return asterisk_enabled ? *text_glob_asterisk : *text_glob;
  }
  return "";
}
410

            
411
403
std::string toRegexPattern(const Variable& pattern) {
412
403
  return absl::StrCat("(?P<", pattern.name_, ">",
413
403
                      pattern.match_.empty()
414
403
                          ? toRegexPattern(kDefaultVariableOperator)
415
403
                          : absl::StrJoin(pattern.match_, "/", ToRegexPatternFormatter()),
416
403
                      ")");
417
403
}
418

            
419
236
std::string toRegexPattern(const struct ParsedPathPattern& pattern) {
420
236
  return absl::StrCat("/", absl::StrJoin(pattern.parsed_segments_, "/", ToRegexPatternFormatter()),
421
236
                      toRegexPattern(pattern.suffix_));
422
236
}
423

            
424
} // namespace Internal
425
} // namespace UriTemplate
426
} // namespace Extensions
427
} // namespace Envoy