1
#include "source/common/http/path_utility.h"
2

            
3
#include "source/common/common/logger.h"
4

            
5
#include "absl/strings/str_join.h"
6
#include "absl/strings/str_replace.h"
7
#include "absl/strings/str_split.h"
8
#include "absl/types/optional.h"
9
#include "url/url_canon.h"
10
#include "url/url_canon_stdstring.h"
11

            
12
namespace Envoy {
13
namespace Http {
14

            
15
namespace {
16
1985
// Feeds `original_path` through url::CanonicalizePath and returns the canonical form.
// Yields absl::nullopt when the canonicalizer reports failure.
absl::optional<std::string> canonicalizePath(absl::string_view original_path) {
  // `result` backs the canonicalizer's output sink, so it has to be declared first.
  std::string result;
  url::StdStringCanonOutput sink(&result);
  // The whole input is treated as the path component.
  url::Component input_range(0, original_path.size());
  url::Component output_range;

  const bool succeeded =
      url::CanonicalizePath(original_path.data(), input_range, &sink, &output_range);
  if (!succeeded) {
    return absl::nullopt;
  }
  // Finish writing through the sink before the string is handed back to the caller.
  sink.Complete();
  return absl::optional<std::string>(std::move(result));
}
27
} // namespace
28

            
29
/* static */
30
1985
// Canonicalizes the path portion of the request's path header in place.
// Everything from the first '?' onwards is kept verbatim and re-attached after the canonical path.
// Returns false, without modifying the header, when canonicalization fails; true otherwise.
bool PathUtil::canonicalPath(RequestHeaderMap& headers) {
  ASSERT(headers.Path());
  const auto raw_path = headers.getPathValue();

  // Canonicalization applies to the URL path component only, not to the whole :path header,
  // so split at the first '?' ('?' itself belongs to the query part).
  const auto question_mark = raw_path.find('?');
  const bool has_query = question_mark != raw_path.npos;

  absl::string_view path_only = raw_path;
  if (has_query) {
    path_only = absl::string_view(raw_path.data(), question_mark);
  }

  auto canonical = canonicalizePath(path_only);
  if (!canonical.has_value()) {
    return false;
  }

  auto& rebuilt = canonical.value();
  if (has_query) {
    // The tail starts at '?' and is therefore never empty here.
    const absl::string_view tail{raw_path.data() + question_mark,
                                 raw_path.size() - question_mark};
    rebuilt.append(tail.data(), tail.size());
  }
  headers.setPath(rebuilt);
  return true;
}
55

            
56
459
// Collapses every run of consecutive '/' in the path portion of the request's path header into a
// single '/'. The query (everything from the first '?' onwards) is carried over untouched.
// A leading and/or trailing '/' on the path is preserved. The header is only rewritten when the
// path actually contains "//".
void PathUtil::mergeSlashes(RequestHeaderMap& headers) {
  ASSERT(headers.Path());
  const auto original_path = headers.getPathValue();
  // Only operate on path component in URL.
  const absl::string_view::size_type query_start = original_path.find('?');
  const absl::string_view path = original_path.substr(0, query_start);
  const absl::string_view query = absl::ClippedSubstr(original_path, query_start);
  if (path.find("//") == absl::string_view::npos) {
    return;
  }

  // Dropping the empty pieces between adjacent slashes and re-joining merges the slashes.
  const std::string merged_segments =
      absl::StrJoin(absl::StrSplit(path, '/', absl::SkipEmpty()), "/");
  if (merged_segments.empty()) {
    // The path consists solely of slashes (e.g. "//" or "///"). Wrapping an empty segment list in
    // both a leading and a trailing '/' would rebuild "//", so emit a single '/' instead.
    headers.setPath(absl::StrCat("/", query));
    return;
  }

  const absl::string_view path_prefix = absl::StartsWith(path, "/") ? "/" : absl::string_view();
  const absl::string_view path_suffix = absl::EndsWith(path, "/") ? "/" : absl::string_view();
  headers.setPath(absl::StrCat(path_prefix, merged_segments, path_suffix, query));
}
72

            
73
63
// Replaces percent-encoded slashes (%2F / %2f) and backslashes (%5C / %5c) in the path portion of
// the request's path header with the literal characters. The query, starting at the first '?', is
// appended back unchanged.
// Returns NotFound when the path has no '%' at all or when the rewritten header has the same
// length as before; returns FoundAndUnescaped otherwise.
PathUtil::UnescapeSlashesResult PathUtil::unescapeSlashes(RequestHeaderMap& headers) {
  ASSERT(headers.Path());
  const auto full_path = headers.getPathValue();
  const auto length_before = full_path.length();

  // Restrict the work to the URL path component.
  const absl::string_view::size_type query_pos = full_path.find('?');
  const absl::string_view path_part = full_path.substr(0, query_pos);
  if (path_part.find('%') == absl::string_view::npos) {
    // No escape sequence can be present without a '%'.
    return UnescapeSlashesResult::NotFound;
  }
  const absl::string_view query_part = absl::ClippedSubstr(full_path, query_pos);

  static const std::vector<std::pair<absl::string_view, absl::string_view>> escaped_to_literal{
      {"%2F", "/"},
      {"%2f", "/"},
      {"%5C", "\\"},
      {"%5c", "\\"},
  };
  const std::string rewritten =
      absl::StrCat(absl::StrReplaceAll(path_part, escaped_to_literal), query_part);
  headers.setPath(rewritten);

  // Each replacement turns three characters into one, so an unchanged length means nothing was
  // unescaped.
  if (headers.getPathValue().length() == length_before) {
    return UnescapeSlashesResult::NotFound;
  }
  return UnescapeSlashesResult::FoundAndUnescaped;
}
98

            
99
91223
// Returns the prefix of `path` that precedes the first '?' or '#', i.e. the path with any query
// parameters and/or fragment trimmed. When neither character occurs, `path` is returned as is.
// The result is a view into the same underlying data as `path`.
absl::string_view PathUtil::removeQueryAndFragment(const absl::string_view path) {
  const size_t cut = path.find_first_of("?#");
  if (cut == absl::string_view::npos) {
    return path;
  }
  return path.substr(0, cut);
}
108

            
109
} // namespace Http
110
} // namespace Envoy