/proc/self/cwd/source/common/http/path_utility.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include "source/common/http/path_utility.h" |
2 | | |
3 | | #include "source/common/common/logger.h" |
4 | | #include "source/common/runtime/runtime_features.h" |
5 | | |
6 | | #include "absl/strings/str_join.h" |
7 | | #include "absl/strings/str_split.h" |
8 | | #include "absl/types/optional.h" |
9 | | #include "url/url_canon.h" |
10 | | #include "url/url_canon_stdstring.h" |
11 | | |
12 | | namespace Envoy { |
13 | | namespace Http { |
14 | | |
15 | | namespace { |
16 | 2.81k | absl::optional<std::string> canonicalizePath(absl::string_view original_path) { |
17 | 2.81k | std::string canonical_path; |
18 | 2.81k | url::Component in_component(0, original_path.size()); |
19 | 2.81k | url::Component out_component; |
20 | 2.81k | url::StdStringCanonOutput output(&canonical_path); |
21 | 2.81k | if (!url::CanonicalizePath(original_path.data(), in_component, &output, &out_component)) { |
22 | 1 | return absl::nullopt; |
23 | 1 | } |
24 | 2.81k | output.Complete(); |
25 | 2.81k | return absl::make_optional(std::move(canonical_path)); |
26 | 2.81k | } |
27 | | |
28 | | void unescapeInPath(std::string& path, absl::string_view escape_sequence, |
29 | 0 | absl::string_view substitution) { |
30 | 0 | std::vector<absl::string_view> split = absl::StrSplit(path, escape_sequence); |
31 | 0 | if (split.size() == 1) { |
32 | 0 | return; |
33 | 0 | } |
34 | 0 | path = absl::StrJoin(split, substitution); |
35 | 0 | } |
36 | | |
37 | | } // namespace |
38 | | |
39 | | /* static */ |
40 | 2.81k | bool PathUtil::canonicalPath(RequestHeaderMap& headers) { |
41 | 2.81k | ASSERT(headers.Path()); |
42 | 2.81k | const auto original_path = headers.getPathValue(); |
43 | | // canonicalPath is supposed to apply on path component in URL instead of :path header |
44 | 2.81k | const auto query_pos = original_path.find('?'); |
45 | 2.81k | auto normalized_path_opt = canonicalizePath( |
46 | 2.81k | query_pos == original_path.npos |
47 | 2.81k | ? original_path |
48 | 2.81k | : absl::string_view(original_path.data(), query_pos) // '?' is not included |
49 | 2.81k | ); |
50 | | |
51 | 2.81k | if (!normalized_path_opt.has_value()) { |
52 | 1 | return false; |
53 | 1 | } |
54 | 2.81k | auto& normalized_path = normalized_path_opt.value(); |
55 | 2.81k | const absl::string_view query_suffix = |
56 | 2.81k | query_pos == original_path.npos |
57 | 2.81k | ? absl::string_view{} |
58 | 2.81k | : absl::string_view{original_path.data() + query_pos, original_path.size() - query_pos}; |
59 | 2.81k | if (!query_suffix.empty()) { |
60 | 89 | normalized_path.insert(normalized_path.end(), query_suffix.begin(), query_suffix.end()); |
61 | 89 | } |
62 | 2.81k | headers.setPath(normalized_path); |
63 | 2.81k | return true; |
64 | 2.81k | } |
65 | | |
66 | 3.33k | void PathUtil::mergeSlashes(RequestHeaderMap& headers) { |
67 | 3.33k | ASSERT(headers.Path()); |
68 | 3.33k | const auto original_path = headers.getPathValue(); |
69 | | // Only operate on path component in URL. |
70 | 3.33k | const absl::string_view::size_type query_start = original_path.find('?'); |
71 | 3.33k | const absl::string_view path = original_path.substr(0, query_start); |
72 | 3.33k | const absl::string_view query = absl::ClippedSubstr(original_path, query_start); |
73 | 3.33k | if (path.find("//") == absl::string_view::npos) { |
74 | 3.11k | return; |
75 | 3.11k | } |
76 | 228 | const absl::string_view path_prefix = absl::StartsWith(path, "/") ? "/" : absl::string_view(); |
77 | 228 | const absl::string_view path_suffix = absl::EndsWith(path, "/") ? "/" : absl::string_view(); |
78 | 228 | headers.setPath(absl::StrCat(path_prefix, |
79 | 228 | absl::StrJoin(absl::StrSplit(path, '/', absl::SkipEmpty()), "/"), |
80 | 228 | path_suffix, query)); |
81 | 228 | } |
82 | | |
83 | 0 | PathUtil::UnescapeSlashesResult PathUtil::unescapeSlashes(RequestHeaderMap& headers) { |
84 | 0 | ASSERT(headers.Path()); |
85 | 0 | const auto original_path = headers.getPathValue(); |
86 | 0 | const auto original_length = original_path.length(); |
87 | | // Only operate on path component in URL. |
88 | 0 | const absl::string_view::size_type query_start = original_path.find('?'); |
89 | 0 | const absl::string_view path = original_path.substr(0, query_start); |
90 | 0 | if (path.find('%') == absl::string_view::npos) { |
91 | 0 | return UnescapeSlashesResult::NotFound; |
92 | 0 | } |
93 | 0 | const absl::string_view query = absl::ClippedSubstr(original_path, query_start); |
94 | | |
95 | | // TODO(yanavlasov): optimize this by adding case insensitive matcher |
96 | 0 | std::string decoded_path{path}; |
97 | 0 | unescapeInPath(decoded_path, "%2F", "/"); |
98 | 0 | unescapeInPath(decoded_path, "%2f", "/"); |
99 | 0 | unescapeInPath(decoded_path, "%5C", "\\"); |
100 | 0 | unescapeInPath(decoded_path, "%5c", "\\"); |
101 | 0 | headers.setPath(absl::StrCat(decoded_path, query)); |
102 | | // Path length will not match if there were unescaped %2f or %5c |
103 | 0 | return headers.getPathValue().length() != original_length |
104 | 0 | ? UnescapeSlashesResult::FoundAndUnescaped |
105 | 0 | : UnescapeSlashesResult::NotFound; |
106 | 0 | } |
107 | | |
108 | 32.8k | absl::string_view PathUtil::removeQueryAndFragment(const absl::string_view path) { |
109 | 32.8k | absl::string_view ret = path; |
110 | | // Trim query parameters and/or fragment if present. |
111 | 32.8k | size_t offset = ret.find_first_of("?#"); |
112 | 32.8k | if (offset != absl::string_view::npos) { |
113 | 4.65k | ret.remove_suffix(ret.length() - offset); |
114 | 4.65k | } |
115 | 32.8k | return ret; |
116 | 32.8k | } |
117 | | |
118 | | } // namespace Http |
119 | | } // namespace Envoy |