Line data Source code
1 : #include "source/common/http/path_utility.h" 2 : 3 : #include "source/common/common/logger.h" 4 : #include "source/common/runtime/runtime_features.h" 5 : 6 : #include "absl/strings/str_join.h" 7 : #include "absl/strings/str_split.h" 8 : #include "absl/types/optional.h" 9 : #include "url/url_canon.h" 10 : #include "url/url_canon_stdstring.h" 11 : 12 : namespace Envoy { 13 : namespace Http { 14 : 15 : namespace { 16 206 : absl::optional<std::string> canonicalizePath(absl::string_view original_path) { 17 206 : std::string canonical_path; 18 206 : url::Component in_component(0, original_path.size()); 19 206 : url::Component out_component; 20 206 : url::StdStringCanonOutput output(&canonical_path); 21 206 : if (!url::CanonicalizePath(original_path.data(), in_component, &output, &out_component)) { 22 0 : return absl::nullopt; 23 0 : } 24 206 : output.Complete(); 25 206 : return absl::make_optional(std::move(canonical_path)); 26 206 : } 27 : 28 : void unescapeInPath(std::string& path, absl::string_view escape_sequence, 29 0 : absl::string_view substitution) { 30 0 : std::vector<absl::string_view> split = absl::StrSplit(path, escape_sequence); 31 0 : if (split.size() == 1) { 32 0 : return; 33 0 : } 34 0 : path = absl::StrJoin(split, substitution); 35 0 : } 36 : 37 : } // namespace 38 : 39 : /* static */ 40 206 : bool PathUtil::canonicalPath(RequestHeaderMap& headers) { 41 206 : ASSERT(headers.Path()); 42 206 : const auto original_path = headers.getPathValue(); 43 : // canonicalPath is supposed to apply on path component in URL instead of :path header 44 206 : const auto query_pos = original_path.find('?'); 45 206 : auto normalized_path_opt = canonicalizePath( 46 206 : query_pos == original_path.npos 47 206 : ? original_path 48 206 : : absl::string_view(original_path.data(), query_pos) // '?' is not included 49 206 : ); 50 : 51 206 : if (!normalized_path_opt.has_value()) { 52 0 : return false; 53 0 : } 54 206 : auto& normalized_path = normalized_path_opt.value(); 55 206 : const absl::string_view query_suffix = 56 206 : query_pos == original_path.npos 57 206 : ? absl::string_view{} 58 206 : : absl::string_view{original_path.data() + query_pos, original_path.size() - query_pos}; 59 206 : if (!query_suffix.empty()) { 60 7 : normalized_path.insert(normalized_path.end(), query_suffix.begin(), query_suffix.end()); 61 7 : } 62 206 : headers.setPath(normalized_path); 63 206 : return true; 64 206 : } 65 : 66 196 : void PathUtil::mergeSlashes(RequestHeaderMap& headers) { 67 196 : ASSERT(headers.Path()); 68 196 : const auto original_path = headers.getPathValue(); 69 : // Only operate on path component in URL. 70 196 : const absl::string_view::size_type query_start = original_path.find('?'); 71 196 : const absl::string_view path = original_path.substr(0, query_start); 72 196 : const absl::string_view query = absl::ClippedSubstr(original_path, query_start); 73 196 : if (path.find("//") == absl::string_view::npos) { 74 176 : return; 75 176 : } 76 20 : const absl::string_view path_prefix = absl::StartsWith(path, "/") ? "/" : absl::string_view(); 77 20 : const absl::string_view path_suffix = absl::EndsWith(path, "/") ? "/" : absl::string_view(); 78 20 : headers.setPath(absl::StrCat(path_prefix, 79 20 : absl::StrJoin(absl::StrSplit(path, '/', absl::SkipEmpty()), "/"), 80 20 : path_suffix, query)); 81 20 : } 82 : 83 0 : PathUtil::UnescapeSlashesResult PathUtil::unescapeSlashes(RequestHeaderMap& headers) { 84 0 : ASSERT(headers.Path()); 85 0 : const auto original_path = headers.getPathValue(); 86 0 : const auto original_length = original_path.length(); 87 : // Only operate on path component in URL. 88 0 : const absl::string_view::size_type query_start = original_path.find('?'); 89 0 : const absl::string_view path = original_path.substr(0, query_start); 90 0 : if (path.find('%') == absl::string_view::npos) { 91 0 : return UnescapeSlashesResult::NotFound; 92 0 : } 93 0 : const absl::string_view query = absl::ClippedSubstr(original_path, query_start); 94 : 95 : // TODO(yanavlasov): optimize this by adding case insensitive matcher 96 0 : std::string decoded_path{path}; 97 0 : unescapeInPath(decoded_path, "%2F", "/"); 98 0 : unescapeInPath(decoded_path, "%2f", "/"); 99 0 : unescapeInPath(decoded_path, "%5C", "\\"); 100 0 : unescapeInPath(decoded_path, "%5c", "\\"); 101 0 : headers.setPath(absl::StrCat(decoded_path, query)); 102 : // Path length will not match if there were unescaped %2f or %5c 103 0 : return headers.getPathValue().length() != original_length 104 0 : ? UnescapeSlashesResult::FoundAndUnescaped 105 0 : : UnescapeSlashesResult::NotFound; 106 0 : } 107 : 108 706 : absl::string_view PathUtil::removeQueryAndFragment(const absl::string_view path) { 109 706 : absl::string_view ret = path; 110 : // Trim query parameters and/or fragment if present. 111 706 : size_t offset = ret.find_first_of("?#"); 112 706 : if (offset != absl::string_view::npos) { 113 144 : ret.remove_suffix(ret.length() - offset); 114 144 : } 115 706 : return ret; 116 706 : } 117 : 118 : } // namespace Http 119 : } // namespace Envoy