UriUtils.java
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.azurebfs.utils;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.AND_MARK;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EQUAL;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_BLOB_DOMAIN_NAME;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DFS_DOMAIN_NAME;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SAOID;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SIGNATURE;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SKOID;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SUOID;
/**
* Utility class to help with Abfs url transformation to blob urls.
*/
public final class UriUtils {
private static final String ABFS_URI_REGEX = "[^.]+\\.dfs\\.(preprod\\.){0,1}core\\.windows\\.net";
private static final Pattern ABFS_URI_PATTERN = Pattern.compile(ABFS_URI_REGEX);
private static final Set<String> FULL_MASK_PARAM_KEYS = new HashSet<>(
Collections.singleton(QUERY_PARAM_SIGNATURE));
private static final Set<String> PARTIAL_MASK_PARAM_KEYS = new HashSet<>(
Arrays.asList(QUERY_PARAM_SKOID, QUERY_PARAM_SAOID, QUERY_PARAM_SUOID));
private static final Character CHAR_MASK = 'X';
private static final String FULL_MASK = "XXXXX";
private static final int DEFAULT_QUERY_STRINGBUILDER_CAPACITY = 550;
private static final int PARTIAL_MASK_VISIBLE_LEN = 18;
/**
* Checks whether a string includes abfs url.
* @param string the string to check.
* @return true if string has abfs url.
*/
public static boolean containsAbfsUrl(final String string) {
if (string == null || string.isEmpty()) {
return false;
}
return ABFS_URI_PATTERN.matcher(string).matches();
}
/**
* Extracts the account name from the host name.
* @param hostName the fully-qualified domain name of the storage service
* endpoint (e.g. {account}.dfs.core.windows.net.
* @return the storage service account name.
*/
public static String extractAccountNameFromHostName(final String hostName) {
if (hostName == null || hostName.isEmpty()) {
return null;
}
if (!containsAbfsUrl(hostName)) {
return null;
}
String[] splitByDot = hostName.split("\\.");
if (splitByDot.length == 0) {
return null;
}
return splitByDot[0];
}
/**
* Generate unique test path for multiple user tests.
*
* @return root test path
*/
public static String generateUniqueTestPath() {
String testUniqueForkId = System.getProperty("test.unique.fork.id");
return testUniqueForkId == null ? "/test" : "/" + testUniqueForkId + "/test";
}
public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
Set<String> queryParamsForFullMask,
Set<String> queryParamsForPartialMask) {
return maskUrlQueryParameters(keyValueList, queryParamsForFullMask,
queryParamsForPartialMask, DEFAULT_QUERY_STRINGBUILDER_CAPACITY);
}
/**
* Generic function to mask a set of query parameters partially/fully and
* return the resultant query string
* @param keyValueList List of NameValuePair instances for query keys/values
* @param queryParamsForFullMask values for these params will appear as "XXXX"
* @param queryParamsForPartialMask values will be masked with 'X', except for
* the last PARTIAL_MASK_VISIBLE_LEN characters
* @param queryLen to initialize StringBuilder for the masked query
* @return the masked url query part
*/
public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
Set<String> queryParamsForFullMask,
Set<String> queryParamsForPartialMask, int queryLen) {
StringBuilder maskedUrl = new StringBuilder(queryLen);
for (NameValuePair keyValuePair : keyValueList) {
String key = keyValuePair.getName();
if (key.isEmpty()) {
throw new IllegalArgumentException("Query param key should not be empty");
}
String value = keyValuePair.getValue();
maskedUrl.append(key);
maskedUrl.append(EQUAL);
if (value != null && !value.isEmpty()) { //no mask
if (queryParamsForFullMask.contains(key)) {
maskedUrl.append(FULL_MASK);
} else if (queryParamsForPartialMask.contains(key)) {
int valueLen = value.length();
int maskedLen = valueLen > PARTIAL_MASK_VISIBLE_LEN
? PARTIAL_MASK_VISIBLE_LEN : valueLen / 2;
maskedUrl.append(value, 0, valueLen - maskedLen);
maskedUrl.append(StringUtils.repeat(CHAR_MASK, maskedLen));
} else {
maskedUrl.append(value);
}
}
maskedUrl.append(AND_MARK);
}
maskedUrl.deleteCharAt(maskedUrl.length() - 1);
return maskedUrl.toString();
}
public static String encodedUrlStr(String url) {
try {
return URLEncoder.encode(url, "UTF-8");
} catch (UnsupportedEncodingException e) {
return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl";
}
}
public static String getMaskedUrl(URL url) {
String queryString = url.getQuery();
if (queryString == null) {
return url.toString();
}
List<NameValuePair> queryKeyValueList = URLEncodedUtils
.parse(queryString, StandardCharsets.UTF_8);
String maskedQueryString = maskUrlQueryParameters(queryKeyValueList,
FULL_MASK_PARAM_KEYS, PARTIAL_MASK_PARAM_KEYS, queryString.length());
return url.toString().replace(queryString, maskedQueryString);
}
/**
* Changes Blob Endpoint URL to DFS Endpoint URL.
* If original url is not Blob Endpoint URL, it will return the original URL.
* @param url to be converted.
* @return updated URL
* @throws InvalidUriException in case of MalformedURLException.
*/
public static URL changeUrlFromBlobToDfs(URL url) throws InvalidUriException {
try {
url = new URL(replacedUrl(url.toString(), ABFS_BLOB_DOMAIN_NAME, ABFS_DFS_DOMAIN_NAME));
} catch (MalformedURLException ex) {
throw new InvalidUriException(url.toString());
}
return url;
}
/**
* Changes DFS Endpoint URL to Blob Endpoint URL.
* If original url is not DFS Endpoint URL, it will return the original URL.
* @param url to be converted.
* @return updated URL
* @throws InvalidUriException in case of MalformedURLException.
*/
public static URL changeUrlFromDfsToBlob(URL url) throws InvalidUriException {
try {
url = new URL(replacedUrl(url.toString(), ABFS_DFS_DOMAIN_NAME, ABFS_BLOB_DOMAIN_NAME));
} catch (MalformedURLException ex) {
throw new InvalidUriException(url.toString());
}
return url;
}
/**
* Replaces the oldString with newString in the baseUrl.
* It will extract the account url path to make sure we do not replace any
* matching string in blob path or any other part of url
* @param baseUrl the url to be updated.
* @param oldString the string to be replaced.
* @param newString the string to be replaced with.
* @return updated URL
*/
private static String replacedUrl(String baseUrl, String oldString, String newString) {
int startIndex = baseUrl.toString().indexOf("//") + 2;
int endIndex = baseUrl.toString().indexOf("/", startIndex);
if (oldString == null || newString == null|| startIndex < 0
|| endIndex > baseUrl.length() || startIndex > endIndex) {
throw new IllegalArgumentException("Invalid input or indices");
}
StringBuilder sb = new StringBuilder(baseUrl);
int targetIndex = sb.indexOf(oldString, startIndex);
if (targetIndex == -1 || targetIndex >= endIndex) {
return baseUrl; // target not found within the specified range
}
sb.replace(targetIndex, targetIndex + oldString.length(), newString);
return sb.toString();
}
private UriUtils() {
}
}