// RegisterTableProcedure.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.iceberg.procedure;
import com.facebook.presto.hive.HdfsContext;
import com.facebook.presto.hive.HdfsEnvironment;
import com.facebook.presto.iceberg.IcebergAbstractMetadata;
import com.facebook.presto.iceberg.IcebergMetadataFactory;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.PrestoWarning;
import com.facebook.presto.spi.SchemaNotFoundException;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.classloader.ThreadContextClassLoader;
import com.facebook.presto.spi.procedure.Procedure;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import javax.inject.Inject;
import javax.inject.Provider;
import java.io.IOException;
import java.lang.invoke.MethodHandle;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.facebook.presto.common.block.MethodHandleUtil.methodHandle;
import static com.facebook.presto.common.type.StandardTypes.VARCHAR;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA;
import static com.facebook.presto.spi.StandardWarningCode.MULTIPLE_TABLE_METADATA;
import static java.lang.Integer.parseInt;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.apache.iceberg.util.LocationUtil.stripTrailingSlash;
/**
 * Provider for the {@code system.register_table} procedure.
 *
 * <p>The procedure takes a schema, a table name, a location and an optional metadata file name,
 * resolves the path of a table metadata file from them, and passes that path to
 * {@link IcebergAbstractMetadata#registerTable}. When no file name is supplied, the metadata
 * directory is listed and the file with the highest version number is used.
 */
public class RegisterTableProcedure
        implements Provider<Procedure>
{
    private static final MethodHandle REGISTER_TABLE = methodHandle(
            RegisterTableProcedure.class,
            "registerTable",
            ConnectorSession.class,
            String.class,
            String.class,
            String.class,
            String.class);

    private final IcebergMetadataFactory metadataFactory;
    private final HdfsEnvironment hdfsEnvironment;

    /** Name of the directory, under a table location, that holds the metadata files. */
    public static final String METADATA_FOLDER_NAME = "metadata";

    private static final String METADATA_FILE_EXTENSION = ".metadata.json";

    // Matches "<version>-<uuid>[.<compression>].metadata.json[.<compression2>]".
    private static final Pattern METADATA_VERSION_PATTERN = Pattern.compile("(?<version>\\d+)-(?<uuid>[-a-fA-F0-9]*)(?<compression>\\.[a-zA-Z0-9]+)?" + Pattern.quote(METADATA_FILE_EXTENSION) + "(?<compression2>\\.[a-zA-Z0-9]+)?");

    // Matches "v<version>[.<compression>].metadata.json[.<compression2>]".
    private static final Pattern HADOOP_METADATA_VERSION_PATTERN = Pattern.compile("v(?<version>\\d+)(?<compression>\\.[a-zA-Z0-9]+)?" + Pattern.quote(METADATA_FILE_EXTENSION) + "(?<compression2>\\.[a-zA-Z0-9]+)?");

    /**
     * @param metadataFactory factory used to create the metadata object that performs the registration
     * @param hdfsEnvironment environment used to obtain the file system of the metadata directory
     * @throws NullPointerException if either argument is null
     */
    @Inject
    public RegisterTableProcedure(
            IcebergMetadataFactory metadataFactory,
            HdfsEnvironment hdfsEnvironment)
    {
        this.metadataFactory = requireNonNull(metadataFactory, "metadataFactory is null");
        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
    }

    /**
     * Builds the {@code system.register_table} procedure definition, bound to this instance.
     *
     * @return a procedure with the arguments {@code schema}, {@code table_name},
     *         {@code metadata_location} and {@code metadata_file}
     */
    @Override
    public Procedure get()
    {
        return new Procedure(
                "system",
                "register_table",
                ImmutableList.of(
                        new Procedure.Argument("schema", VARCHAR),
                        new Procedure.Argument("table_name", VARCHAR),
                        new Procedure.Argument("metadata_location", VARCHAR),
                        // NOTE(review): presumably (required = false, default = null); confirm against Procedure.Argument
                        new Procedure.Argument("metadata_file", VARCHAR, false, null)),
                REGISTER_TABLE.bindTo(this));
    }

    /**
     * Entry point invoked through {@link #REGISTER_TABLE}. Wraps the registration in a
     * {@link ThreadContextClassLoader} created from this class's class loader.
     *
     * @param clientSession session of the caller
     * @param schema schema to register the table in
     * @param table name to register the table under
     * @param metadataLocation table location, or its {@code metadata} directory
     * @param metadataFile name of the metadata file to register; may be null, in which case
     *        the latest metadata file in the metadata directory is used
     */
    public void registerTable(ConnectorSession clientSession, String schema, String table, String metadataLocation, String metadataFile)
    {
        try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(getClass().getClassLoader())) {
            doRegisterTable(clientSession, schema, table, metadataLocation, Optional.ofNullable(metadataFile));
        }
    }

    /**
     * Checks that the schema exists, resolves the metadata file path and registers the table.
     *
     * @throws SchemaNotFoundException if the schema does not exist
     */
    private void doRegisterTable(ConnectorSession clientSession, String schema, String table, String metadataLocation, Optional<String> metadataFile)
    {
        IcebergAbstractMetadata metadata = (IcebergAbstractMetadata) metadataFactory.create();
        SchemaTableName schemaTableName = new SchemaTableName(schema, table);
        if (!metadata.schemaExists(clientSession, schemaTableName.getSchemaName())) {
            throw new SchemaNotFoundException(schemaTableName.getSchemaName());
        }

        Path location = new Path(stripTrailingSlash(metadataLocation));
        // Compare the final path component rather than the raw string suffix, so that a table
        // directory whose name merely ends in "metadata" (e.g. ".../orders_metadata") is not
        // mistaken for the metadata directory itself.
        Path metadataDirectory = METADATA_FOLDER_NAME.equals(location.getName())
                ? location
                : new Path(location, METADATA_FOLDER_NAME);

        Path metadataPath = metadataFile
                .map(metadataFileName -> new Path(metadataDirectory, metadataFileName))
                .orElseGet(() -> resolveLatestMetadataLocation(
                        clientSession,
                        getFileSystem(clientSession, hdfsEnvironment, schemaTableName, metadataDirectory),
                        metadataDirectory));

        metadata.registerTable(clientSession, schemaTableName, metadataPath);
    }

    /**
     * Obtains the file system for {@code location} from {@code hdfsEnvironment}, using an
     * {@link HdfsContext} built from the session, the schema and table names and the last
     * component of {@code location}.
     *
     * @param clientSession session of the caller
     * @param hdfsEnvironment environment that supplies the file system
     * @param schemaTableName schema and table names placed in the context
     * @param location path whose file system is requested
     * @return the file system for {@code location}
     * @throws PrestoException with {@code ICEBERG_FILESYSTEM_ERROR} if the lookup fails for any reason
     */
    public static FileSystem getFileSystem(ConnectorSession clientSession, HdfsEnvironment hdfsEnvironment, SchemaTableName schemaTableName, Path location)
    {
        // NOTE(review): the meaning of the trailing boolean is not visible here; confirm against HdfsContext
        HdfsContext hdfsContext = new HdfsContext(
                clientSession,
                schemaTableName.getSchemaName(),
                schemaTableName.getTableName(),
                location.getName(),
                true);
        try {
            return hdfsEnvironment.getFileSystem(hdfsContext, location);
        }
        catch (Exception e) {
            throw new PrestoException(ICEBERG_FILESYSTEM_ERROR, format("Error getting file system at path %s", location), e);
        }
    }

    /**
     * Finds the metadata file with the highest version number in {@code metadataPath}.
     *
     * <p>Only files whose name contains {@code .metadata.json} are listed, and files whose name
     * does not yield a version (see {@link #parseMetadataVersionFromFileName}) are ignored.
     * When several files share the highest version, the most recently modified one is returned
     * and a {@code MULTIPLE_TABLE_METADATA} warning is added to the session.
     *
     * @param clientSession session whose warning collector receives the duplicate-version warning
     * @param fileSystem file system used to list {@code metadataPath}
     * @param metadataPath directory containing the metadata files
     * @return path of the selected metadata file
     * @throws PrestoException with {@code ICEBERG_FILESYSTEM_ERROR} if the directory cannot be listed,
     *         or with {@code ICEBERG_INVALID_METADATA} if no versioned metadata file is found
     */
    public static Path resolveLatestMetadataLocation(ConnectorSession clientSession, FileSystem fileSystem, Path metadataPath)
    {
        int maxVersion = -1;
        long lastModifiedTime = -1;
        Path metadataFile = null;
        boolean duplicateVersions = false;

        try {
            FileStatus[] files = fileSystem.listStatus(metadataPath, path -> path.getName().contains(METADATA_FILE_EXTENSION));
            for (FileStatus file : files) {
                int version = parseMetadataVersionFromFileName(file.getPath().getName());
                if (version < 0) {
                    // The name contains the extension but is not a versioned metadata file.
                    // Skipping it keeps the -1 sentinel from being treated as a real version,
                    // which would select the file and report a bogus duplicate.
                    continue;
                }

                if (version > maxVersion) {
                    maxVersion = version;
                    metadataFile = file.getPath();
                    lastModifiedTime = file.getModificationTime();
                    duplicateVersions = false;
                }
                else if (version == maxVersion) {
                    duplicateVersions = true;
                    long modifiedTime = file.getModificationTime();
                    // Tie-break equal versions by modification time.
                    if (modifiedTime > lastModifiedTime) {
                        lastModifiedTime = modifiedTime;
                        metadataFile = file.getPath();
                    }
                }
            }
        }
        catch (IOException io) {
            throw new PrestoException(ICEBERG_FILESYSTEM_ERROR, format("Unable to find metadata at location %s", metadataPath), io);
        }

        if (metadataFile == null) {
            throw new PrestoException(ICEBERG_INVALID_METADATA, format("No metadata found at location %s", metadataPath));
        }

        if (duplicateVersions) {
            clientSession.getWarningCollector().add(new PrestoWarning(MULTIPLE_TABLE_METADATA, format("Multiple metadata files of most recent version %d found at location %s. Using most recently modified version", maxVersion, metadataPath)));
        }

        return metadataFile;
    }

    /**
     * Extracts the version number from a metadata file name.
     *
     * <p>Two layouts are recognised: {@code <version>-<uuid>.metadata.json} and
     * {@code v<version>.metadata.json}, each with an optional extra extension before and/or
     * after {@code .metadata.json}.
     *
     * @param fileName file name without any directory part
     * @return the parsed version, or {@code -1} if the name matches neither layout or the
     *         version does not fit in an {@code int}
     */
    static int parseMetadataVersionFromFileName(String fileName)
    {
        Matcher matcher = METADATA_VERSION_PATTERN.matcher(fileName);
        if (!matcher.matches()) {
            matcher = HADOOP_METADATA_VERSION_PATTERN.matcher(fileName);
            if (!matcher.matches()) {
                return -1;
            }
        }

        try {
            return parseInt(matcher.group("version"));
        }
        catch (NumberFormatException e) {
            // \d+ places no bound on the digit count; an out-of-range version is unparseable.
            return -1;
        }
    }
}