HiveColumnHandle.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.common.type.TypeSignature;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ColumnMetadata;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;

import java.util.List;
import java.util.Objects;
import java.util.Optional;

import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.hive.BaseHiveColumnHandle.ColumnType.AGGREGATED;
import static com.facebook.presto.hive.BaseHiveColumnHandle.ColumnType.PARTITION_KEY;
import static com.facebook.presto.hive.BaseHiveColumnHandle.ColumnType.SYNTHESIZED;
import static com.facebook.presto.hive.HiveType.HIVE_BINARY;
import static com.facebook.presto.hive.HiveType.HIVE_INT;
import static com.facebook.presto.hive.HiveType.HIVE_LONG;
import static com.facebook.presto.hive.HiveType.HIVE_STRING;
import static com.facebook.presto.spi.plan.AggregationNode.Aggregation;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.Iterables.getOnlyElement;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

public class HiveColumnHandle
        extends BaseHiveColumnHandle
{
    // IDs between -1 and -12 are reserved for hidden columns
    public static final int PATH_COLUMN_INDEX = -11;
    public static final String PATH_COLUMN_NAME = "$path";
    public static final HiveType PATH_HIVE_TYPE = HIVE_STRING;
    public static final TypeSignature PATH_TYPE_SIGNATURE = PATH_HIVE_TYPE.getTypeSignature();

    public static final int BUCKET_COLUMN_INDEX = -12;
    public static final String BUCKET_COLUMN_NAME = "$bucket";
    public static final HiveType BUCKET_HIVE_TYPE = HIVE_INT;
    public static final TypeSignature BUCKET_TYPE_SIGNATURE = BUCKET_HIVE_TYPE.getTypeSignature();

    public static final int ROW_ID_COLUMN_INDEX = -10;
    public static final String ROW_ID_COLUMN_NAME = "$row_id";
    public static final HiveType ROW_ID_TYPE = HIVE_BINARY;
    public static final TypeSignature ROW_ID_TYPE_SIGNATURE = ROW_ID_TYPE.getTypeSignature();

    public static final int FILE_SIZE_COLUMN_INDEX = -9;
    public static final String FILE_SIZE_COLUMN_NAME = "$file_size";
    public static final HiveType FILE_SIZE_TYPE = HIVE_LONG;
    public static final TypeSignature FILE_SIZE_TYPE_SIGNATURE = FILE_SIZE_TYPE.getTypeSignature();

    public static final int FILE_MODIFIED_TIME_COLUMN_INDEX = -8;
    public static final String FILE_MODIFIED_TIME_COLUMN_NAME = "$file_modified_time";
    public static final HiveType FILE_MODIFIED_TIME_TYPE = HIVE_LONG;
    public static final TypeSignature FILE_MODIFIED_TIME_TYPE_SIGNATURE = FILE_MODIFIED_TIME_TYPE.getTypeSignature();

    private static final String UPDATE_ROW_ID_COLUMN_NAME = "$shard_row_id";

    /**
     * Partition key columns are assigned negative indexes, numbered from -13 down to Integer.MIN_VALUE.
     */
    public static final int MAX_PARTITION_KEY_COLUMN_INDEX = -13;
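    // For example, the first partition key column can be assigned index -13 and each subsequent one
    // the next more negative index; the exact assignment is done by the callers that construct
    // partition key handles, not by this class.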

    private final HiveType hiveType;
    private final TypeSignature typeName;
    private final int hiveColumnIndex;
    private final Optional<Aggregation> partialAggregation;

    @JsonCreator
    public HiveColumnHandle(
            @JsonProperty("name") String name,
            @JsonProperty("hiveType") HiveType hiveType,
            @JsonProperty("typeSignature") TypeSignature typeSignature,
            @JsonProperty("hiveColumnIndex") int hiveColumnIndex,
            @JsonProperty("columnType") ColumnType columnType,
            @JsonProperty("comment") Optional<String> comment,
            @JsonProperty("requiredSubfields") List<Subfield> requiredSubfields,
            @JsonProperty("partialAggregation") Optional<Aggregation> partialAggregation)
    {
        super(name, comment, columnType, requiredSubfields);

        checkArgument(hiveColumnIndex >= 0 || columnType == PARTITION_KEY || columnType == SYNTHESIZED || columnType == AGGREGATED, format("hiveColumnIndex:%d is negative, columnType:%s", hiveColumnIndex, columnType.toString()));
        this.hiveColumnIndex = hiveColumnIndex;
        this.hiveType = requireNonNull(hiveType, "hiveType is null");
        this.typeName = requireNonNull(typeSignature, "typeSignature is null");
        this.partialAggregation = requireNonNull(partialAggregation, "partialAggregation is null");
        checkArgument(columnType != AGGREGATED || partialAggregation.isPresent(), "Aggregated column does not have an aggregate function");
    }

    public HiveColumnHandle(
            String name,
            HiveType hiveType,
            TypeSignature typeSignature,
            int hiveColumnIndex,
            ColumnType columnType,
            Optional<String> comment,
            Optional<Aggregation> partialAggregation)
    {
        this(name, hiveType, typeSignature, hiveColumnIndex, columnType, comment, ImmutableList.of(), partialAggregation);
    }
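
    // Illustrative only (hypothetical column name and index; assumes the ColumnType enum also defines
    // a REGULAR constant for ordinary data columns): a handle for a string column "ds" at ordinal
    // position 3, with no comment, no subfield pruning hint, and no partial aggregation, could be
    // built with the shorter constructor as
    //   new HiveColumnHandle("ds", HIVE_STRING, HIVE_STRING.getTypeSignature(), 3,
    //           ColumnType.REGULAR, Optional.empty(), Optional.empty());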

    @JsonProperty
    public HiveType getHiveType()
    {
        return hiveType;
    }

    @JsonProperty
    public int getHiveColumnIndex()
    {
        return hiveColumnIndex;
    }

    public boolean isPartitionKey()
    {
        return getColumnType() == PARTITION_KEY;
    }

    public boolean isHidden()
    {
        return getColumnType() == SYNTHESIZED;
    }

    public ColumnMetadata getColumnMetadata(TypeManager typeManager)
    {
        return ColumnMetadata.builder()
                .setName(getName())
                .setType(typeManager.getType(typeName))
                .setHidden(isHidden())
                .build();
    }

    public ColumnMetadata getColumnMetadata(TypeManager typeManager, String name)
    {
        return ColumnMetadata.builder()
                .setName(name)
                .setType(typeManager.getType(typeName))
                .setHidden(isHidden())
                .build();
    }

    @JsonProperty
    public Optional<Aggregation> getPartialAggregation()
    {
        return partialAggregation;
    }

    @JsonProperty
    public TypeSignature getTypeSignature()
    {
        return typeName;
    }

    @Override
    public ColumnHandle withRequiredSubfields(List<Subfield> subfields)
    {
        if (isPushedDownSubfield(this)) {
            // This column is already a pushed down subfield column
            return this;
        }

        return new HiveColumnHandle(getName(), hiveType, typeName, hiveColumnIndex, getColumnType(), getComment(), subfields, partialAggregation);
    }
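
    // Illustrative only (hypothetical column and subfield names): a reader that needs only the "city"
    // field of a struct column "address" could attach that pruning hint with
    //   handle.withRequiredSubfields(ImmutableList.of(new Subfield("address.city")))
    // which returns a copy of this handle carrying the subfield; if the handle is already a pushed-down
    // subfield column, it is returned unchanged. The single-argument Subfield constructor is assumed
    // here to parse a dotted path.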

    @Override
    public int hashCode()
    {
        return Objects.hash(getName(), hiveColumnIndex, hiveType, getColumnType(), getComment());
    }

    @Override
    public boolean equals(Object obj)
    {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        HiveColumnHandle other = (HiveColumnHandle) obj;
        return Objects.equals(this.getName(), other.getName()) &&
                Objects.equals(this.hiveColumnIndex, other.hiveColumnIndex) &&
                Objects.equals(this.hiveType, other.hiveType) &&
                Objects.equals(this.getColumnType(), other.getColumnType()) &&
                Objects.equals(this.getComment(), other.getComment()) &&
                Objects.equals(this.getRequiredSubfields(), other.getRequiredSubfields());
    }

    @Override
    public String toString()
    {
        if (getRequiredSubfields().isEmpty()) {
            return getName() + ":" + hiveType + ":" + hiveColumnIndex + ":" + getColumnType();
        }

        return getName() + ":" + hiveType + ":" + hiveColumnIndex + ":" + getColumnType() + ":" + getRequiredSubfields();
    }

    public static HiveColumnHandle updateRowIdHandle()
    {
        // The Hive connector only supports metadata delete; it does not support generic row-by-row deletion.
        // Metadata delete is implemented in Presto by first generating a plan for row-by-row delete and
        // then optimizing it into a metadata delete. As a result, the Hive connector must provide partial
        // plan-time support for row-by-row delete so that planning doesn't fail, which is why this rowid
        // handle exists. Note that in the Hive connector, the rowid handle is not implemented beyond plan time.

        return new HiveColumnHandle(UPDATE_ROW_ID_COLUMN_NAME, HIVE_LONG, BIGINT.getTypeSignature(), -1, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
    }

    public static HiveColumnHandle pathColumnHandle()
    {
        return new HiveColumnHandle(PATH_COLUMN_NAME, PATH_HIVE_TYPE, PATH_TYPE_SIGNATURE, PATH_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
    }

    /**
     * The column indicating the bucket id.
     * When table bucketing differs from partition bucketing, this column indicates
     * which bucket the row will fall into under the table bucketing scheme.
     */
    public static HiveColumnHandle bucketColumnHandle()
    {
        return new HiveColumnHandle(BUCKET_COLUMN_NAME, BUCKET_HIVE_TYPE, BUCKET_TYPE_SIGNATURE, BUCKET_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
    }

    public static HiveColumnHandle rowIdColumnHandle()
    {
        return new HiveColumnHandle(ROW_ID_COLUMN_NAME, ROW_ID_TYPE, ROW_ID_TYPE_SIGNATURE, ROW_ID_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
    }

    public static HiveColumnHandle fileSizeColumnHandle()
    {
        return new HiveColumnHandle(FILE_SIZE_COLUMN_NAME, FILE_SIZE_TYPE, FILE_SIZE_TYPE_SIGNATURE, FILE_SIZE_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
    }

    public static HiveColumnHandle fileModifiedTimeColumnHandle()
    {
        return new HiveColumnHandle(FILE_MODIFIED_TIME_COLUMN_NAME, FILE_MODIFIED_TIME_TYPE, FILE_MODIFIED_TIME_TYPE_SIGNATURE, FILE_MODIFIED_TIME_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
    }

    public static boolean isRowIdColumnHandle(HiveColumnHandle column)
    {
        return column.getHiveColumnIndex() == ROW_ID_COLUMN_INDEX;
    }

    public static boolean isPathColumnHandle(HiveColumnHandle column)
    {
        return column.getHiveColumnIndex() == PATH_COLUMN_INDEX;
    }

    public static boolean isBucketColumnHandle(HiveColumnHandle column)
    {
        return column.getHiveColumnIndex() == BUCKET_COLUMN_INDEX;
    }

    /**
     * Returns the pushed down subfield if the column represents one.
     */
    public static Subfield getPushedDownSubfield(HiveColumnHandle column)
    {
        checkArgument(isPushedDownSubfield(column), format("not a valid pushed down subfield: type=%s, subfields=%s", column.getColumnType(), column.getRequiredSubfields()));
        return getOnlyElement(column.getRequiredSubfields());
    }

    public static boolean isPushedDownSubfield(HiveColumnHandle column)
    {
        return column.getColumnType() == SYNTHESIZED && column.getRequiredSubfields().size() == 1;
    }
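
    // Illustrative only: a pushed-down subfield column is recognized purely by shape, i.e. a SYNTHESIZED
    // handle carrying exactly one required subfield. For such a handle, getPushedDownSubfield(handle)
    // returns that single subfield; for any other handle it fails the checkArgument above.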

    public static boolean isFileSizeColumnHandle(HiveColumnHandle column)
    {
        return column.getHiveColumnIndex() == FILE_SIZE_COLUMN_INDEX;
    }

    public static boolean isFileModifiedTimeColumnHandle(HiveColumnHandle column)
    {
        return column.getHiveColumnIndex() == FILE_MODIFIED_TIME_COLUMN_INDEX;
    }

    public static boolean isInfoColumnHandle(HiveColumnHandle column)
    {
        return isPathColumnHandle(column) || isFileSizeColumnHandle(column)
                || isFileModifiedTimeColumnHandle(column);
    }
}