HiveCoercer.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.block.ArrayBlock;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.block.ColumnarArray;
import com.facebook.presto.common.block.ColumnarMap;
import com.facebook.presto.common.block.ColumnarRow;
import com.facebook.presto.common.block.DictionaryBlock;
import com.facebook.presto.common.block.RowBlock;
import com.facebook.presto.common.predicate.TupleDomainFilter;
import com.facebook.presto.common.type.MapType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.spi.PrestoException;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;

import java.util.List;
import java.util.Optional;
import java.util.function.Function;

import static com.facebook.presto.common.block.ColumnarArray.toColumnarArray;
import static com.facebook.presto.common.block.ColumnarMap.toColumnarMap;
import static com.facebook.presto.common.block.ColumnarRow.toColumnarRow;
import static com.facebook.presto.common.predicate.TupleDomainFilter.IS_NOT_NULL;
import static com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.IntegerType.INTEGER;
import static com.facebook.presto.common.type.RealType.REAL;
import static com.facebook.presto.common.type.SmallintType.SMALLINT;
import static com.facebook.presto.common.type.TinyintType.TINYINT;
import static com.facebook.presto.hive.HiveType.HIVE_BYTE;
import static com.facebook.presto.hive.HiveType.HIVE_DOUBLE;
import static com.facebook.presto.hive.HiveType.HIVE_FLOAT;
import static com.facebook.presto.hive.HiveType.HIVE_INT;
import static com.facebook.presto.hive.HiveType.HIVE_LONG;
import static com.facebook.presto.hive.HiveType.HIVE_SHORT;
import static com.facebook.presto.hive.HiveUtil.extractStructFieldNames;
import static com.facebook.presto.hive.HiveUtil.extractStructFieldTypes;
import static com.facebook.presto.hive.metastore.MetastoreUtil.isArrayType;
import static com.facebook.presto.hive.metastore.MetastoreUtil.isMapType;
import static com.facebook.presto.hive.metastore.MetastoreUtil.isRowType;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.slice.Slices.utf8Slice;
import static java.lang.Float.intBitsToFloat;
import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;

public interface HiveCoercer
        extends Function<Block, Block>
{
    /**
     * Adapts a filter so it can be evaluated against values that have not been coerced yet.
     * <p>
     * Implementations in this file either return {@code filter} unchanged or wrap it in a
     * filter that converts each incoming value before delegating to {@code filter}.
     *
     * @param filter the filter to adapt
     * @param subfield path of the value the filter applies to; the primitive coercers in this
     * file require an empty path, while the struct coercer uses the first path element to
     * pick the field whose coercer should adapt the filter
     * @return a filter usable on the un-coerced values
     */
    TupleDomainFilter toCoercingFilter(TupleDomainFilter filter, Subfield subfield);

    /**
     * Returns the type this coercer converts values to.
     */
    Type getToType();

    /**
     * Creates the coercer that converts data of {@code fromHiveType} into {@code toHiveType}.
     * <p>
     * Supported conversions: Hive byte/short/int/long to varchar, varchar to Hive
     * byte/short/int/long, widening between Hive byte/short/int/long, float to double, and
     * array-to-array, map-to-map and row-to-row conversions.
     *
     * @param typeManager used to resolve the type signatures of both Hive types
     * @param fromHiveType type of the data being read
     * @param toHiveType type the data must be converted to
     * @return a coercer for the requested conversion
     * @throws PrestoException with {@code NOT_SUPPORTED} if the conversion is not one of the above
     */
    static HiveCoercer createCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType)
    {
        Type sourceType = typeManager.getType(fromHiveType.getTypeSignature());
        Type targetType = typeManager.getType(toHiveType.getTypeSignature());

        boolean sourceIsInteger = fromHiveType.equals(HIVE_BYTE)
                || fromHiveType.equals(HIVE_SHORT)
                || fromHiveType.equals(HIVE_INT)
                || fromHiveType.equals(HIVE_LONG);
        if (targetType instanceof VarcharType && sourceIsInteger) {
            return new IntegerNumberToVarcharCoercer(sourceType, targetType);
        }

        boolean targetIsInteger = toHiveType.equals(HIVE_BYTE)
                || toHiveType.equals(HIVE_SHORT)
                || toHiveType.equals(HIVE_INT)
                || toHiveType.equals(HIVE_LONG);
        if (sourceType instanceof VarcharType && targetIsInteger) {
            return new VarcharToIntegerNumberCoercer(sourceType, targetType);
        }

        // Widening is only allowed towards a strictly larger integer type
        boolean widensByte = fromHiveType.equals(HIVE_BYTE)
                && (toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG));
        boolean widensShort = fromHiveType.equals(HIVE_SHORT)
                && (toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG));
        boolean widensInt = fromHiveType.equals(HIVE_INT) && toHiveType.equals(HIVE_LONG);
        if (widensByte || widensShort || widensInt) {
            return new IntegerNumberUpscaleCoercer(sourceType, targetType);
        }

        if (fromHiveType.equals(HIVE_FLOAT) && toHiveType.equals(HIVE_DOUBLE)) {
            return new FloatToDoubleCoercer();
        }
        if (isArrayType(sourceType) && isArrayType(targetType)) {
            return new ListCoercer(typeManager, fromHiveType, toHiveType);
        }
        if (isMapType(sourceType) && isMapType(targetType)) {
            return new MapCoercer(typeManager, fromHiveType, toHiveType);
        }
        if (isRowType(sourceType) && isRowType(targetType)) {
            return new StructCoercer(typeManager, fromHiveType, toHiveType);
        }

        throw new PrestoException(NOT_SUPPORTED, format("Unsupported coercion from %s to %s", fromHiveType, toHiveType));
    }

    /**
     * Coercer between two integer types: every non-null value is read as a {@code long}
     * from the source type and written as a {@code long} to the target type.
     */
    class IntegerNumberUpscaleCoercer
            implements HiveCoercer
    {
        private final Type fromType;
        private final Type toType;

        public IntegerNumberUpscaleCoercer(Type fromType, Type toType)
        {
            this.fromType = requireNonNull(fromType, "fromType is null");
            this.toType = requireNonNull(toType, "toType is null");
        }

        /**
         * Copies the block into a new block of the target type, preserving nulls.
         */
        @Override
        public Block apply(Block block)
        {
            int positionCount = block.getPositionCount();
            BlockBuilder output = toType.createBlockBuilder(null, positionCount);
            for (int position = 0; position < positionCount; position++) {
                if (!block.isNull(position)) {
                    toType.writeLong(output, fromType.getLong(block, position));
                }
                else {
                    output.appendNull();
                }
            }
            return output.build();
        }

        /**
         * Returns the filter unchanged; only an empty subfield path is accepted.
         */
        @Override
        public TupleDomainFilter toCoercingFilter(TupleDomainFilter filter, Subfield subfield)
        {
            boolean topLevel = subfield.getPath().isEmpty();
            checkArgument(topLevel, "Subfields on primitive types are not allowed");
            return filter;
        }

        @Override
        public Type getToType()
        {
            return toType;
        }
    }

    /**
     * Coercer from an integer type to varchar: every non-null value is read as a
     * {@code long} and written as the UTF-8 encoding of its decimal string form.
     */
    class IntegerNumberToVarcharCoercer
            implements HiveCoercer
    {
        private final Type fromType;
        private final Type toType;

        public IntegerNumberToVarcharCoercer(Type fromType, Type toType)
        {
            this.fromType = requireNonNull(fromType, "fromType is null");
            this.toType = requireNonNull(toType, "toType is null");
        }

        /**
         * Converts the block into a new block of the target type, preserving nulls.
         */
        @Override
        public Block apply(Block block)
        {
            int positionCount = block.getPositionCount();
            BlockBuilder blockBuilder = toType.createBlockBuilder(null, positionCount);
            for (int i = 0; i < positionCount; i++) {
                if (block.isNull(i)) {
                    blockBuilder.appendNull();
                    continue;
                }
                toType.writeSlice(blockBuilder, utf8Slice(String.valueOf(fromType.getLong(block, i))));
            }
            return blockBuilder.build();
        }

        /**
         * Wraps the filter so it can be tested against the un-coerced {@code long} values.
         *
         * @throws IllegalArgumentException if the subfield path is not empty
         */
        @Override
        public TupleDomainFilter toCoercingFilter(TupleDomainFilter filter, Subfield subfield)
        {
            checkArgument(subfield.getPath().isEmpty(), "Subfields on primitive types are not allowed");
            return new CoercingFilter(filter);
        }

        /**
         * Filter over {@code long} values that converts each value to its string bytes
         * and hands those bytes to the wrapped filter.
         */
        private static final class CoercingFilter
                extends TupleDomainFilter.AbstractTupleDomainFilter
        {
            private final TupleDomainFilter delegate;

            public CoercingFilter(TupleDomainFilter delegate)
            {
                // Validate before the first dereference so a null delegate fails with a clear message
                super(requireNonNull(delegate, "delegate is null").isDeterministic(), !delegate.isDeterministic() || delegate.testNull());
                this.delegate = delegate;
            }

            @Override
            public boolean testNull()
            {
                return delegate.testNull();
            }

            @Override
            public boolean testLong(long value)
            {
                // Encode explicitly as UTF-8 to match the bytes apply() writes via utf8Slice,
                // instead of depending on the platform default charset
                byte[] bytes = String.valueOf(value).getBytes(UTF_8);
                return delegate.testBytes(bytes, 0, bytes.length);
            }
        }

        @Override
        public Type getToType()
        {
            return toType;
        }
    }

    /**
     * Coercer from varchar to an integer type (tinyint, smallint, integer or bigint).
     * <p>
     * Strings that cannot be parsed as a {@code long}, or whose value falls outside the
     * range of the target type, are converted to null.
     */
    class VarcharToIntegerNumberCoercer
            implements HiveCoercer
    {
        private final Type fromType;
        private final Type toType;

        // Inclusive range of values representable by the target type
        private final long minValue;
        private final long maxValue;

        /**
         * @throws PrestoException with {@code NOT_SUPPORTED} if {@code toType} is not
         * tinyint, smallint, integer or bigint
         */
        public VarcharToIntegerNumberCoercer(Type fromType, Type toType)
        {
            this.fromType = requireNonNull(fromType, "fromType is null");
            this.toType = requireNonNull(toType, "toType is null");

            if (toType.equals(TINYINT)) {
                minValue = Byte.MIN_VALUE;
                maxValue = Byte.MAX_VALUE;
            }
            else if (toType.equals(SMALLINT)) {
                minValue = Short.MIN_VALUE;
                maxValue = Short.MAX_VALUE;
            }
            else if (toType.equals(INTEGER)) {
                minValue = Integer.MIN_VALUE;
                maxValue = Integer.MAX_VALUE;
            }
            else if (toType.equals(BIGINT)) {
                minValue = Long.MIN_VALUE;
                maxValue = Long.MAX_VALUE;
            }
            else {
                throw new PrestoException(NOT_SUPPORTED, format("Could not create Coercer from varchar to %s", toType));
            }
        }

        /**
         * Converts the block into a new block of the target type. Null, unparsable and
         * out-of-range inputs all become null.
         */
        @Override
        public Block apply(Block block)
        {
            int positionCount = block.getPositionCount();
            BlockBuilder blockBuilder = toType.createBlockBuilder(null, positionCount);
            for (int i = 0; i < positionCount; i++) {
                if (block.isNull(i)) {
                    blockBuilder.appendNull();
                    continue;
                }
                try {
                    long value = Long.parseLong(fromType.getSlice(block, i).toStringUtf8());
                    if (minValue <= value && value <= maxValue) {
                        toType.writeLong(blockBuilder, value);
                    }
                    else {
                        blockBuilder.appendNull();
                    }
                }
                catch (NumberFormatException e) {
                    // Unparsable strings are coerced to null rather than failing the read
                    blockBuilder.appendNull();
                }
            }
            return blockBuilder.build();
        }

        /**
         * Wraps the filter so it can be tested against the un-coerced varchar bytes.
         *
         * @throws IllegalArgumentException if the subfield path is not empty
         */
        @Override
        public TupleDomainFilter toCoercingFilter(TupleDomainFilter filter, Subfield subfield)
        {
            checkArgument(subfield.getPath().isEmpty(), "Subfields on primitive types are not allowed");
            return new CoercingFilter(filter, minValue, maxValue);
        }

        /**
         * Filter over varchar bytes that parses each value the same way {@code apply} does
         * and tests the wrapped filter with the resulting {@code long}, or with null when
         * the value is unparsable or out of range.
         */
        private static final class CoercingFilter
                extends TupleDomainFilter.AbstractTupleDomainFilter
        {
            private final TupleDomainFilter delegate;
            private final long minValue;
            private final long maxValue;

            public CoercingFilter(TupleDomainFilter delegate, long minValue, long maxValue)
            {
                // Validate before the first dereference so a null delegate fails with a clear message
                super(requireNonNull(delegate, "delegate is null").isDeterministic(), !delegate.isDeterministic() || delegate.testNull());
                this.delegate = delegate;
                this.minValue = minValue;
                this.maxValue = maxValue;
            }

            @Override
            public boolean testNull()
            {
                return delegate.testNull();
            }

            @Override
            public boolean testLength(int length)
            {
                // The length of the string says nothing about the parsed number
                return true;
            }

            @Override
            public boolean testBytes(byte[] buffer, int offset, int length)
            {
                long value;
                try {
                    // Decode as UTF-8 and parse to a primitive, mirroring apply()
                    value = Long.parseLong(new String(buffer, offset, length, UTF_8));
                }
                catch (NumberFormatException e) {
                    // apply() turns unparsable strings into null, so test the null case
                    return delegate.testNull();
                }

                if (minValue <= value && value <= maxValue) {
                    return delegate.testLong(value);
                }
                // apply() turns out-of-range values into null, so test the null case
                return delegate.testNull();
            }
        }

        @Override
        public Type getToType()
        {
            return toType;
        }
    }

    /**
     * Coercer from real to double: each non-null value is read through {@code REAL} as
     * float bits, turned into a {@code float}, and written through {@code DOUBLE}.
     */
    class FloatToDoubleCoercer
            implements HiveCoercer
    {
        /**
         * Converts the block into a new {@code DOUBLE} block, preserving nulls.
         */
        @Override
        public Block apply(Block block)
        {
            int positionCount = block.getPositionCount();
            BlockBuilder output = DOUBLE.createBlockBuilder(null, positionCount);
            for (int position = 0; position < positionCount; position++) {
                if (!block.isNull(position)) {
                    int floatBits = (int) REAL.getLong(block, position);
                    DOUBLE.writeDouble(output, intBitsToFloat(floatBits));
                }
                else {
                    output.appendNull();
                }
            }
            return output.build();
        }

        /**
         * Returns the filter unchanged; only an empty subfield path is accepted.
         */
        @Override
        public TupleDomainFilter toCoercingFilter(TupleDomainFilter filter, Subfield subfield)
        {
            boolean topLevel = subfield.getPath().isEmpty();
            checkArgument(topLevel, "Subfields on primitive types are not allowed");
            return filter;
        }

        @Override
        public Type getToType()
        {
            return DOUBLE;
        }
    }

    /**
     * Coercer between two array types. The elements are converted with a coercer created
     * for the element types; when the element types are equal no element coercer is
     * created and blocks pass through unchanged.
     */
    class ListCoercer
            implements HiveCoercer
    {
        // null when the source and target element types are equal
        private final HiveCoercer elementCoercer;
        private final Type toType;

        public ListCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType)
        {
            requireNonNull(typeManager, "typeManager is null");
            requireNonNull(fromHiveType, "fromHiveType is null");
            requireNonNull(toHiveType, "toHiveType is null");
            HiveType fromElementHiveType = HiveType.valueOf(((ListTypeInfo) fromHiveType.getTypeInfo()).getListElementTypeInfo().getTypeName());
            HiveType toElementHiveType = HiveType.valueOf(((ListTypeInfo) toHiveType.getTypeInfo()).getListElementTypeInfo().getTypeName());
            this.elementCoercer = fromElementHiveType.equals(toElementHiveType) ? null : createCoercer(typeManager, fromElementHiveType, toElementHiveType);
            this.toType = toHiveType.getType(typeManager);
        }

        /**
         * Coerces the elements of the array block and reassembles an array block with the
         * same null flags and per-position lengths. Returns the input block itself when
         * there is no element coercer.
         */
        @Override
        public Block apply(Block block)
        {
            if (elementCoercer == null) {
                return block;
            }
            ColumnarArray arrayBlock = toColumnarArray(block);
            Block elementsBlock = elementCoercer.apply(arrayBlock.getElementsBlock());
            int positionCount = arrayBlock.getPositionCount();
            boolean[] valueIsNull = new boolean[positionCount];
            // offsets[i] is the index of the first element of position i in elementsBlock
            int[] offsets = new int[positionCount + 1];
            for (int i = 0; i < positionCount; i++) {
                valueIsNull[i] = arrayBlock.isNull(i);
                offsets[i + 1] = offsets[i] + arrayBlock.getLength(i);
            }
            return ArrayBlock.fromElementBlock(positionCount, Optional.of(valueIsNull), offsets, elementsBlock);
        }

        /**
         * Passes {@code IS_NULL} and {@code IS_NOT_NULL} through unchanged.
         *
         * @throws UnsupportedOperationException for any other filter
         */
        @Override
        public TupleDomainFilter toCoercingFilter(TupleDomainFilter filter, Subfield subfield)
        {
            if (filter == IS_NULL || filter == IS_NOT_NULL) {
                return filter;
            }

            throw new UnsupportedOperationException("Range filters on array elements are not supported");
        }

        @Override
        public Type getToType()
        {
            return toType;
        }
    }

    /**
     * Coercer between two map types. Keys and values are converted independently with
     * coercers created for their types; a side whose type is equal on both maps gets no
     * coercer and is reused as-is.
     */
    class MapCoercer
            implements HiveCoercer
    {
        private final Type toType;
        // null when the source and target key types are equal
        private final HiveCoercer keyCoercer;
        // null when the source and target value types are equal
        private final HiveCoercer valueCoercer;

        public MapCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType)
        {
            requireNonNull(typeManager, "typeManager is null");
            requireNonNull(fromHiveType, "fromHiveType is null");
            this.toType = requireNonNull(toHiveType, "toHiveType is null").getType(typeManager);
            HiveType fromKeyHiveType = HiveType.valueOf(((MapTypeInfo) fromHiveType.getTypeInfo()).getMapKeyTypeInfo().getTypeName());
            HiveType fromValueHiveType = HiveType.valueOf(((MapTypeInfo) fromHiveType.getTypeInfo()).getMapValueTypeInfo().getTypeName());
            HiveType toKeyHiveType = HiveType.valueOf(((MapTypeInfo) toHiveType.getTypeInfo()).getMapKeyTypeInfo().getTypeName());
            HiveType toValueHiveType = HiveType.valueOf(((MapTypeInfo) toHiveType.getTypeInfo()).getMapValueTypeInfo().getTypeName());
            this.keyCoercer = fromKeyHiveType.equals(toKeyHiveType) ? null : createCoercer(typeManager, fromKeyHiveType, toKeyHiveType);
            this.valueCoercer = fromValueHiveType.equals(toValueHiveType) ? null : createCoercer(typeManager, fromValueHiveType, toValueHiveType);
        }

        /**
         * Coerces the key and value blocks as needed and builds a new map block of the
         * target type with the same null flags and per-position entry counts.
         */
        @Override
        public Block apply(Block block)
        {
            ColumnarMap mapBlock = toColumnarMap(block);
            Block keysBlock = keyCoercer == null ? mapBlock.getKeysBlock() : keyCoercer.apply(mapBlock.getKeysBlock());
            Block valuesBlock = valueCoercer == null ? mapBlock.getValuesBlock() : valueCoercer.apply(mapBlock.getValuesBlock());
            int positionCount = mapBlock.getPositionCount();
            boolean[] valueIsNull = new boolean[positionCount];
            // offsets[i] is the index of the first entry of position i in the key/value blocks
            int[] offsets = new int[positionCount + 1];
            for (int i = 0; i < positionCount; i++) {
                valueIsNull[i] = mapBlock.isNull(i);
                offsets[i + 1] = offsets[i] + mapBlock.getEntryCount(i);
            }
            return ((MapType) toType).createBlockFromKeyValue(positionCount, Optional.of(valueIsNull), offsets, keysBlock, valuesBlock);
        }

        /**
         * Passes {@code IS_NULL} and {@code IS_NOT_NULL} through unchanged.
         *
         * @throws UnsupportedOperationException for any other filter
         */
        @Override
        public TupleDomainFilter toCoercingFilter(TupleDomainFilter filter, Subfield subfield)
        {
            if (filter == IS_NULL || filter == IS_NOT_NULL) {
                return filter;
            }

            throw new UnsupportedOperationException("Range filters on map elements are not supported");
        }

        @Override
        public Type getToType()
        {
            return toType;
        }
    }

    /**
     * Coercer between two struct (row) types. Fields are matched by position:
     * <ul>
     * <li>a target field beyond the last source field is filled with nulls;</li>
     * <li>a field whose type differs is converted with a coercer created for that pair;</li>
     * <li>a field whose type is equal is reused as-is.</li>
     * </ul>
     * Source fields beyond the last target field are dropped.
     */
    class StructCoercer
            implements HiveCoercer
    {
        // coercers[i] is null when target field i needs no conversion or has no source field
        private final HiveCoercer[] coercers;
        // nullBlocks[i] is a single-null block for target field i when it has no source field
        private final Block[] nullBlocks;
        private final List<String> toFieldNames;
        private final Type toType;

        public StructCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType)
        {
            requireNonNull(typeManager, "typeManager is null");
            requireNonNull(fromHiveType, "fromHiveType is null");
            requireNonNull(toHiveType, "toHiveType is null");
            List<HiveType> fromFieldTypes = extractStructFieldTypes(fromHiveType);
            List<HiveType> toFieldTypes = extractStructFieldTypes(toHiveType);
            this.coercers = new HiveCoercer[toFieldTypes.size()];
            this.nullBlocks = new Block[toFieldTypes.size()];
            for (int i = 0; i < coercers.length; i++) {
                if (i >= fromFieldTypes.size()) {
                    nullBlocks[i] = toFieldTypes.get(i).getType(typeManager).createBlockBuilder(null, 1).appendNull().build();
                }
                else if (!fromFieldTypes.get(i).equals(toFieldTypes.get(i))) {
                    coercers[i] = createCoercer(typeManager, fromFieldTypes.get(i), toFieldTypes.get(i));
                }
            }
            this.toFieldNames = extractStructFieldNames(toHiveType);
            this.toType = toHiveType.getType(typeManager);
        }

        /**
         * Builds a row block with one field block per target field, keeping the row-level
         * null flags of the input.
         */
        @Override
        public Block apply(Block block)
        {
            ColumnarRow rowBlock = toColumnarRow(block);
            Block[] fields = new Block[coercers.length];
            // All-zero ids: every position of a missing field points at the single null
            // entry of its null block. Sized like the first source field block.
            int[] ids = new int[rowBlock.getField(0).getPositionCount()];
            for (int i = 0; i < coercers.length; i++) {
                if (coercers[i] != null) {
                    fields[i] = coercers[i].apply(rowBlock.getField(i));
                }
                else if (i < rowBlock.getFieldCount()) {
                    fields[i] = rowBlock.getField(i);
                }
                else {
                    fields[i] = new DictionaryBlock(nullBlocks[i], ids);
                }
            }
            int positionCount = rowBlock.getPositionCount();
            boolean[] valueIsNull = new boolean[positionCount];
            for (int i = 0; i < positionCount; i++) {
                valueIsNull[i] = rowBlock.isNull(i);
            }
            return RowBlock.fromFieldBlocks(valueIsNull.length, Optional.of(valueIsNull), fields);
        }

        /**
         * Passes {@code IS_NULL} and {@code IS_NOT_NULL} through unchanged. For any other
         * filter, the first element of the subfield path names the target field whose
         * coercer adapts the filter for the rest of the path.
         *
         * @throws IllegalArgumentException if the named field is not a field of the target struct
         * @throws UnsupportedOperationException if the filter is not a null check and the
         * subfield path is empty
         */
        @Override
        public TupleDomainFilter toCoercingFilter(TupleDomainFilter filter, Subfield subfield)
        {
            if (filter == IS_NULL || filter == IS_NOT_NULL) {
                return filter;
            }

            if (subfield.getPath().size() > 0) {
                String fieldName = ((Subfield.NestedField) subfield.getPath().get(0)).getName();
                for (int i = 0; i < toFieldNames.size(); i++) {
                    if (fieldName.equals(toFieldNames.get(i))) {
                        HiveCoercer coercer = coercers[i];
                        if (coercer == null) {
                            // No coercer means the field either has the same type on both
                            // sides (nothing to adapt) or has no source field and is filled
                            // with nulls; the original filter is used as-is in both cases.
                            return filter;
                        }
                        return coercer.toCoercingFilter(filter, subfield.tail(fieldName));
                    }
                }
                throw new IllegalArgumentException("Struct field not found: " + fieldName);
            }

            throw new UnsupportedOperationException("Range filters on struct types are not supported");
        }

        @Override
        public Type getToType()
        {
            return toType;
        }
    }
}