TestMapredParquetOutputFormat.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.facebook.presto.hive.parquet.write;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Progressable;
import org.apache.parquet.hadoop.ParquetOutputFormat;
import org.apache.parquet.schema.MessageType;

import java.io.IOException;
import java.util.Optional;
import java.util.Properties;

import static java.util.Objects.requireNonNull;

/*
 * MapredParquetOutputFormat derives the Parquet schema from the Hive column types,
 * which is not always what we want: for decimal columns the derived schema always
 * uses FIXED_LEN_BYTE_ARRAY as the backing type. To also test decimals backed by
 * INT32/INT64, this class accepts an explicit Parquet schema to use instead.
 */
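/*
 * As an illustration (a sketch, not taken from any particular test): a caller wanting an
 * INT32-backed decimal could pass a hand-written schema built with
 * org.apache.parquet.schema.MessageTypeParser; the column name "test" and the
 * DECIMAL(5, 2) precision/scale below are assumptions chosen for the example.
 *
 *   MessageType schema = MessageTypeParser.parseMessageType(
 *           "message hive_schema { optional int32 test (DECIMAL(5, 2)); }");
 *   TestMapredParquetOutputFormat format =
 *           new TestMapredParquetOutputFormat(Optional.of(schema), false);
 */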
public class TestMapredParquetOutputFormat
        extends MapredParquetOutputFormat
{
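    // Explicit Parquet schema to write with; when empty, the schema derived from the
    // Hive table properties is used, as in the parent class.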
    private final Optional<MessageType> schema;

    public TestMapredParquetOutputFormat(Optional<MessageType> schema, boolean singleLevelArray)
    {
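        // Use the test write support, which controls whether arrays are written with
        // single-level encoding.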
        super(new ParquetOutputFormat<>(new TestDataWritableWriteSupport(singleLevelArray)));
        this.schema = requireNonNull(schema, "schema is null");
    }

    @Override
    public FileSinkOperator.RecordWriter getHiveRecordWriter(
            JobConf jobConf,
            Path finalOutPath,
            Class<? extends Writable> valueClass,
            boolean isCompressed,
            Properties tableProperties,
            Progressable progress)
            throws IOException
    {
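        // If a custom schema was supplied, register it in the job configuration so the
        // write support picks it up, and create the Parquet record writer directly;
        // otherwise fall back to the default behavior, which derives the schema from
        // the table properties.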
        if (schema.isPresent()) {
            DataWritableWriteSupport.setSchema(schema.get(), jobConf);
            return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress, tableProperties);
        }
        return super.getHiveRecordWriter(jobConf, finalOutPath, valueClass, isCompressed, tableProperties, progress);
}