TestParquetPageSourceFactory.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive.parquet;
import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.hive.HiveColumnHandle;
import com.facebook.presto.hive.HiveType;
import com.facebook.presto.spi.SchemaTableName;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.testng.annotations.Test;
import java.util.Optional;
import static com.facebook.presto.hive.BaseHiveColumnHandle.ColumnType.REGULAR;
import static com.facebook.presto.hive.BaseHiveColumnHandle.ColumnType.SYNTHESIZED;
import static com.facebook.presto.hive.parquet.ParquetPageSourceFactory.getColumnType;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
/**
 * Exercises {@code ParquetPageSourceFactory.getColumnType} against a small in-memory
 * Parquet schema ({@code root { name, age, address { city, block } }}).
 */
public class TestParquetPageSourceFactory
{
    /**
     * Resolves a top-level column and a pushed-down nested column, each with
     * {@code useParquetColumnNames} set to both {@code false} and {@code true},
     * and checks the Parquet type returned for every combination.
     */
    @Test
    public void testGetColumnType()
    {
        MessageType fileSchema = buildMessageType();
        SchemaTableName table = new SchemaTableName("db001", "tbl001");
        Path file = new Path("/tmp/hello");

        // Top-level "name" column: the same primitive type is expected for both lookup modes.
        PrimitiveType expectedName = new PrimitiveType(REQUIRED, BINARY, "name");
        HiveColumnHandle nameColumn = buildSimpleColumnHandle("name", 0);

        // useParquetColumnNames = false
        assertResolvedType(
                getColumnType(VarcharType.VARCHAR, fileSchema, false, nameColumn, table, file),
                expectedName);

        // useParquetColumnNames = true
        assertResolvedType(
                getColumnType(VarcharType.VARCHAR, fileSchema, true, nameColumn, table, file),
                expectedName);

        // Pushed-down subfield address.city: the expected result is a message type named
        // "address" that carries only the "city" field (no "block").
        HiveColumnHandle addressCityColumn = buildNestedPushDownColumnHandle("address", "city");
        MessageType expectedAddressWithCity = new MessageType(
                "address",
                ImmutableList.of(new PrimitiveType(REQUIRED, BINARY, "city")));

        // useParquetColumnNames = false
        assertResolvedType(
                getColumnType(VarcharType.VARCHAR, fileSchema, false, addressCityColumn, table, file),
                expectedAddressWithCity);

        // useParquetColumnNames = true
        assertResolvedType(
                getColumnType(VarcharType.VARCHAR, fileSchema, true, addressCityColumn, table, file),
                expectedAddressWithCity);
    }

    /**
     * Asserts that {@code resolved} holds a value and that the value equals {@code expected}.
     */
    private static void assertResolvedType(
            Optional<org.apache.parquet.schema.Type> resolved,
            org.apache.parquet.schema.Type expected)
    {
        assertTrue(resolved.isPresent());
        assertEquals(resolved.get(), expected);
    }

    /**
     * Creates a REGULAR string/varchar column handle with the given name and column index,
     * with no comment, no required subfields and no partial aggregation.
     */
    private static HiveColumnHandle buildSimpleColumnHandle(String name, int index)
    {
        return new HiveColumnHandle(
                name,
                HiveType.HIVE_STRING,
                VarcharType.VARCHAR.getTypeSignature(),
                index,
                REGULAR,
                Optional.empty(),
                ImmutableList.of(),
                Optional.empty());
    }

    /**
     * Creates a SYNTHESIZED string/varchar column handle describing the subfield
     * {@code field1.field2}. The handle is named {@code field1$_$_$field2}, is given
     * a column index of -1, and lists the subfield as its only required subfield.
     */
    private static HiveColumnHandle buildNestedPushDownColumnHandle(String field1, String field2)
    {
        Subfield requiredSubfield = new Subfield(field1, ImmutableList.of(new Subfield.NestedField(field2)));
        String pushdownName = String.format("%s$_$_$%s", field1, field2);
        return new HiveColumnHandle(
                pushdownName,
                HiveType.HIVE_STRING,
                VarcharType.VARCHAR.getTypeSignature(),
                -1,
                SYNTHESIZED,
                Optional.of("nested column pushdown"),
                ImmutableList.of(requiredSubfield),
                Optional.empty());
    }

    /**
     * Builds the schema shared by the test: a "root" message with required fields
     * {@code name} (BINARY), {@code age} (INT32) and a required group {@code address}
     * containing {@code city} and {@code block} (both BINARY).
     */
    private static MessageType buildMessageType()
    {
        GroupType address = new GroupType(
                REQUIRED,
                "address",
                ImmutableList.of(
                        new PrimitiveType(REQUIRED, BINARY, "city"),
                        new PrimitiveType(REQUIRED, BINARY, "block")));
        return new MessageType(
                "root",
                ImmutableList.of(
                        new PrimitiveType(REQUIRED, BINARY, "name"),
                        new PrimitiveType(REQUIRED, INT32, "age"),
                        address));
    }
}