TestTpcdsMetadataStatistics.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.tpcds;
import com.facebook.airlift.json.JsonCodec;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorTableHandle;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.statistics.ColumnStatistics;
import com.facebook.presto.spi.statistics.DoubleRange;
import com.facebook.presto.spi.statistics.Estimate;
import com.facebook.presto.spi.statistics.TableStatistics;
import com.google.common.collect.ImmutableList;
import com.teradata.tpcds.Table;
import com.teradata.tpcds.column.CallCenterColumn;
import com.teradata.tpcds.column.WebSiteColumn;
import org.testng.annotations.Test;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Stream;
import static com.facebook.presto.spi.Constraint.alwaysTrue;
import static java.util.Map.Entry;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;
/**
 * Tests for the table and column statistics returned by {@code TpcdsMetadata.getTableStatistics}.
 *
 * <p>The tests cover schemas for which no statistics are expected, schemas for which
 * statistics are expected for every column, the concrete values reported for a few
 * representative column types, and the JSON form of {@link TableStatistics}.
 */
public class TestTpcdsMetadataStatistics
{
    // NOTE(review): 0.01 is presumably the tolerance used when comparing estimates - confirm against EstimateAssertion.
    private static final EstimateAssertion estimateAssertion = new EstimateAssertion(0.01);
    // Every metadata call in this class is made with a null session.
    private static final ConnectorSession session = null;

    private final TpcdsMetadata metadata = new TpcdsMetadata(false);

    /**
     * For each of the listed schemas, every base table must report an unknown row count
     * and no column statistics.
     */
    @Test
    public void testNoTableStatsForNotSupportedSchema()
    {
        Stream.of("sf0.001", "sf0.1", "sf10")
                .forEach(schemaName -> Table.getBaseTables()
                        .forEach(table -> {
                            // Identify the failing schema/table, since many combinations run inside one test method.
                            String context = schemaName + "." + table.getName();
                            ConnectorTableHandle tableHandle = tableHandleFor(schemaName, table);
                            List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
                            TableStatistics tableStatistics = statisticsFor(tableHandle, columnHandles);
                            assertTrue(tableStatistics.getRowCount().isUnknown(), "Row count should be unknown for " + context);
                            assertTrue(tableStatistics.getColumnStatistics().isEmpty(), "Column statistics should be empty for " + context);
                        }));
    }

    /**
     * For each of the listed schemas, every base table must report a known row count
     * and a non-null statistics entry for each of its columns.
     */
    @Test
    public void testTableStatsExistenceSupportedSchema()
    {
        Stream.of("sf0.01", "tiny", "sf1", "sf1.000")
                .forEach(schemaName -> Table.getBaseTables()
                        .forEach(table -> {
                            String context = schemaName + "." + table.getName();
                            ConnectorTableHandle tableHandle = tableHandleFor(schemaName, table);
                            List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
                            TableStatistics tableStatistics = statisticsFor(tableHandle, columnHandles);
                            assertFalse(tableStatistics.getRowCount().isUnknown(), "Row count should be known for " + context);
                            // Reuse the handles fetched above instead of querying the metadata a second time.
                            assertAllColumnsHaveStatistics(tableStatistics, columnHandles, context);
                        }));
    }

    /**
     * Checks the row count and the per-column statistics of the {@code sf1} call_center table
     * for one column of each kind listed in the inline comments.
     */
    @Test
    public void testTableStatsDetails()
    {
        ConnectorTableHandle tableHandle = tableHandleFor("sf1", Table.CALL_CENTER);
        Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
        TableStatistics tableStatistics = statisticsFor(tableHandle, columnHandles.values());

        estimateAssertion.assertClose(tableStatistics.getRowCount(), Estimate.of(6), "Row count does not match");

        // all columns have stats
        assertAllColumnsHaveStatistics(tableStatistics, columnHandles.values(), "sf1." + Table.CALL_CENTER.getName());

        // identifier
        assertColumnStatistics(
                tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CALL_CENTER_SK.getName())),
                ColumnStatistics.builder()
                        .setNullsFraction(Estimate.of(0))
                        .setDistinctValuesCount(Estimate.of(6))
                        .setRange(new DoubleRange(1, 6))
                        .build());

        // varchar
        assertColumnStatistics(
                tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CALL_CENTER_ID.getName())),
                ColumnStatistics.builder()
                        .setNullsFraction(Estimate.of(0))
                        .setDistinctValuesCount(Estimate.of(3))
                        .setDataSize(Estimate.of(48.0))
                        .build());

        // char
        assertColumnStatistics(
                tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_ZIP.getName())),
                ColumnStatistics.builder()
                        .setNullsFraction(Estimate.of(0))
                        .setDistinctValuesCount(Estimate.of(1))
                        .setDataSize(Estimate.of(5.0))
                        .build());

        // decimal
        assertColumnStatistics(
                tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_GMT_OFFSET.getName())),
                ColumnStatistics.builder()
                        .setNullsFraction(Estimate.of(0))
                        .setDistinctValuesCount(Estimate.of(1))
                        .setRange(new DoubleRange(-5, -5))
                        .build());

        // date
        assertColumnStatistics(
                tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_REC_START_DATE.getName())),
                ColumnStatistics.builder()
                        .setNullsFraction(Estimate.of(0))
                        .setDistinctValuesCount(Estimate.of(4))
                        .setRange(new DoubleRange(10227L, 11688L))
                        .build());

        // only null values
        assertColumnStatistics(
                tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CLOSED_DATE_SK.getName())),
                ColumnStatistics.builder()
                        .setNullsFraction(Estimate.of(1))
                        .setDistinctValuesCount(Estimate.of(0))
                        .build());
    }

    /**
     * Checks the statistics of a {@code sf1} web_site column that contains some null values.
     */
    @Test
    public void testNullFraction()
    {
        ConnectorTableHandle tableHandle = tableHandleFor("sf1", Table.WEB_SITE);
        Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
        TableStatistics tableStatistics = statisticsFor(tableHandle, columnHandles.values());

        // some null values
        assertColumnStatistics(
                tableStatistics.getColumnStatistics().get(columnHandles.get(WebSiteColumn.WEB_REC_END_DATE.getName())),
                ColumnStatistics.builder()
                        .setNullsFraction(Estimate.of(0.5))
                        .setDistinctValuesCount(Estimate.of(3))
                        .setRange(new DoubleRange(10819L, 11549L))
                        .build());
    }

    /**
     * Serializes a {@link TableStatistics} holding the row count and the statistics of the
     * {@code web_site_sk} column, and compares the result with the expected JSON text.
     */
    @Test
    public void testTableStatisticsSerialization()
    {
        ConnectorTableHandle tableHandle = tableHandleFor("sf1", Table.WEB_SITE);
        Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
        TableStatistics tableStatistics = statisticsFor(tableHandle, columnHandles.values());

        // The expected JSON below describes web_site_sk, so select that column explicitly
        // rather than depending on the iteration order of the statistics map.
        ColumnHandle webSiteSk = columnHandles.get(WebSiteColumn.WEB_SITE_SK.getName());
        assertNotNull(webSiteSk, "Column handle for " + WebSiteColumn.WEB_SITE_SK.getName());
        Entry<ColumnHandle, ColumnStatistics> entry = tableStatistics.getColumnStatistics().entrySet().stream()
                .filter(candidate -> candidate.getKey().equals(webSiteSk))
                .findFirst()
                .orElseThrow(() -> new AssertionError("No statistics for column " + webSiteSk));

        TableStatistics expectedTableStatistics = TableStatistics.builder()
                .setRowCount(tableStatistics.getRowCount())
                .setColumnStatistics(entry.getKey(), entry.getValue())
                .build();

        JsonCodec<TableStatistics> codec = JsonCodec.jsonCodec(TableStatistics.class);
        String json = codec.toJson(expectedTableStatistics);
        assertEquals(json, "{\n" +
                " \"rowCount\" : {\n" +
                " \"value\" : 30.0\n" +
                " },\n" +
                " \"totalSize\" : {\n" +
                " \"value\" : \"NaN\"\n" +
                " },\n" +
                " \"columnStatistics\" : {\n" +
                " \"tpcds:web_site_sk\" : {\n" +
                " \"nullsFraction\" : {\n" +
                " \"value\" : 0.0\n" +
                " },\n" +
                " \"distinctValuesCount\" : {\n" +
                " \"value\" : 30.0\n" +
                " },\n" +
                " \"dataSize\" : {\n" +
                " \"value\" : \"NaN\"\n" +
                " },\n" +
                " \"range\" : {\n" +
                " \"min\" : 1.0,\n" +
                " \"max\" : 30.0\n" +
                " }\n" +
                " }\n" +
                " },\n" +
                " \"confidence\" : \"HIGH\"\n" +
                "}");
    }

    /**
     * Looks up the handle of {@code table} in {@code schemaName} through the metadata under test.
     */
    private ConnectorTableHandle tableHandleFor(String schemaName, Table table)
    {
        return metadata.getTableHandle(session, new SchemaTableName(schemaName, table.getName()));
    }

    /**
     * Requests statistics for the given columns with no layout handle and an always-true constraint.
     */
    private TableStatistics statisticsFor(ConnectorTableHandle tableHandle, Iterable<ColumnHandle> columnHandles)
    {
        return metadata.getTableStatistics(session, tableHandle, Optional.empty(), ImmutableList.copyOf(columnHandles), alwaysTrue());
    }

    /**
     * Asserts that {@code tableStatistics} has a non-null entry for each of {@code columnHandles}.
     *
     * @param context table description included in failure messages
     */
    private static void assertAllColumnsHaveStatistics(TableStatistics tableStatistics, Iterable<ColumnHandle> columnHandles, String context)
    {
        Map<ColumnHandle, ColumnStatistics> columnStatistics = tableStatistics.getColumnStatistics();
        for (ColumnHandle column : columnHandles) {
            assertTrue(columnStatistics.containsKey(column), "Missing statistics for column " + column + " of " + context);
            assertNotNull(columnStatistics.get(column), "Null statistics for column " + column + " of " + context);
        }
    }

    /**
     * Compares the nulls fraction, data size and distinct values count through
     * {@code estimateAssertion}, and the value range by equality.
     */
    private void assertColumnStatistics(ColumnStatistics actual, ColumnStatistics expected)
    {
        // Fail with a readable message instead of a NullPointerException when the column has no statistics.
        assertNotNull(actual, "Column statistics are missing");
        estimateAssertion.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "Nulls fraction");
        estimateAssertion.assertClose(actual.getDataSize(), expected.getDataSize(), "Data size");
        estimateAssertion.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "Distinct values count");
        assertEquals(actual.getRange(), expected.getRange(), "Range");
    }
}