ChecksumValidator.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.verifier.checksum;
import com.facebook.presto.sql.tree.Expression;
import com.facebook.presto.sql.tree.FunctionCall;
import com.facebook.presto.sql.tree.GroupBy;
import com.facebook.presto.sql.tree.GroupingElement;
import com.facebook.presto.sql.tree.Identifier;
import com.facebook.presto.sql.tree.OrderBy;
import com.facebook.presto.sql.tree.QualifiedName;
import com.facebook.presto.sql.tree.Query;
import com.facebook.presto.sql.tree.Select;
import com.facebook.presto.sql.tree.SelectItem;
import com.facebook.presto.sql.tree.SimpleGroupBy;
import com.facebook.presto.sql.tree.SingleColumn;
import com.facebook.presto.sql.tree.SortItem;
import com.facebook.presto.sql.tree.Table;
import com.facebook.presto.verifier.framework.Column;
import com.facebook.presto.verifier.framework.Column.Category;
import com.google.common.collect.ImmutableList;
import javax.inject.Inject;
import javax.inject.Provider;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static com.facebook.presto.sql.QueryUtil.simpleQuery;
import static com.facebook.presto.sql.tree.SortItem.NullOrdering.UNDEFINED;
import static com.facebook.presto.sql.tree.SortItem.Ordering.ASCENDING;
import static com.google.common.collect.ImmutableList.toImmutableList;
public class ChecksumValidator
{
private final Map<Category, Provider<ColumnValidator>> columnValidators;
@Inject
public ChecksumValidator(Map<Category, Provider<ColumnValidator>> columnValidators)
{
this.columnValidators = columnValidators;
}
public Query generateChecksumQuery(QualifiedName tableName, List<Column> columns, Optional<Expression> partitionPredicate)
{
ImmutableList.Builder<SelectItem> selectItems = ImmutableList.builder();
selectItems.add(new SingleColumn(new FunctionCall(QualifiedName.of("count"), ImmutableList.of())));
for (Column column : columns) {
selectItems.addAll(columnValidators.get(column.getCategory()).get().generateChecksumColumns(column));
}
return simpleQuery(new Select(false, selectItems.build()), new Table(tableName), partitionPredicate, Optional.empty());
}
public Query generatePartitionChecksumQuery(QualifiedName tableName, List<Column> dataColumns, List<Column> partitionColumns, Optional<Expression> partitionPredicate)
{
ImmutableList.Builder<SelectItem> selectItems = ImmutableList.builder();
selectItems.add(new SingleColumn(new FunctionCall(QualifiedName.of("count"), ImmutableList.of())));
for (Column column : dataColumns) {
selectItems.addAll(columnValidators.get(column.getCategory()).get().generateChecksumColumns(column));
}
ImmutableList.Builder<GroupingElement> groupByList = ImmutableList.builder();
ImmutableList.Builder<SortItem> orderByList = ImmutableList.builder();
for (Column partitionColumn : partitionColumns) {
orderByList.add(new SortItem(new Identifier(partitionColumn.getName()), ASCENDING, UNDEFINED));
groupByList.add(new SimpleGroupBy(ImmutableList.of(new Identifier(partitionColumn.getName()))));
}
return simpleQuery(
new Select(false, selectItems.build()),
new Table(tableName),
partitionPredicate,
Optional.of(new GroupBy(false, groupByList.build())),
Optional.empty(),
Optional.of(new OrderBy(orderByList.build())),
Optional.empty(),
Optional.empty());
}
public Query generateBucketChecksumQuery(QualifiedName tableName, List<Column> partitionColumns, List<Column> dataColumns, Optional<Expression> partitionPredicate)
{
ImmutableList.Builder<SelectItem> selectItems = ImmutableList.builder();
selectItems.add(new SingleColumn(new FunctionCall(QualifiedName.of("count"), ImmutableList.of())));
for (Column column : dataColumns) {
selectItems.addAll(columnValidators.get(column.getCategory()).get().generateChecksumColumns(column));
}
ImmutableList.Builder<GroupingElement> groupByList = ImmutableList.builder();
ImmutableList.Builder<SortItem> orderByList = ImmutableList.builder();
for (Column partitionColumn : partitionColumns) {
orderByList.add(new SortItem(new Identifier(partitionColumn.getName()), ASCENDING, UNDEFINED));
groupByList.add(new SimpleGroupBy(ImmutableList.of(new Identifier(partitionColumn.getName()))));
}
orderByList.add(new SortItem(new Identifier("$bucket"), ASCENDING, UNDEFINED));
groupByList.add(new SimpleGroupBy(ImmutableList.of(new Identifier("$bucket"))));
return simpleQuery(
new Select(false, selectItems.build()),
new Table(tableName),
partitionPredicate,
Optional.of(new GroupBy(false, groupByList.build())),
Optional.empty(),
Optional.of(new OrderBy(orderByList.build())),
Optional.empty(),
Optional.empty());
}
public List<ColumnMatchResult<?>> getMismatchedColumns(List<Column> columns, ChecksumResult controlChecksum, ChecksumResult testChecksum)
{
return columns.stream()
.flatMap(column -> columnValidators.get(column.getCategory()).get().validate(column, controlChecksum, testChecksum).stream())
.filter(columnMatchResult -> !columnMatchResult.isMatched())
.collect(toImmutableList());
}
}