ArrayDistinctFunction.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.scalar;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.operator.aggregation.TypedSet;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.OperatorDependency;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlType;
import com.facebook.presto.spi.function.TypeParameter;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import java.lang.invoke.MethodHandle;
import static com.facebook.presto.common.function.OperatorType.IS_DISTINCT_FROM;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.TypeUtils.readNativeValue;
import static com.facebook.presto.util.Failures.internalError;
import static com.google.common.base.Defaults.defaultValue;
@ScalarFunction("array_distinct")
@Description("Remove duplicate values from the given array")
public final class ArrayDistinctFunction
{
private ArrayDistinctFunction() {}
@TypeParameter("E")
@SqlType("array(E)")
public static Block distinct(
@TypeParameter("E") Type type,
@OperatorDependency(operator = IS_DISTINCT_FROM, argumentTypes = {"E", "E"}) MethodHandle elementIsDistinctFrom,
@SqlType("array(E)") Block array)
{
int arrayLength = array.getPositionCount();
if (arrayLength < 2) {
return array;
}
if (arrayLength == 2) {
boolean firstValueNull = array.isNull(0);
Object firstValue = firstValueNull ? defaultValue(type.getJavaType()) : readNativeValue(type, array, 0);
boolean secondValueNull = array.isNull(1);
Object secondValue = secondValueNull ? defaultValue(type.getJavaType()) : readNativeValue(type, array, 1);
boolean distinct;
try {
distinct = (boolean) elementIsDistinctFrom.invoke(firstValue, firstValueNull, secondValue, secondValueNull);
}
catch (Throwable t) {
throw internalError(t);
}
if (distinct) {
return array;
}
return array.getSingleValueBlock(0);
}
TypedSet typedSet = new TypedSet(type, elementIsDistinctFrom, array.getPositionCount(), "array_distinct");
BlockBuilder distinctElementBlockBuilder;
if (array.mayHaveNull()) {
int firstDuplicatePosition = 0;
// Keep adding the element to the set as long as there are no dupes.
while (firstDuplicatePosition < arrayLength && typedSet.add(array, firstDuplicatePosition)) {
firstDuplicatePosition++;
}
if (firstDuplicatePosition == arrayLength) {
// All elements are distinct, so just return the original.
return array;
}
int position = 0;
distinctElementBlockBuilder = type.createBlockBuilder(null, arrayLength);
while (position < firstDuplicatePosition) {
type.appendTo(array, position, distinctElementBlockBuilder);
position++;
}
while (position < arrayLength) {
if (typedSet.add(array, position)) {
type.appendTo(array, position, distinctElementBlockBuilder);
}
position++;
}
}
else {
int firstDuplicatePosition = 0;
while (firstDuplicatePosition < arrayLength && typedSet.addNonNull(array, firstDuplicatePosition)) {
firstDuplicatePosition++;
}
if (firstDuplicatePosition == arrayLength) {
// All elements are distinct, so just return the original.
return array;
}
int position = 0;
distinctElementBlockBuilder = type.createBlockBuilder(null, arrayLength);
while (position < firstDuplicatePosition) {
type.appendTo(array, position, distinctElementBlockBuilder);
position++;
}
while (position < arrayLength) {
if (typedSet.addNonNull(array, position)) {
type.appendTo(array, position, distinctElementBlockBuilder);
}
position++;
}
}
return distinctElementBlockBuilder.build();
}
@SqlType("array(bigint)")
public static Block bigintDistinct(@SqlType("array(bigint)") Block array)
{
final int arrayLength = array.getPositionCount();
if (arrayLength < 2) {
return array;
}
BlockBuilder distinctElementBlockBuilder;
LongSet set = new LongOpenHashSet(arrayLength);
if (array.mayHaveNull()) {
int position = 0;
boolean containsNull = false;
// Keep adding the element to the set as long as there are no dupes.
while (position < arrayLength) {
if (array.isNull(position)) {
if (!containsNull) {
containsNull = true;
}
else {
// Second null.
break;
}
}
else if (!set.add(BIGINT.getLong(array, position))) {
// Dupe found.
break;
}
position++;
}
if (position == arrayLength) {
// All elements are distinct, so just return the original.
return array;
}
distinctElementBlockBuilder = BIGINT.createBlockBuilder(null, arrayLength);
for (int i = 0; i < position; i++) {
BIGINT.appendTo(array, i, distinctElementBlockBuilder);
}
for (position++; position < arrayLength; position++) {
if (array.isNull(position)) {
if (!containsNull) {
BIGINT.appendTo(array, position, distinctElementBlockBuilder);
}
}
else if (set.add(BIGINT.getLong(array, position))) {
BIGINT.appendTo(array, position, distinctElementBlockBuilder);
}
}
}
else {
int position = 0;
// Keep adding the element to the set as long as there are no dupes.
while (position < arrayLength && set.add(BIGINT.getLong(array, position))) {
position++;
}
if (position == arrayLength) {
// All elements are distinct, so just return the original.
return array;
}
distinctElementBlockBuilder = BIGINT.createBlockBuilder(null, arrayLength);
for (int i = 0; i < position; i++) {
BIGINT.appendTo(array, i, distinctElementBlockBuilder);
}
for (position++; position < arrayLength; position++) {
if (set.add(BIGINT.getLong(array, position))) {
BIGINT.appendTo(array, position, distinctElementBlockBuilder);
}
}
}
return distinctElementBlockBuilder.build();
}
}