TestSortingUtil.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.arrow.algorithm.sort;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.lang.reflect.Array;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Random;
import java.util.function.BiConsumer;
import java.util.function.Supplier;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.SmallIntVector;
import org.apache.arrow.vector.TinyIntVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.testing.RandomDataGenerator;
import org.apache.arrow.vector.testing.ValueVectorDataPopulator;

/** Utilities for sorting related utilities. */
public class TestSortingUtil {

  static final Random random = new Random(0);

  static final DataGenerator<TinyIntVector, Byte> TINY_INT_GENERATOR =
      new DataGenerator<>(
          RandomDataGenerator.TINY_INT_GENERATOR,
          (vector, array) -> ValueVectorDataPopulator.setVector(vector, array),
          Byte.class);

  static final DataGenerator<SmallIntVector, Short> SMALL_INT_GENERATOR =
      new DataGenerator<>(
          RandomDataGenerator.SMALL_INT_GENERATOR,
          (vector, array) -> ValueVectorDataPopulator.setVector(vector, array),
          Short.class);

  static final DataGenerator<IntVector, Integer> INT_GENERATOR =
      new DataGenerator<>(
          RandomDataGenerator.INT_GENERATOR,
          (vector, array) -> ValueVectorDataPopulator.setVector(vector, array),
          Integer.class);

  static final DataGenerator<BigIntVector, Long> LONG_GENERATOR =
      new DataGenerator<>(
          RandomDataGenerator.LONG_GENERATOR,
          (vector, array) -> ValueVectorDataPopulator.setVector(vector, array),
          Long.class);

  static final DataGenerator<Float4Vector, Float> FLOAT_GENERATOR =
      new DataGenerator<>(
          RandomDataGenerator.FLOAT_GENERATOR,
          (vector, array) -> ValueVectorDataPopulator.setVector(vector, array),
          Float.class);

  static final DataGenerator<Float8Vector, Double> DOUBLE_GENERATOR =
      new DataGenerator<>(
          RandomDataGenerator.DOUBLE_GENERATOR,
          (vector, array) -> ValueVectorDataPopulator.setVector(vector, array),
          Double.class);

  static final DataGenerator<VarCharVector, String> STRING_GENERATOR =
      new DataGenerator<>(
          () -> {
            int strLength = random.nextInt(20) + 1;
            return generateRandomString(strLength);
          },
          (vector, array) -> ValueVectorDataPopulator.setVector(vector, array),
          String.class);

  private TestSortingUtil() {}

  /** Verify that a vector is equal to an array. */
  public static <V extends ValueVector, U> void verifyResults(V vector, U[] expected) {
    assertEquals(vector.getValueCount(), expected.length);
    for (int i = 0; i < expected.length; i++) {
      assertEquals(vector.getObject(i), expected[i]);
    }
  }

  /** Sort an array with null values come first. */
  public static <U extends Comparable<U>> void sortArray(U[] array) {
    Arrays.sort(
        array,
        (a, b) -> {
          if (a == null || b == null) {
            if (a == null && b == null) {
              return 0;
            }

            // exactly one is null
            if (a == null) {
              return -1;
            } else {
              return 1;
            }
          }
          return a.compareTo(b);
        });
  }

  /** Generate a string with alphabetic characters only. */
  static String generateRandomString(int length) {
    byte[] str = new byte[length];
    final int lower = 'a';
    final int upper = 'z';

    for (int i = 0; i < length; i++) {
      // make r non-negative
      int r = random.nextInt() & Integer.MAX_VALUE;
      str[i] = (byte) (r % (upper - lower + 1) + lower);
    }

    return new String(str, StandardCharsets.UTF_8);
  }

  /**
   * Utility to generate data for testing.
   *
   * @param <V> vector type.
   * @param <U> data element type.
   */
  static class DataGenerator<V extends ValueVector, U extends Comparable<U>> {

    final Supplier<U> dataGenerator;

    final BiConsumer<V, U[]> vectorPopulator;

    final Class<U> clazz;

    DataGenerator(Supplier<U> dataGenerator, BiConsumer<V, U[]> vectorPopulator, Class<U> clazz) {
      this.dataGenerator = dataGenerator;
      this.vectorPopulator = vectorPopulator;
      this.clazz = clazz;
    }

    /**
     * Populate the vector according to the specified parameters.
     *
     * @param vector the vector to populate.
     * @param length vector length.
     * @param nullFraction the fraction of null values.
     * @return An array with the same data as the vector.
     */
    U[] populate(V vector, int length, double nullFraction) {
      U[] array = (U[]) Array.newInstance(clazz, length);
      for (int i = 0; i < length; i++) {
        double r = Math.random();
        U value = r < nullFraction ? null : dataGenerator.get();
        array[i] = value;
      }
      vectorPopulator.accept(vector, array);
      return array;
    }
  }
}